--- old/make/autoconf/build-aux/config.sub 2018-09-25 19:23:21.000000000 +0300 +++ new/make/autoconf/build-aux/config.sub 2018-09-25 19:23:21.000000000 +0300 @@ -45,6 +45,11 @@ config=`echo $1 | sed 's/^aarch64-/arm-/'` sub_args="$sub_args $config" shift; ;; + aarch32-* ) + config=`echo $1 | sed 's/^aarch32-/arm-/'` + sub_args="$sub_args $config" + replace="aarch32-" + shift; ;; - ) # Use stdin as input. sub_args="$sub_args $1" shift; break ;; --- old/make/autoconf/flags.m4 2018-09-25 19:23:23.000000000 +0300 +++ new/make/autoconf/flags.m4 2018-09-25 19:23:22.000000000 +0300 @@ -38,8 +38,9 @@ if test "x$with_abi_profile" != x; then if test "x$OPENJDK_TARGET_CPU" != xarm && \ - test "x$OPENJDK_TARGET_CPU" != xaarch64; then - AC_MSG_ERROR([--with-abi-profile only available on arm/aarch64]) + test "x$OPENJDK_TARGET_CPU" != xaarch64 && \ + test "x$OPENJDK_TARGET_CPU" != xaarch32 ; then + AC_MSG_ERROR([--with-abi-profile only available on arm/aarch64/aarch32]) fi OPENJDK_TARGET_ABI_PROFILE=$with_abi_profile @@ -65,6 +66,14 @@ # No special flags, just need to trigger setting JDK_ARCH_ABI_PROP_NAME ARM_FLOAT_TYPE= ARM_ARCH_TYPE_FLAGS= + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xgnueabihf; then + # No special flags, just need to trigger setting JDK_ARCH_ABI_PROP_NAME + ARM_FLOAT_TYPE= + ARM_ARCH_TYPE_FLAGS= + elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xgnueabi; then + # No special flags, just need to trigger setting JDK_ARCH_ABI_PROP_NAME + ARM_FLOAT_TYPE= + ARM_ARCH_TYPE_FLAGS= elif test "x$OPENJDK_TARGET_ABI_PROFILE" = xaarch64; then # No special flags, just need to trigger setting JDK_ARCH_ABI_PROP_NAME ARM_FLOAT_TYPE= --- old/make/autoconf/hotspot.m4 2018-09-25 19:23:24.000000000 +0300 +++ new/make/autoconf/hotspot.m4 2018-09-25 19:23:24.000000000 +0300 @@ -305,8 +305,14 @@ # Override hotspot cpu definitions for ARM platforms if test "x$OPENJDK_TARGET_CPU" = xarm; then - HOTSPOT_TARGET_CPU=arm_32 - HOTSPOT_TARGET_CPU_DEFINE="ARM32" + if test "x$HOTSPOT_TARGET_CPU_PORT" = xarm; then + HOTSPOT_TARGET_CPU=arm_32 + HOTSPOT_TARGET_CPU_DEFINE="ARM32" + else + HOTSPOT_TARGET_CPU=aarch32 + HOTSPOT_TARGET_CPU_ARCH=aarch32 + HOTSPOT_TARGET_CPU_DEFINE="AARCH32" + fi elif test "x$OPENJDK_TARGET_CPU" = xaarch64 && test "x$HOTSPOT_TARGET_CPU_PORT" = xarm64; then HOTSPOT_TARGET_CPU=arm_64 HOTSPOT_TARGET_CPU_ARCH=arm @@ -558,23 +564,27 @@ ################################################################################ # -# Specify which sources will be used to build the 64-bit ARM port +# Specify which sources will be used to build the ARM port # -# --with-cpu-port=arm64 will use hotspot/src/cpu/arm +# --with-cpu-port=arm will use hotspot/src/cpu/arm # --with-cpu-port=aarch64 will use hotspot/src/cpu/aarch64 +# --with-cpu-port=aarch32 will use hotspot/src/cpu/aarch32 # AC_DEFUN([SETUP_HOTSPOT_TARGET_CPU_PORT], [ AC_ARG_WITH(cpu-port, [AS_HELP_STRING([--with-cpu-port], - [specify sources to use for Hotspot 64-bit ARM port (arm64,aarch64) @<:@aarch64@:>@ ])]) + [specify sources to use for Hotspot ARM port (arm,aarch64,aarch32) @<:@aarch64@:>@ ])]) if test "x$with_cpu_port" != x; then - if test "x$OPENJDK_TARGET_CPU" != xaarch64; then - AC_MSG_ERROR([--with-cpu-port only available on aarch64]) - fi - if test "x$with_cpu_port" != xarm64 && \ - test "x$with_cpu_port" != xaarch64; then - AC_MSG_ERROR([--with-cpu-port must specify arm64 or aarch64]) + if test "x$OPENJDK_TARGET_CPU" != xaarch64 && \ + test "x$OPENJDK_TARGET_CPU" != xaarch32 && \ + test "x$OPENJDK_TARGET_CPU" != xarm ; then + 
AC_MSG_ERROR([--with-cpu-port only available on arm/aarch64/32]) + fi + if test "x$with_cpu_port" != xarm && \ + test "x$with_cpu_port" != xaarch64 && \ + test "x$with_cpu_port" != xaarch32 ; then + AC_MSG_ERROR([--with-cpu-port must specify arm, aarch32 or aarch64]) fi HOTSPOT_TARGET_CPU_PORT="$with_cpu_port" fi --- old/make/autoconf/platform.m4 2018-09-25 19:23:25.000000000 +0300 +++ new/make/autoconf/platform.m4 2018-09-25 19:23:25.000000000 +0300 @@ -28,6 +28,7 @@ # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], [ + echo "LOOKING UP CPU ARCH $1" # First argument is the cpu name from the trip/quad case "$1" in x86_64) @@ -54,6 +55,12 @@ VAR_CPU_BITS=32 VAR_CPU_ENDIAN=little ;; + aarch32) + VAR_CPU=aarch32 + VAR_CPU_ARCH=aarch32 + VAR_CPU_BITS=32 + VAR_CPU_ENDIAN=little + ;; aarch64) VAR_CPU=aarch64 VAR_CPU_ARCH=aarch64 @@ -386,6 +393,8 @@ elif test "x$OPENJDK_$1_OS" != xmacosx && test "x$OPENJDK_$1_CPU" = xx86_64; then # On all platforms except macosx, we replace x86_64 with amd64. OPENJDK_$1_CPU_OSARCH="amd64" + elif test "x$OPENJDK_$1_CPU" = xaarch32; then + OPENJDK_$1_CPU_OSARCH="arm" fi AC_SUBST(OPENJDK_$1_CPU_OSARCH) --- old/make/hotspot/lib/JvmOverrideFiles.gmk 2018-09-25 19:23:26.000000000 +0300 +++ new/make/hotspot/lib/JvmOverrideFiles.gmk 2018-09-25 19:23:26.000000000 +0300 @@ -38,6 +38,10 @@ BUILD_LIBJVM_interp_masm_x86.cpp_CXXFLAGS := -Wno-uninitialized endif +ifeq ($(TOOLCHAIN_TYPE), gcc) + BUILD_LIBJVM_vm_version_aarch32_2.cpp_CXXFLAGS := -fno-stack-protector +endif + ifeq ($(OPENJDK_TARGET_OS), linux) BUILD_LIBJVM_ostream.cpp_CXXFLAGS := -D_FILE_OFFSET_BITS=64 BUILD_LIBJVM_logFileOutput.cpp_CXXFLAGS := -D_FILE_OFFSET_BITS=64 --- old/make/lib/Lib-jdk.hotspot.agent.gmk 2018-09-25 19:23:27.000000000 +0300 +++ new/make/lib/Lib-jdk.hotspot.agent.gmk 2018-09-25 19:23:27.000000000 +0300 @@ -67,6 +67,8 @@ LIBS_windows := dbgeng.lib, \ )) +ifneq ($(OPENJDK_TARGET_CPU), aarch32) TARGETS += $(BUILD_LIBSA) +endif ################################################################################ --- old/src/hotspot/os/linux/os_linux.cpp 2018-09-25 19:23:28.000000000 +0300 +++ new/src/hotspot/os/linux/os_linux.cpp 2018-09-25 19:23:28.000000000 +0300 @@ -1773,6 +1773,8 @@ static Elf32_Half running_arch_code=EM_AARCH64; #elif (defined ARM) static Elf32_Half running_arch_code=EM_ARM; +#elif (defined AARCH32) + static Elf32_Half running_arch_code=EM_ARM; #elif (defined S390) static Elf32_Half running_arch_code=EM_S390; #elif (defined ALPHA) @@ -3516,6 +3518,7 @@ AARCH64_ONLY(2 * M) AMD64_ONLY(2 * M) ARM32_ONLY(2 * M) + AARCH32_ONLY(2 * M) IA32_ONLY(4 * M) IA64_ONLY(256 * M) PPC_ONLY(4 * M) --- old/src/hotspot/share/adlc/adlparse.cpp 2018-09-25 19:23:30.000000000 +0300 +++ new/src/hotspot/share/adlc/adlparse.cpp 2018-09-25 19:23:29.000000000 +0300 @@ -483,7 +483,8 @@ else if (!strcmp(ident, "format")) oper->_format = format_parse(); else if (!strcmp(ident, "interface")) oper->_interface = interface_parse(); // Check identifier to see if it is the name of an attribute - else if (((attr = _globalNames[ident]->is_attribute()) != NULL) && + else if (_globalNames[ident] && + ((attr = _globalNames[ident]->is_attribute()) != NULL) && (attr->_atype == OP_ATTR)) oper->_attribs = attr_parse(ident); else { parse_err(SYNERR, "expected one of - constraint, predicate, match, encode, format, construct, or the name of a defined operand attribute at %s\n", ident); --- old/src/hotspot/share/c1/c1_Compiler.cpp 2018-09-25 19:23:31.000000000 +0300 +++ 
new/src/hotspot/share/c1/c1_Compiler.cpp 2018-09-25 19:23:31.000000000 +0300 @@ -1,5 +1,6 @@ /* * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -215,10 +216,19 @@ case vmIntrinsics::_updateCRC32: case vmIntrinsics::_updateBytesCRC32: case vmIntrinsics::_updateByteBufferCRC32: -#if defined(SPARC) || defined(S390) || defined(PPC64) || defined(AARCH64) +#if defined(SPARC) || defined(S390) || defined(PPC64) || defined(AARCH64) || defined(AARCH32) case vmIntrinsics::_updateBytesCRC32C: case vmIntrinsics::_updateDirectByteBufferCRC32C: #endif +#ifdef AARCH32 + case vmIntrinsics::_aescrypt_encryptBlock: + case vmIntrinsics::_aescrypt_decryptBlock: + case vmIntrinsics::_sha_implCompress: + case vmIntrinsics::_sha2_implCompress: + case vmIntrinsics::_sha5_implCompress: + case vmIntrinsics::_montgomeryMultiply: + case vmIntrinsics::_montgomerySquare: +#endif case vmIntrinsics::_vectorizedMismatch: case vmIntrinsics::_compareAndSetInt: case vmIntrinsics::_compareAndSetObject: --- old/src/hotspot/share/c1/c1_LIR.cpp 2018-09-25 19:23:32.000000000 +0300 +++ new/src/hotspot/share/c1/c1_LIR.cpp 2018-09-25 19:23:32.000000000 +0300 @@ -1,5 +1,6 @@ /* * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -145,6 +146,7 @@ case T_FLOAT: // FP return values can be also in CPU registers on ARM and PPC32 (softfp ABI) assert((kindfield == fpu_register || kindfield == stack_value + AARCH32_ONLY(|| kindfield == cpu_register) ARM_ONLY(|| kindfield == cpu_register) PPC32_ONLY(|| kindfield == cpu_register) ) && size_field() == single_size, "must match"); @@ -1492,7 +1494,7 @@ out->print("fpu%d", fpu_regnr()); } else if (is_double_fpu()) { out->print("fpu%d", fpu_regnrLo()); -#elif defined(ARM) +#elif defined(ARM) || defined(AARCH32) } else if (is_single_fpu()) { out->print("s%d", fpu_regnr()); } else if (is_double_fpu()) { --- old/src/hotspot/share/c1/c1_LIR.hpp 2018-09-25 19:23:33.000000000 +0300 +++ new/src/hotspot/share/c1/c1_LIR.hpp 2018-09-25 19:23:33.000000000 +0300 @@ -1,5 +1,6 @@ /* * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -693,18 +694,40 @@ #ifdef __SOFTFP__ case T_FLOAT: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::float_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::single_size | - LIR_OprDesc::virtual_mask); +#ifdef AARCH32 + if (hasFPU()) { + res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | + LIR_OprDesc::float_type | + LIR_OprDesc::fpu_register | + LIR_OprDesc::single_size | + LIR_OprDesc::virtual_mask); + } else +#endif // AARCH32 + { + res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | + LIR_OprDesc::float_type | + LIR_OprDesc::cpu_register | + LIR_OprDesc::single_size | + LIR_OprDesc::virtual_mask); + } break; case T_DOUBLE: - res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | - LIR_OprDesc::double_type | - LIR_OprDesc::cpu_register | - LIR_OprDesc::double_size | - LIR_OprDesc::virtual_mask); +#ifdef AARCH32 + if(hasFPU()) { + res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | + LIR_OprDesc::double_type | + LIR_OprDesc::fpu_register | + LIR_OprDesc::double_size | + LIR_OprDesc::virtual_mask); + } else +#endif + { + res = (LIR_Opr)(intptr_t)((index << LIR_OprDesc::data_shift) | + LIR_OprDesc::double_type | + LIR_OprDesc::cpu_register | + LIR_OprDesc::double_size | + LIR_OprDesc::virtual_mask); + } break; #else // __SOFTFP__ case T_FLOAT: --- old/src/hotspot/share/c1/c1_LIRGenerator.cpp 2018-09-25 19:23:35.000000000 +0300 +++ new/src/hotspot/share/c1/c1_LIRGenerator.cpp 2018-09-25 19:23:34.000000000 +0300 @@ -1,5 +1,6 @@ /* * Copyright (c) 2005, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -3068,6 +3069,29 @@ do_update_CRC32C(x); break; +#ifdef AARCH32 + case vmIntrinsics::_aescrypt_encryptBlock: + case vmIntrinsics::_aescrypt_decryptBlock: + do_aescrypt_block(x); + break; + + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: + case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: + do_aescrypt_cbc(x); + break; + + case vmIntrinsics::_sha_implCompress: + case vmIntrinsics::_sha2_implCompress: + case vmIntrinsics::_sha5_implCompress: + do_sha(x); + break; + + case vmIntrinsics::_montgomeryMultiply: + case vmIntrinsics::_montgomerySquare: + do_montgomery_intrinsic(x); + break; +#endif + case vmIntrinsics::_vectorizedMismatch: do_vectorizedMismatch(x); break; --- old/src/hotspot/share/c1/c1_LIRGenerator.hpp 2018-09-25 19:23:36.000000000 +0300 +++ new/src/hotspot/share/c1/c1_LIRGenerator.hpp 2018-09-25 19:23:36.000000000 +0300 @@ -1,5 +1,6 @@ /* * Copyright (c) 2005, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -264,7 +265,16 @@ void do_Reference_get(Intrinsic* x); void do_update_CRC32(Intrinsic* x); void do_update_CRC32C(Intrinsic* x); +#ifdef AARCH32 + void do_update_CRC32_inner(Intrinsic* x, int is_crc32c); +#endif void do_vectorizedMismatch(Intrinsic* x); +#ifdef AARCH32 + void do_aescrypt_block(Intrinsic* x); + void do_aescrypt_cbc(Intrinsic* x); + void do_sha(Intrinsic* x); + void do_montgomery_intrinsic(Intrinsic *x); +#endif public: LIR_Opr call_runtime(BasicTypeArray* signature, LIRItemList* args, address entry, ValueType* result_type, CodeEmitInfo* info); @@ -311,6 +321,9 @@ void array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info, ciMethod* profiled_method, int profiled_bci); static LIR_Opr result_register_for(ValueType* type, bool callee = false); +#ifdef AARCH32 + static LIR_Opr java_result_register_for(ValueType* type, bool callee = false); +#endif ciObject* get_jobject_constant(Value value); --- old/src/hotspot/share/c1/c1_LinearScan.cpp 2018-09-25 19:23:37.000000000 +0300 +++ new/src/hotspot/share/c1/c1_LinearScan.cpp 2018-09-25 19:23:37.000000000 +0300 @@ -1,5 +1,6 @@ /* * Copyright (c) 2005, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -179,10 +180,10 @@ } bool LinearScan::is_virtual_cpu_interval(const Interval* i) { -#if defined(__SOFTFP__) || defined(E500V2) +#if !defined(AARCH32) && (defined(__SOFTFP__) || defined(E500V2)) return i->reg_num() >= LIR_OprDesc::vreg_base; #else - return i->reg_num() >= LIR_OprDesc::vreg_base && (i->type() != T_FLOAT && i->type() != T_DOUBLE); + return i->reg_num() >= LIR_OprDesc::vreg_base && (AARCH32_ONLY(!hasFPU() ||) (i->type() != T_FLOAT && i->type() != T_DOUBLE)); #endif // __SOFTFP__ or E500V2 } @@ -191,10 +192,10 @@ } bool LinearScan::is_virtual_fpu_interval(const Interval* i) { -#if defined(__SOFTFP__) || defined(E500V2) +#if !defined(AARCH32) && (defined(__SOFTFP__) || defined(E500V2)) return false; #else - return i->reg_num() >= LIR_OprDesc::vreg_base && (i->type() == T_FLOAT || i->type() == T_DOUBLE); + return i->reg_num() >= LIR_OprDesc::vreg_base && (i->type() == T_FLOAT || i->type() == T_DOUBLE) AARCH32_ONLY(&& hasFPU()); #endif // __SOFTFP__ or E500V2 } @@ -2100,6 +2101,13 @@ #ifdef __SOFTFP__ case T_FLOAT: // fall through +#if defined(AARCH32) + if(hasFPU()) { + assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register"); + assert(interval->assigned_regHi() == any_reg, "must not have hi register"); + return LIR_OprFact::single_fpu(assigned_reg - pd_first_fpu_reg); + } +#endif #endif // __SOFTFP__ case T_INT: { assert(assigned_reg >= pd_first_cpu_reg && assigned_reg <= pd_last_cpu_reg, "no cpu register"); @@ -2109,6 +2117,14 @@ #ifdef __SOFTFP__ case T_DOUBLE: // fall through +#if defined(AARCH32) + if(hasFPU()) { + assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register"); + assert(interval->assigned_regHi() >= pd_first_fpu_reg && interval->assigned_regHi() <= pd_last_fpu_reg, "no fpu register"); + assert(assigned_reg % 2 == 0 && assigned_reg + 1 == interval->assigned_regHi(), "must be sequential and even"); + return LIR_OprFact::double_fpu(assigned_reg - pd_first_fpu_reg, interval->assigned_regHi() - pd_first_fpu_reg); + } +#endif #endif // 
__SOFTFP__ case T_LONG: { int assigned_regHi = interval->assigned_regHi(); @@ -2176,7 +2192,7 @@ assert(interval->assigned_regHi() >= pd_first_fpu_reg && interval->assigned_regHi() <= pd_last_fpu_reg, "no fpu register"); assert(assigned_reg % 2 == 0 && assigned_reg + 1 == interval->assigned_regHi(), "must be sequential and even"); LIR_Opr result = LIR_OprFact::double_fpu(interval->assigned_regHi() - pd_first_fpu_reg, assigned_reg - pd_first_fpu_reg); -#elif defined(ARM32) +#elif defined(ARM32) || defined(AARCH32) assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register"); assert(interval->assigned_regHi() >= pd_first_fpu_reg && interval->assigned_regHi() <= pd_last_fpu_reg, "no fpu register"); assert(assigned_reg % 2 == 0 && assigned_reg + 1 == interval->assigned_regHi(), "must be sequential and even"); @@ -2774,7 +2790,7 @@ #ifdef SPARC assert(opr->fpu_regnrLo() == opr->fpu_regnrHi() + 1, "assumed in calculation (only fpu_regnrHi is used)"); #endif -#ifdef ARM32 +#if defined(ARM32) || defined(AARCH32) assert(opr->fpu_regnrHi() == opr->fpu_regnrLo() + 1, "assumed in calculation (only fpu_regnrLo is used)"); #endif #ifdef PPC32 --- old/src/hotspot/share/c1/c1_Runtime1.cpp 2018-09-25 19:23:38.000000000 +0300 +++ new/src/hotspot/share/c1/c1_Runtime1.cpp 2018-09-25 19:23:38.000000000 +0300 @@ -1,5 +1,6 @@ /* * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -326,6 +327,16 @@ #endif FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32()); FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32C()); +#ifdef AARCH32 + FUNCTION_CASE(entry, StubRoutines::aescrypt_encryptBlock()); + FUNCTION_CASE(entry, StubRoutines::aescrypt_decryptBlock()); + FUNCTION_CASE(entry, StubRoutines::cipherBlockChaining_encryptAESCrypt_special()); + FUNCTION_CASE(entry, StubRoutines::cipherBlockChaining_decryptAESCrypt_special()); + FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32()); + FUNCTION_CASE(entry, StubRoutines::sha1_implCompress()); + FUNCTION_CASE(entry, StubRoutines::sha256_implCompress()); + FUNCTION_CASE(entry, StubRoutines::sha512_implCompress()); +#endif FUNCTION_CASE(entry, StubRoutines::vectorizedMismatch()); FUNCTION_CASE(entry, StubRoutines::dexp()); FUNCTION_CASE(entry, StubRoutines::dlog()); --- old/src/hotspot/share/classfile/javaClasses.cpp 2018-09-25 19:23:39.000000000 +0300 +++ new/src/hotspot/share/classfile/javaClasses.cpp 2018-09-25 19:23:39.000000000 +0300 @@ -1,5 +1,6 @@ /* * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -4273,6 +4274,14 @@ int java_util_concurrent_locks_AbstractOwnableSynchronizer::_owner_offset; int reflect_ConstantPool::_oop_offset; int reflect_UnsafeStaticFieldAccessorImpl::_base_offset; +#ifdef AARCH32 +int com_sun_crypto_provider_AESCrypt::_K_offset; +int com_sun_crypto_provider_CipherBlockChaining::_r_offset; +int com_sun_crypto_provider_FeedbackCipher::_embeddedCipher_offset; +int sun_security_provider_SHA::_state_offset; +int sun_security_provider_SHA2::_state_offset; +int sun_security_provider_SHA5::_state_offset; +#endif #define STACKTRACEELEMENT_FIELDS_DO(macro) \ macro(declaringClassObject_offset, k, "declaringClassObject", class_signature, false); \ @@ -4435,6 +4444,86 @@ return (hardcoded_offset * heapOopSize) + instanceOopDesc::base_offset_in_bytes(); } +#ifdef AARCH32 +// Support for intrinsification of com.sun.crypto.provider.AESCrypto.encrypt +#define AESCRYPT_FIELDS_DO(macro) \ + macro(_K_offset, k, "K", int_array_signature, false) + +void com_sun_crypto_provider_AESCrypt::compute_offsets() { + InstanceKlass* k = SystemDictionary::AESCrypt_klass(); + AESCRYPT_FIELDS_DO(FIELD_COMPUTE_OFFSET); +} + +int com_sun_crypto_provider_AESCrypt::K_offset() { + return _K_offset; +} + +// Support for intrinsification of com_sun_crypto_provider_CipherBlockChaining.encrypt +#define CBC_FIELDS_DO(macro) \ + macro(_r_offset, k, "r", byte_array_signature, false) + +void com_sun_crypto_provider_CipherBlockChaining::compute_offsets() { + InstanceKlass* k = SystemDictionary::CipherBlockChaining_klass(); + CBC_FIELDS_DO(FIELD_COMPUTE_OFFSET); +} + +int com_sun_crypto_provider_CipherBlockChaining::r_offset() { + return _r_offset; +} + +// Support for intrinsification of com_sun_crypto_provider_CipherBlockChaining.encrypt +#define FC_FIELDS_DO(macro) \ + macro(_embeddedCipher_offset, k, "embeddedCipher", symmetriccipher_signature, false) + +void com_sun_crypto_provider_FeedbackCipher::compute_offsets() { + InstanceKlass* k = SystemDictionary::FeedbackCipher_klass(); + FC_FIELDS_DO(FIELD_COMPUTE_OFFSET); +} + +int com_sun_crypto_provider_FeedbackCipher::embeddedCipher_offset() { + return _embeddedCipher_offset; +} + +// Support for intrinsification of sun_security_provider_SHA.implCompress +#define SHA_FIELDS_DO(macro) \ + macro(_state_offset, k, "state", int_array_signature, false) + +void sun_security_provider_SHA::compute_offsets() { + InstanceKlass* k = SystemDictionary::SHA_klass(); + SHA_FIELDS_DO(FIELD_COMPUTE_OFFSET); +} + +int sun_security_provider_SHA::state_offset() { + return _state_offset; +} + +// Support for intrinsification of sun_security_provider_SHA2.implCompress +#define SHA2_FIELDS_DO(macro) \ + macro(_state_offset, k, "state", int_array_signature, false) + +void sun_security_provider_SHA2::compute_offsets() { + InstanceKlass* k = SystemDictionary::SHA2_klass(); + SHA2_FIELDS_DO(FIELD_COMPUTE_OFFSET); +} + +int sun_security_provider_SHA2::state_offset() { + return _state_offset; +} + +// Support for intrinsification of sun_security_provider_SHA5.implCompress +#define SHA5_FIELDS_DO(macro) \ + macro(_state_offset, k, "state", long_array_signature, false) + +void sun_security_provider_SHA5::compute_offsets() { + InstanceKlass* k = SystemDictionary::SHA5_klass(); + SHA5_FIELDS_DO(FIELD_COMPUTE_OFFSET); +} + +int sun_security_provider_SHA5::state_offset() { + return _state_offset; +} +#endif + // Compute hard-coded offsets // Invoked before SystemDictionary::initialize, so pre-loaded classes // are not 
available to determine the offset_of_static_fields. @@ -4495,6 +4584,15 @@ // generated interpreter code wants to know about the offsets we just computed: AbstractAssembler::update_delayed_values(); + +#ifdef AARCH32 + com_sun_crypto_provider_AESCrypt::compute_offsets(); + com_sun_crypto_provider_FeedbackCipher::compute_offsets(); + com_sun_crypto_provider_CipherBlockChaining::compute_offsets(); + sun_security_provider_SHA::compute_offsets(); + sun_security_provider_SHA2::compute_offsets(); + sun_security_provider_SHA5::compute_offsets(); +#endif } #ifndef PRODUCT --- old/src/hotspot/share/classfile/javaClasses.hpp 2018-09-25 19:23:41.000000000 +0300 +++ new/src/hotspot/share/classfile/javaClasses.hpp 2018-09-25 19:23:40.000000000 +0300 @@ -1,5 +1,6 @@ /* * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1491,6 +1492,114 @@ static void serialize(SerializeClosure* f) NOT_CDS_RETURN; }; +#ifdef AARCH32 +class com_sun_crypto_provider_AESCrypt: AllStatic { + public: + static int _K_offset; + enum { + hc_ROUND_12_offset = 0, + hc_ROUND_14_offset = 1, + hc_sessionK_offset = 2, + hc_K_offset = 3, + hc_lastKey_offset = 4, + hc_limit = 5 + }; + + static int K_offset(); + static void compute_offsets(); +}; + +class com_sun_crypto_provider_FeedbackCipher: AllStatic { + public: + static int _embeddedCipher_offset; + enum { + hc_blockSize_offset = 0, + hc_embeddedCipher_offset = 1, + hc_iv_offset + }; + + static int embeddedCipher_offset(); + static void compute_offsets(); +}; + +class com_sun_crypto_provider_CipherBlockChaining: + public com_sun_crypto_provider_FeedbackCipher { + public: + static int _r_offset; + enum { + hc_r_offset = hc_iv_offset+1, + hc_k_offset + }; + + static int r_offset(); + static void compute_offsets(); +}; + +class java_security_MessageDigestSpi { +public: + enum { + hc_tempArray = 0 + }; +}; + +class sun_security_provider_DigestBase: java_security_MessageDigestSpi { +public: + enum { + hc_digestLength = hc_tempArray + 1, + hc_bytesProcessed_low, + hc_bytesProcessed_high, + hc_blockSize, + hc_bufOfs, + hc_oneByte, + hc_algorithm, + hc_buffer + }; +}; + +class sun_security_provider_SHA: + public sun_security_provider_DigestBase { + public: + static int _state_offset; + enum { + hc_W = hc_buffer + 1, + hc_state, + hc_init_hashes + }; + + static int state_offset(); + static void compute_offsets(); +}; + + +class sun_security_provider_SHA2: + public sun_security_provider_DigestBase { + public: + static int _state_offset; + enum { + hc_W = hc_buffer + 1, + hc_state, + hc_init_hashes + }; + + static int state_offset(); + static void compute_offsets(); +}; + +class sun_security_provider_SHA5: + public sun_security_provider_DigestBase { + public: + static int _state_offset; + enum { + hc_W = hc_buffer + 1, + hc_state, + hc_init_hashes + }; + + static int state_offset(); + static void compute_offsets(); +}; +#endif + // Use to declare fields that need to be injected into Java classes // for the JVM to use. The name_index and signature_index are // declared in vmSymbols. 
The may_be_java flag is used to declare --- old/src/hotspot/share/classfile/systemDictionary.hpp 2018-09-25 19:23:42.000000000 +0300 +++ new/src/hotspot/share/classfile/systemDictionary.hpp 2018-09-25 19:23:42.000000000 +0300 @@ -212,6 +212,14 @@ do_klass(Integer_klass, java_lang_Integer, Pre ) \ do_klass(Long_klass, java_lang_Long, Pre ) \ \ + /* In the name of the God of speed */ \ + AARCH32_ONLY(do_klass(AESCrypt_klass, com_sun_crypto_provider_aescrypt, Opt )) \ + AARCH32_ONLY(do_klass(CipherBlockChaining_klass, com_sun_crypto_provider_cipherBlockChaining, Opt )) \ + AARCH32_ONLY(do_klass(FeedbackCipher_klass, com_sun_crypto_provider_feedbackcipher, Opt )) \ + AARCH32_ONLY(do_klass(SHA_klass, sun_security_provider_sha, Opt )) \ + AARCH32_ONLY(do_klass(SHA2_klass, sun_security_provider_sha2, Opt )) \ + AARCH32_ONLY(do_klass(SHA5_klass, sun_security_provider_sha5, Opt )) \ + \ /* JVMCI classes. These are loaded on-demand. */ \ JVMCI_WK_KLASSES_DO(do_klass) \ \ --- old/src/hotspot/share/classfile/vmSymbols.hpp 2018-09-25 19:23:43.000000000 +0300 +++ new/src/hotspot/share/classfile/vmSymbols.hpp 2018-09-25 19:23:43.000000000 +0300 @@ -321,6 +321,8 @@ template(DEFAULT_CONTEXT_name, "DEFAULT_CONTEXT") \ NOT_LP64( do_alias(intptr_signature, int_signature) ) \ LP64_ONLY( do_alias(intptr_signature, long_signature) ) \ + /* for the sake of the god of speed */ \ + AARCH32_ONLY(template(com_sun_crypto_provider_feedbackcipher, "com/sun/crypto/provider/FeedbackCipher")) \ \ /* Support for JVMCI */ \ JVMCI_VM_SYMBOLS_DO(template, do_alias) \ @@ -481,6 +483,7 @@ template(byte_array_signature, "[B") \ template(char_array_signature, "[C") \ template(int_array_signature, "[I") \ + AARCH32_ONLY(template(long_array_signature, "[J")) \ template(object_void_signature, "(Ljava/lang/Object;)V") \ template(object_int_signature, "(Ljava/lang/Object;)I") \ template(object_boolean_signature, "(Ljava/lang/Object;)Z") \ @@ -546,6 +549,7 @@ template(int_String_signature, "(I)Ljava/lang/String;") \ template(boolean_boolean_int_signature, "(ZZ)I") \ template(codesource_permissioncollection_signature, "(Ljava/security/CodeSource;Ljava/security/PermissionCollection;)V") \ + AARCH32_ONLY(template(symmetriccipher_signature, "Lcom/sun/crypto/provider/SymmetricCipher;")) \ /* signature symbols needed by intrinsics */ \ VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, VM_SYMBOL_IGNORE, VM_SYMBOL_IGNORE, template, VM_ALIAS_IGNORE) \ \ --- old/src/hotspot/share/interpreter/abstractInterpreter.cpp 2018-09-25 19:23:44.000000000 +0300 +++ new/src/hotspot/share/interpreter/abstractInterpreter.cpp 2018-09-25 19:23:44.000000000 +0300 @@ -1,5 +1,6 @@ /* * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -188,6 +189,37 @@ default : break; } +#ifdef AARCH32 + if (UseAESIntrinsics) { + // Use optimized stub code for AES native methods. + switch (m->intrinsic_id()) { + case vmIntrinsics::_aescrypt_encryptBlock : return com_sun_crypto_provider_AESCrypt_encryptBlock; + case vmIntrinsics::_aescrypt_decryptBlock : return com_sun_crypto_provider_AESCrypt_decryptBlock; + } + // Use optimized stub code for AES CBC native methods. 
+ if (StubRoutines::cipherBlockChaining_encryptAESCrypt_special() && + m->intrinsic_id() == vmIntrinsics::_cipherBlockChaining_encryptAESCrypt) + return com_sun_crypto_provider_CipherBlockChaining_encrypt; + + if (StubRoutines::cipherBlockChaining_decryptAESCrypt_special() && + m->intrinsic_id() == vmIntrinsics::_cipherBlockChaining_decryptAESCrypt) + return com_sun_crypto_provider_CipherBlockChaining_decrypt; + } + + // Use optimized stub code for SHA256/512 native methods. + switch (m->intrinsic_id()) { + case vmIntrinsics::_sha_implCompress : + if (UseSHA1Intrinsics) return sun_security_provider_SHA_implCompress; + break; + case vmIntrinsics::_sha2_implCompress : + if (UseSHA256Intrinsics) return sun_security_provider_SHA2_implCompress; + break; + case vmIntrinsics::_sha5_implCompress : + if (UseSHA512Intrinsics) return sun_security_provider_SHA5_implCompress; + break; + } +#endif + // Accessor method? if (m->is_getter()) { // TODO: We should have used ::is_accessor above, but fast accessors in Zero expect only getters. @@ -282,6 +314,15 @@ case java_util_zip_CRC32_updateByteBuffer : tty->print("java_util_zip_CRC32_updateByteBuffer"); break; case java_util_zip_CRC32C_updateBytes : tty->print("java_util_zip_CRC32C_updateBytes"); break; case java_util_zip_CRC32C_updateDirectByteBuffer: tty->print("java_util_zip_CRC32C_updateDirectByteByffer"); break; +#ifdef AARCH32 + case com_sun_crypto_provider_AESCrypt_encryptBlock : tty->print("com_sun_crypto_provider_AESCrypt_encryptBlock"); break; + case com_sun_crypto_provider_AESCrypt_decryptBlock : tty->print("com_sun_crypto_provider_AESCrypt_decryptBlock"); break; + case com_sun_crypto_provider_CipherBlockChaining_encrypt : tty->print("com_sun_crypto_provider_CipherBlockChaining_encrypt"); break; + case com_sun_crypto_provider_CipherBlockChaining_decrypt : tty->print("com_sun_crypto_provider_CipherBlockChaining_decrypt"); break; + case sun_security_provider_SHA_implCompress : tty->print("sun_security_provider_SHA_implCompress"); break; + case sun_security_provider_SHA2_implCompress : tty->print("sun_security_provider_SHA2_implCompress"); break; + case sun_security_provider_SHA5_implCompress : tty->print("sun_security_provider_SHA5_implCompress"); break; +#endif default: if (kind >= method_handle_invoke_FIRST && kind <= method_handle_invoke_LAST) { --- old/src/hotspot/share/interpreter/abstractInterpreter.hpp 2018-09-25 19:23:46.000000000 +0300 +++ new/src/hotspot/share/interpreter/abstractInterpreter.hpp 2018-09-25 19:23:45.000000000 +0300 @@ -1,5 +1,6 @@ /* * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -89,6 +90,15 @@ java_lang_Float_floatToRawIntBits, // implementation of java.lang.Float.floatToRawIntBits() java_lang_Double_longBitsToDouble, // implementation of java.lang.Double.longBitsToDouble() java_lang_Double_doubleToRawLongBits, // implementation of java.lang.Double.doubleToRawLongBits() +#ifdef AARCH32 + com_sun_crypto_provider_AESCrypt_encryptBlock, // implementation of com/sun/crypto/provider/AESCrypt/encryptBlock() + com_sun_crypto_provider_AESCrypt_decryptBlock, // implementation of com/sun/crypto/provider/AESCrypt/decryptBlock() + com_sun_crypto_provider_CipherBlockChaining_encrypt, // implementation of com/sun/crypto/provider/CipherBlockChaining/encrypt() + com_sun_crypto_provider_CipherBlockChaining_decrypt, // implementation of com/sun/crypto/provider/CipherBlockChaining/decrypt() + sun_security_provider_SHA_implCompress, // implementation of sun/security/provider/SHA2/implCompress() + sun_security_provider_SHA2_implCompress, // implementation of sun/security/provider/SHA2/implCompress() + sun_security_provider_SHA5_implCompress, // implementation of sun/security/provider/SHA5/implCompress() +#endif number_of_method_entries, invalid = -1 }; @@ -155,6 +165,13 @@ // the compiled version to the intrinsic version. static bool can_be_compiled(const methodHandle& m) { switch (m->intrinsic_id()) { +#ifdef AARCH32 + case vmIntrinsics::_aescrypt_encryptBlock: + case vmIntrinsics::_aescrypt_decryptBlock: + case vmIntrinsics::_sha_implCompress: + case vmIntrinsics::_sha2_implCompress: + case vmIntrinsics::_sha5_implCompress: +#endif // AARCH32 case vmIntrinsics::_dsin : // fall thru case vmIntrinsics::_dcos : // fall thru case vmIntrinsics::_dtan : // fall thru --- old/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp 2018-09-25 19:23:47.000000000 +0300 +++ new/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp 2018-09-25 19:23:46.000000000 +0300 @@ -1,5 +1,6 @@ /* * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -227,6 +228,29 @@ method_entry(java_lang_Double_longBitsToDouble); method_entry(java_lang_Double_doubleToRawLongBits); +#ifdef AARCH32 + if (UseAESIntrinsics) { + method_entry(com_sun_crypto_provider_AESCrypt_encryptBlock) + method_entry(com_sun_crypto_provider_AESCrypt_decryptBlock) + if (StubRoutines::cipherBlockChaining_encryptAESCrypt_special()) { + method_entry(com_sun_crypto_provider_CipherBlockChaining_encrypt) + } + if (StubRoutines::cipherBlockChaining_decryptAESCrypt_special()) { + method_entry(com_sun_crypto_provider_CipherBlockChaining_decrypt) + } + } + + if (UseSHA1Intrinsics) { + method_entry(sun_security_provider_SHA_implCompress) + } + if (UseSHA256Intrinsics) { + method_entry(sun_security_provider_SHA2_implCompress) + } + if (UseSHA512Intrinsics) { + method_entry(sun_security_provider_SHA5_implCompress) + } +#endif + #undef method_entry // Bytecodes @@ -460,6 +484,21 @@ native = true; break; #endif // !IA32 +#ifdef AARCH32 + case Interpreter::com_sun_crypto_provider_AESCrypt_encryptBlock: + case Interpreter::com_sun_crypto_provider_AESCrypt_decryptBlock: + entry_point = generate_aescrypt_block_entry(kind); + break; + case Interpreter::com_sun_crypto_provider_CipherBlockChaining_encrypt: + case Interpreter::com_sun_crypto_provider_CipherBlockChaining_decrypt: + // don't use AES CBC intrinsic in interpreter + break; + case Interpreter::sun_security_provider_SHA_implCompress: + case Interpreter::sun_security_provider_SHA2_implCompress: + case Interpreter::sun_security_provider_SHA5_implCompress: + entry_point = generate_SHA_implCompress_entry(kind); + break; +#endif default: fatal("unexpected method kind: %d", kind); break; --- old/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp 2018-09-25 19:23:48.000000000 +0300 +++ new/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp 2018-09-25 19:23:48.000000000 +0300 @@ -1,5 +1,6 @@ /* * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -100,6 +101,12 @@ address generate_Double_longBitsToDouble_entry(); address generate_Double_doubleToRawLongBits_entry(); #endif // IA32 +#ifdef AARCH32 + address generate_CRC32_updateBytes_inner(AbstractInterpreter::MethodKind kind, int is_crc32c); + address generate_aescrypt_block_entry(AbstractInterpreter::MethodKind kind); + address generate_cipherBlockChaining_encryptAESCrypt_entry(AbstractInterpreter::MethodKind kind); + address generate_SHA_implCompress_entry(AbstractInterpreter::MethodKind kind); +#endif // Some platforms don't need registers, other need two. Unused function is // left unimplemented. 
void generate_stack_overflow_check(void); @@ -114,8 +121,10 @@ void restore_native_result(void); #endif // SPARC -#ifdef AARCH64 +#if defined(AARCH64) void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); +#elif defined(AARCH32) + void generate_transcendental_entry(AbstractInterpreter::MethodKind kind); #endif // AARCH64 #ifdef PPC --- old/src/hotspot/share/jfr/recorder/repository/jfrChunkWriter.cpp 2018-09-25 19:23:49.000000000 +0300 +++ new/src/hotspot/share/jfr/recorder/repository/jfrChunkWriter.cpp 2018-09-25 19:23:49.000000000 +0300 @@ -89,11 +89,11 @@ void JfrChunkWriter::write_header(intptr_t metadata_offset) { assert(this->is_valid(), "invariant"); // Chunk size - this->write_be_at_offset(size_written(), CHUNK_SIZE_OFFSET); + this->write_be_at_offset((jlong)size_written(), CHUNK_SIZE_OFFSET); // initial checkpoint event offset this->write_be_at_offset(_chunkstate->previous_checkpoint_offset(), CHUNK_SIZE_OFFSET + (1 * FILEHEADER_SLOT_SIZE)); // metadata event offset - this->write_be_at_offset(metadata_offset, CHUNK_SIZE_OFFSET + (2 * FILEHEADER_SLOT_SIZE)); + this->write_be_at_offset((jlong)metadata_offset, CHUNK_SIZE_OFFSET + (2 * FILEHEADER_SLOT_SIZE)); // start of chunk in nanos since epoch this->write_be_at_offset(_chunkstate->previous_start_nanos(), CHUNK_SIZE_OFFSET + (3 * FILEHEADER_SLOT_SIZE)); // duration of chunk in nanos --- old/src/hotspot/share/jfr/writers/jfrWriterHost.inline.hpp 2018-09-25 19:23:50.000000000 +0300 +++ new/src/hotspot/share/jfr/writers/jfrWriterHost.inline.hpp 2018-09-25 19:23:50.000000000 +0300 @@ -196,7 +196,7 @@ template inline void WriterHost::write(double value) { - be_write(*(uintptr_t*)&(value)); + be_write(*(u8*)&(value)); } template @@ -277,22 +277,22 @@ template void WriterHost::write(const Ticks& time) { - write((uintptr_t)JfrTime::is_ft_enabled() ? time.ft_value() : time.value()); + write((u8)JfrTime::is_ft_enabled() ? time.ft_value() : time.value()); } template void WriterHost::write(const Tickspan& time) { - write((uintptr_t)JfrTime::is_ft_enabled() ? time.ft_value() : time.value()); + write((u8)JfrTime::is_ft_enabled() ? 
time.ft_value() : time.value()); } template void WriterHost::write(const JfrTicks& time) { - write((uintptr_t)time.value()); + write((u8)time.value()); } template void WriterHost::write(const JfrTickspan& time) { - write((uintptr_t)time.value()); + write((u8)time.value()); } template --- old/src/hotspot/share/opto/c2_globals.hpp 2018-09-25 19:23:52.000000000 +0300 +++ new/src/hotspot/share/opto/c2_globals.hpp 2018-09-25 19:23:51.000000000 +0300 @@ -719,11 +719,11 @@ diagnostic(bool, UseMulAddIntrinsic, false, \ "Enables intrinsification of BigInteger.mulAdd()") \ \ - diagnostic(bool, UseMontgomeryMultiplyIntrinsic, false, \ - "Enables intrinsification of BigInteger.montgomeryMultiply()") \ + NOT_AARCH32(diagnostic(bool, UseMontgomeryMultiplyIntrinsic, false, \ + "Enables intrinsification of BigInteger.montgomeryMultiply()")) \ \ - diagnostic(bool, UseMontgomerySquareIntrinsic, false, \ - "Enables intrinsification of BigInteger.montgomerySquare()") \ + NOT_AARCH32(diagnostic(bool, UseMontgomerySquareIntrinsic, false, \ + "Enables intrinsification of BigInteger.montgomerySquare()")) \ \ product(bool, UseTypeSpeculation, true, \ "Speculatively propagate types from profiles") \ --- old/src/hotspot/share/opto/c2compiler.cpp 2018-09-25 19:23:53.000000000 +0300 +++ new/src/hotspot/share/opto/c2compiler.cpp 2018-09-25 19:23:53.000000000 +0300 @@ -578,7 +578,9 @@ case vmIntrinsics::_sha_implCompress: case vmIntrinsics::_sha2_implCompress: case vmIntrinsics::_sha5_implCompress: +#ifndef AARCH32 case vmIntrinsics::_digestBase_implCompressMB: +#endif case vmIntrinsics::_multiplyToLen: case vmIntrinsics::_squareToLen: case vmIntrinsics::_mulAdd: --- old/src/hotspot/share/opto/library_call.cpp 2018-09-25 19:23:54.000000000 +0300 +++ new/src/hotspot/share/opto/library_call.cpp 2018-09-25 19:23:54.000000000 +0300 @@ -5673,7 +5673,11 @@ bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) { address stubAddr = NULL; const char *stubName; +#ifdef AARCH32 + assert(UseAESIntrinsics, "sanity"); +#else assert(UseAES, "need AES instruction support"); +#endif switch(id) { case vmIntrinsics::_aescrypt_encryptBlock: @@ -5742,7 +5746,11 @@ address stubAddr = NULL; const char *stubName = NULL; +#ifdef AARCH32 + assert(UseAESIntrinsics, "sanity"); +#else assert(UseAES, "need AES instruction support"); +#endif switch(id) { case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: @@ -5811,6 +5819,9 @@ Node* objRvec = load_field_from_object(cipherBlockChaining_object, "r", "[B", /*is_exact*/ false); if (objRvec == NULL) return false; Node* r_start = array_element_address(objRvec, intcon(0), T_BYTE); +#ifdef AARCH32 + if (r_start == NULL) return false; +#endif Node* cbcCrypt; if (Matcher::pass_original_key_for_aes()) { --- old/src/hotspot/share/runtime/globals.hpp 2018-09-25 19:23:56.000000000 +0300 +++ new/src/hotspot/share/runtime/globals.hpp 2018-09-25 19:23:55.000000000 +0300 @@ -253,8 +253,9 @@ /* UseMembar is theoretically a temp flag used for memory barrier */ \ /* removal testing. 
It was supposed to be removed before FCS but has */ \ /* been re-added (see 6401008) */ \ + NOT_AARCH32( \ product_pd(bool, UseMembar, \ - "(Unstable) Issues membars on thread state transitions") \ + "(Unstable) Issues membars on thread state transitions")) \ \ develop(bool, CleanChunkPoolAsync, true, \ "Clean the chunk pool asynchronously") \ @@ -444,6 +445,12 @@ diagnostic(bool, UseAESCTRIntrinsics, false, \ "Use intrinsics for the paralleled version of AES/CTR crypto") \ \ + AARCH32_ONLY(diagnostic(bool, UseMontgomeryMultiplyIntrinsic, false, \ + "Enables intrinsification of BigInteger.montgomeryMultiply()")) \ + \ + AARCH32_ONLY(diagnostic(bool, UseMontgomerySquareIntrinsic, false, \ + "Enables intrinsification of BigInteger.montgomerySquare()")) \ + \ diagnostic(bool, UseSHA1Intrinsics, false, \ "Use intrinsics for SHA-1 crypto hash function. " \ "Requires that UseSHA is enabled.") \ --- old/src/hotspot/share/runtime/synchronizer.cpp 2018-09-25 19:23:57.000000000 +0300 +++ new/src/hotspot/share/runtime/synchronizer.cpp 2018-09-25 19:23:57.000000000 +0300 @@ -220,11 +220,6 @@ // Case: light contention possibly amenable to TLE // Case: TLE inimical operations such as nested/recursive synchronization - if (owner == Self) { - m->_recursions++; - return true; - } - // This Java Monitor is inflated so obj's header will never be // displaced to this thread's BasicLock. Make the displaced header // non-NULL so this BasicLock is not seen as recursive nor as @@ -237,6 +232,11 @@ // and last are the inflated Java Monitor (ObjectMonitor) checks. lock->set_displaced_header(markOopDesc::unused_mark()); + if (owner == Self) { + m->_recursions++; + return true; + } + if (owner == NULL && Atomic::replace_if_null(Self, &(m->_owner))) { assert(m->_recursions == 0, "invariant"); assert(m->_owner == Self, "invariant"); --- old/src/hotspot/share/runtime/vmStructs.cpp 2018-09-25 19:23:59.000000000 +0300 +++ new/src/hotspot/share/runtime/vmStructs.cpp 2018-09-25 19:23:59.000000000 +0300 @@ -607,8 +607,16 @@ static_field(StubRoutines, _call_stub_return_address, address) \ static_field(StubRoutines, _aescrypt_encryptBlock, address) \ static_field(StubRoutines, _aescrypt_decryptBlock, address) \ +AARCH32_ONLY(static_field(StubRoutines, _aes_table_te_addr, address)) \ +AARCH32_ONLY(static_field(StubRoutines, _aes_table_td_addr, address)) \ static_field(StubRoutines, _cipherBlockChaining_encryptAESCrypt, address) \ static_field(StubRoutines, _cipherBlockChaining_decryptAESCrypt, address) \ +AARCH32_ONLY(static_field(StubRoutines, _sha1_implCompress, address)) \ +AARCH32_ONLY(static_field(StubRoutines, _sha1_table_addr, address)) \ +AARCH32_ONLY(static_field(StubRoutines, _sha256_implCompress, address)) \ +AARCH32_ONLY(static_field(StubRoutines, _sha256_table_addr, address)) \ +AARCH32_ONLY(static_field(StubRoutines, _sha512_implCompress, address)) \ +AARCH32_ONLY(static_field(StubRoutines, _sha512_table_addr, address)) \ static_field(StubRoutines, _counterMode_AESCrypt, address) \ static_field(StubRoutines, _ghash_processBlocks, address) \ static_field(StubRoutines, _base64_encodeBlock, address) \ --- old/src/hotspot/share/utilities/macros.hpp 2018-09-25 19:24:00.000000000 +0300 +++ new/src/hotspot/share/utilities/macros.hpp 2018-09-25 19:24:00.000000000 +0300 @@ -541,10 +541,11 @@ #define NOT_E500V2(code) code #endif -// Note: There are three ARM ports. They set the following in the makefiles: +// Note: There are four ARM ports. They set the following in the makefiles: // 1. 
Closed 32-bit port: -DARM -DARM32 -DTARGET_ARCH_arm // 2. Closed 64-bit port: -DARM -DAARCH64 -D_LP64 -DTARGET_ARCH_arm // 3. Open 64-bit port: -DAARCH64 -D_LP64 -DTARGET_ARCH_aaarch64 +// 4. Open 32-bit port: -DAARCH32 -DTARGET_ARCH_aarch32 #ifdef ARM #define ARM_ONLY(code) code #define NOT_ARM(code) @@ -577,6 +578,14 @@ #define BIG_ENDIAN_ONLY(code) code #endif +#ifdef AARCH32 +#define AARCH32_ONLY(code) code +#define NOT_AARCH32(code) +#else +#define AARCH32_ONLY(code) +#define NOT_AARCH32(code) code +#endif + #define define_pd_global(type, name, value) const type pd_##name = value; // Helper macros for constructing file names for includes. --- old/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java 2018-09-25 19:24:01.000000000 +0300 +++ new/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java 2018-09-25 19:24:01.000000000 +0300 @@ -35,6 +35,7 @@ import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; +import sun.jvm.hotspot.debugger.MachineDescriptionArm; import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; @@ -589,6 +590,8 @@ machDesc = new MachineDescriptionPPC64(); } else if (cpu.equals("aarch64")) { machDesc = new MachineDescriptionAArch64(); + } else if (cpu.equals("arm")) { + machDesc = new MachineDescriptionArm(); } else if (cpu.equals("sparc")) { if (LinuxDebuggerLocal.getAddressSize()==8) { machDesc = new MachineDescriptionSPARC64Bit(); --- old/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java 2018-09-25 19:24:02.000000000 +0300 +++ new/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java 2018-09-25 19:24:02.000000000 +0300 @@ -54,7 +54,7 @@ public static boolean knownCPU(String cpu) { final String[] KNOWN = - new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; + new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "arm"}; for(String s : KNOWN) { if(s.equals(cpu)) --- old/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java 2018-09-25 19:24:04.000000000 +0300 +++ new/test/hotspot/jtreg/compiler/testlibrary/sha/predicate/IntrinsicPredicates.java 2018-09-25 19:24:03.000000000 +0300 @@ -94,6 +94,9 @@ new OrPredicate(new CPUSpecificPredicate("amd64.*", new String[] { "avx2", "bmi2" }, null), new CPUSpecificPredicate("x86_64", new String[] { "avx2", "bmi2" }, null)))))))))); + public static final BooleanSupplier AARCH32_NEON_AVAILABLE + = new CPUSpecificPredicate("arm.*", new String[] { "neon" }, null); + public static final BooleanSupplier ANY_SHA_INSTRUCTION_AVAILABLE = new OrPredicate(IntrinsicPredicates.SHA1_INSTRUCTION_AVAILABLE, new OrPredicate( --- old/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java 2018-09-25 19:24:05.000000000 +0300 +++ new/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java 2018-09-25 19:24:05.000000000 +0300 @@ -239,7 +239,7 @@ return Platform.isAix() || (Platform.isLinux() && (Platform.isPPC() || Platform.isS390x() || Platform.isX64() || - Platform.isX86())) || + Platform.isX86()) || Platform.isARM()) || Platform.isOSX() || Platform.isSolaris(); } --- /dev/null 2018-09-25 19:24:06.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/aarch32.ad 2018-09-25 19:24:06.000000000 +0300 
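For orientation before the new aarch32.ad sources below: the AARCH32_ONLY/NOT_AARCH32 pair added to macros.hpp above is what most of the other hunks lean on (os_linux.cpp, c2_globals.hpp, systemDictionary.hpp, vmStructs.cpp). A minimal standalone sketch of the pattern follows; the main() and the 2M/4M constants in it are illustrative only, not HotSpot code.

#include <cstddef>
#include <cstdio>

// Same shape as the macros.hpp hunk: exactly one macro of the pair expands
// to its argument, the other expands to nothing.
#ifdef AARCH32
#define AARCH32_ONLY(code) code
#define NOT_AARCH32(code)
#else
#define AARCH32_ONLY(code)
#define NOT_AARCH32(code) code
#endif

int main() {
  const size_t M = 1024 * 1024;
  // Mirrors the os_linux.cpp hunk: a per-CPU value is assembled by
  // concatenating *_ONLY expansions, and only the matching one survives.
  const size_t default_large_page_size =
      AARCH32_ONLY(2 * M)
      NOT_AARCH32(4 * M);
  printf("default large page size: %zu\n", default_large_page_size);
  return 0;
}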
@@ -0,0 +1,11817 @@ +// +// Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// + +// AARCH32 Architecture Description File + +//----------REGISTER DEFINITION BLOCK------------------------------------------ +// This information is used by the matcher and the register allocator to +// describe individual registers and classes of registers within the target +// archtecture. +register %{ +//----------Architecture Description Register Definitions---------------------- +// General Registers +// "reg_def" name ( register save type, C convention save type, +// ideal register type, encoding, vm name ); +// Register Save Types: +// +// NS = No-Save: The register allocator assumes that these registers +// can be used without saving upon entry to the method, & +// that they do not need to be saved at call sites. +// +// SOC = Save-On-Call: The register allocator assumes that these registers +// can be used without saving upon entry to the method, +// but that they must be saved at call sites. +// +// SOE = Save-On-Entry: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, but they do not need to be saved at call +// sites. +// +// AS = Always-Save: The register allocator assumes that these registers +// must be saved before using them upon entry to the +// method, & that they must be saved at call sites. +// +// Ideal Register Type is used to determine how to save & restore a +// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get +// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. +// +// The encoding number is the actual bit-pattern placed into the opcodes. 
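To make the "encoding number is the actual bit-pattern placed into the opcodes" remark concrete, the sketch below (plain C++, not part of the port) packs the same r0..r15 encoding numbers listed in this file into an A32 data-processing instruction word.

#include <cstdint>
#include <cstdio>

// Standalone illustration of how a register's encoding number becomes a
// bit-field of the final opcode; not code from the port.
static uint32_t encode_add_reg(unsigned rd, unsigned rn, unsigned rm) {
  const uint32_t cond_al = 0xEu << 28;  // condition field: always
  const uint32_t opc_add = 0x4u << 21;  // data-processing opcode: ADD
  return cond_al | opc_add | (rn << 16) | (rd << 12) | rm;
}

int main() {
  // "add r0, r1, r2" assembles to 0xE0810002
  printf("add r0, r1, r2 -> 0x%08X\n", (unsigned)encode_add_reg(0, 1, 2));
  return 0;
}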
+ + +// ---------------------------- +// Integer/Long Registers +// ---------------------------- + +reg_def R_R0 (SOC, SOC, Op_RegI, 0, R(0)->as_VMReg()); +reg_def R_R1 (SOC, SOC, Op_RegI, 1, R(1)->as_VMReg()); +reg_def R_R2 (SOC, SOC, Op_RegI, 2, R(2)->as_VMReg()); +reg_def R_R3 (SOC, SOC, Op_RegI, 3, R(3)->as_VMReg()); +reg_def R_R4 (SOC, SOE, Op_RegI, 4, R(4)->as_VMReg()); +reg_def R_R5 (SOC, SOE, Op_RegI, 5, R(5)->as_VMReg()); +reg_def R_R6 (SOC, SOE, Op_RegI, 6, R(6)->as_VMReg()); +reg_def R_R7 (SOC, SOE, Op_RegI, 7, R(7)->as_VMReg()); +reg_def R_R8 (SOC, SOE, Op_RegI, 8, R(8)->as_VMReg()); +reg_def R_R9 (SOC, SOE, Op_RegI, 9, R(9)->as_VMReg()); +reg_def R_R10(NS, SOE, Op_RegI, 10, R(10)->as_VMReg()); +reg_def R_R11(NS, SOE, Op_RegI, 11, R(11)->as_VMReg()); +reg_def R_R12(SOC, SOC, Op_RegI, 12, R(12)->as_VMReg()); +reg_def R_R13(NS, NS, Op_RegI, 13, R(13)->as_VMReg()); +reg_def R_R14(SOC, SOC, Op_RegI, 14, R(14)->as_VMReg()); +reg_def R_R15(NS, NS, Op_RegI, 15, R(15)->as_VMReg()); + +// ---------------------------- +// Float/Double Registers +// ---------------------------- + +// Float Registers + +reg_def R_S0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()); +reg_def R_S1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()); +reg_def R_S2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()); +reg_def R_S3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()); +reg_def R_S4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()); +reg_def R_S5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()); +reg_def R_S6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()); +reg_def R_S7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()); +reg_def R_S8 ( SOC, SOC, Op_RegF, 8, f8->as_VMReg()); +reg_def R_S9 ( SOC, SOC, Op_RegF, 9, f9->as_VMReg()); +reg_def R_S10( SOC, SOC, Op_RegF, 10,f10->as_VMReg()); +reg_def R_S11( SOC, SOC, Op_RegF, 11,f11->as_VMReg()); +reg_def R_S12( SOC, SOC, Op_RegF, 12,f12->as_VMReg()); +reg_def R_S13( SOC, SOC, Op_RegF, 13,f13->as_VMReg()); +reg_def R_S14( SOC, SOC, Op_RegF, 14,f14->as_VMReg()); +reg_def R_S15( SOC, SOC, Op_RegF, 15,f15->as_VMReg()); +reg_def R_S16( SOC, SOE, Op_RegF, 16,f16->as_VMReg()); +reg_def R_S17( SOC, SOE, Op_RegF, 17,f17->as_VMReg()); +reg_def R_S18( SOC, SOE, Op_RegF, 18,f18->as_VMReg()); +reg_def R_S19( SOC, SOE, Op_RegF, 19,f19->as_VMReg()); +reg_def R_S20( SOC, SOE, Op_RegF, 20,f20->as_VMReg()); +reg_def R_S21( SOC, SOE, Op_RegF, 21,f21->as_VMReg()); +reg_def R_S22( SOC, SOE, Op_RegF, 22,f22->as_VMReg()); +reg_def R_S23( SOC, SOE, Op_RegF, 23,f23->as_VMReg()); +reg_def R_S24( SOC, SOE, Op_RegF, 24,f24->as_VMReg()); +reg_def R_S25( SOC, SOE, Op_RegF, 25,f25->as_VMReg()); +reg_def R_S26( SOC, SOE, Op_RegF, 26,f26->as_VMReg()); +reg_def R_S27( SOC, SOE, Op_RegF, 27,f27->as_VMReg()); +reg_def R_S28( SOC, SOE, Op_RegF, 28,f28->as_VMReg()); +reg_def R_S29( SOC, SOE, Op_RegF, 29,f29->as_VMReg()); +reg_def R_S30( SOC, SOE, Op_RegF, 30,f30->as_VMReg()); +reg_def R_S31( SOC, SOE, Op_RegF, 31,f31->as_VMReg()); + +// Double Registers +// The rules of ADL require that double registers be defined in pairs. +// Each pair must be two 32-bit values, but not necessarily a pair of +// single float registers. In each pair, ADLC-assigned register numbers +// must be adjacent, with the lower number even. Finally, when the +// CPU stores such a register pair to memory, the word associated with +// the lower ADLC-assigned number must be stored to the lower address. 
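The pairing rule described above is the same invariant the c1_LinearScan.cpp hunks earlier in this patch assert for AARCH32 (assigned_reg % 2 == 0 && assigned_reg + 1 == interval->assigned_regHi()). A trivial standalone restatement, for illustration only:

#include <cassert>

// A double occupies two adjacent single-precision registers, low number even;
// illustrative helper, not port code.
static bool is_valid_double_pair(int lo, int hi) {
  return (lo % 2 == 0) && (hi == lo + 1);
}

int main() {
  assert(is_valid_double_pair(0, 1));   // s0/s1 may carry a double
  assert(!is_valid_double_pair(1, 2));  // odd low half is rejected
  assert(!is_valid_double_pair(2, 4));  // halves must be adjacent
  return 0;
}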
+ +// TODO, the problem is that AArch32 port has same same numeric value for +// d16->as_VMReg and f1->as_VMReg which breaks reverse mapping from +// VMReg to OptoReg +// reg_def R_D16 (SOC, SOC, Op_RegD, 32, d16->as_VMReg()); +// reg_def R_D16x(SOC, SOC, Op_RegD,255, d16->as_VMReg()->next()); +// reg_def R_D17 (SOC, SOC, Op_RegD, 34, d17->as_VMReg()); +// reg_def R_D17x(SOC, SOC, Op_RegD,255, d17->as_VMReg()->next()); +// reg_def R_D18 (SOC, SOC, Op_RegD, 36, d18->as_VMReg()); +// reg_def R_D18x(SOC, SOC, Op_RegD,255, d18->as_VMReg()->next()); +// reg_def R_D19 (SOC, SOC, Op_RegD, 38, d19->as_VMReg()); +// reg_def R_D19x(SOC, SOC, Op_RegD,255, d19->as_VMReg()->next()); +// reg_def R_D20 (SOC, SOC, Op_RegD, 40, d20->as_VMReg()); +// reg_def R_D20x(SOC, SOC, Op_RegD,255, d20->as_VMReg()->next()); +// reg_def R_D21 (SOC, SOC, Op_RegD, 42, d21->as_VMReg()); +// reg_def R_D21x(SOC, SOC, Op_RegD,255, d21->as_VMReg()->next()); +// reg_def R_D22 (SOC, SOC, Op_RegD, 44, d22->as_VMReg()); +// reg_def R_D22x(SOC, SOC, Op_RegD,255, d22->as_VMReg()->next()); +// reg_def R_D23 (SOC, SOC, Op_RegD, 46, d23->as_VMReg()); +// reg_def R_D23x(SOC, SOC, Op_RegD,255, d23->as_VMReg()->next()); +// reg_def R_D24 (SOC, SOC, Op_RegD, 48, d24->as_VMReg()); +// reg_def R_D24x(SOC, SOC, Op_RegD,255, d24->as_VMReg()->next()); +// reg_def R_D25 (SOC, SOC, Op_RegD, 50, d25->as_VMReg()); +// reg_def R_D25x(SOC, SOC, Op_RegD,255, d25->as_VMReg()->next()); +// reg_def R_D26 (SOC, SOC, Op_RegD, 52, d26->as_VMReg()); +// reg_def R_D26x(SOC, SOC, Op_RegD,255, d26->as_VMReg()->next()); +// reg_def R_D27 (SOC, SOC, Op_RegD, 54, d27->as_VMReg()); +// reg_def R_D27x(SOC, SOC, Op_RegD,255, d27->as_VMReg()->next()); +// reg_def R_D28 (SOC, SOC, Op_RegD, 56, d28->as_VMReg()); +// reg_def R_D28x(SOC, SOC, Op_RegD,255, d28->as_VMReg()->next()); +// reg_def R_D29 (SOC, SOC, Op_RegD, 58, d29->as_VMReg()); +// reg_def R_D29x(SOC, SOC, Op_RegD,255, d29->as_VMReg()->next()); +// reg_def R_D30 (SOC, SOC, Op_RegD, 60, d30->as_VMReg()); +// reg_def R_D30x(SOC, SOC, Op_RegD,255, d30->as_VMReg()->next()); +// reg_def R_D31 (SOC, SOC, Op_RegD, 62, d31->as_VMReg()); +// reg_def R_D31x(SOC, SOC, Op_RegD,255, d31->as_VMReg()->next()); + +// ---------------------------- +// Special Registers +// Condition Codes Flag Registers +reg_def APSR (SOC, SOC, Op_RegFlags, 0, VMRegImpl::Bad()); +reg_def FPSCR(SOC, SOC, Op_RegFlags, 0, VMRegImpl::Bad()); + +// ---------------------------- +// Specify the enum values for the registers. These enums are only used by the +// OptoReg "class". We can convert these enum values at will to VMReg when needed +// for visibility to the rest of the vm. The order of this enum influences the +// register allocator so having the freedom to set this order and not be stuck +// with the order that is natural for the rest of the vm is worth it. + +// registers in that order so that R11/R12 is an aligned pair that can be used for longs +alloc_class chunk0( + R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R10, R_R13, R_R14, R_R15, R_R0, R_R1, R_R2, R_R3); + +// Note that a register is not allocatable unless it is also mentioned +// in a widely-used reg_class below. 
+ +alloc_class chunk1( + R_S16, R_S17, R_S18, R_S19, R_S20, R_S21, R_S22, R_S23, + R_S24, R_S25, R_S26, R_S27, R_S28, R_S29, R_S30, R_S31, + R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7, + R_S8, R_S9, R_S10, R_S11, R_S12, R_S13, R_S14, R_S15 + // , + // R_D16, R_D16x,R_D17, R_D17x,R_D18, R_D18x,R_D19, R_D19x, + // R_D20, R_D20x,R_D21, R_D21x,R_D22, R_D22x,R_D23, R_D23x, + // R_D24, R_D24x,R_D25, R_D25x,R_D26, R_D26x,R_D27, R_D27x, + // R_D28, R_D28x,R_D29, R_D29x,R_D30, R_D30x,R_D31, R_D31x +); + +alloc_class chunk2(APSR, FPSCR); + +//----------Architecture Description Register Classes-------------------------- +// Several register classes are automatically defined based upon information in +// this architecture description. +// 1) reg_class inline_cache_reg ( as defined in frame section ) +// 2) reg_class interpreter_method_oop_reg ( as defined in frame section ) +// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) +// + +// ---------------------------- +// Integer Register Classes +// ---------------------------- +// Exclusions from i_reg: +// sp (R13), PC (R15) +// R10: reserved by HotSpot to the TLS register (invariant within Java) +reg_class int_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14); + +reg_class R0_regI(R_R0); +reg_class R1_regI(R_R1); +reg_class R2_regI(R_R2); +reg_class R3_regI(R_R3); +reg_class R9_regI(R_R9); +reg_class R12_regI(R_R12); + +// ---------------------------- +// Pointer Register Classes +// ---------------------------- +reg_class ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14); +// Special class for storeP instructions, which can store SP or RPC to TLS. +// It is also used for memory addressing, allowing direct TLS addressing. +reg_class sp_ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R9, R_R11, R_R12, R_R14, R_R8, R_R10 /* TLS*/, R_R13 /* SP*/); + +#define R_Ricklass R_R12 +#define R_Rmethod R_R8 +#define R_Rthread R_R10 +#define R_Rexception_obj R_R0 + +// Other special pointer regs +reg_class R0_regP(R_R0); +reg_class R1_regP(R_R1); +reg_class R2_regP(R_R2); +reg_class R4_regP(R_R4); +reg_class Rexception_regP(R_Rexception_obj); +reg_class Ricklass_regP(R_Ricklass); +reg_class Rmethod_regP(R_Rmethod); +reg_class Rthread_regP(R_Rthread); +reg_class IP_regP(R_R12); +reg_class LR_regP(R_R14); + +reg_class FP_regP(R_R11); + +// ---------------------------- +// Long Register Classes +// ---------------------------- +reg_class long_reg ( R_R0,R_R1, R_R2,R_R3, R_R4,R_R5, R_R6,R_R7, R_R8,R_R9, R_R11,R_R12); +// for ldrexd, strexd: first reg of pair must be even +reg_class long_reg_align ( R_R0,R_R1, R_R2,R_R3, R_R4,R_R5, R_R6,R_R7, R_R8,R_R9); + +reg_class R0R1_regL(R_R0,R_R1); +reg_class R2R3_regL(R_R2,R_R3); + +// ---------------------------- +// Special Class for Condition Code Flags Register +reg_class int_flags(APSR); +reg_class float_flags(FPSCR); + + +// ---------------------------- +// Float Point Register Classes +// ---------------------------- +// Skip f14/f15, they are reserved for mem-mem copies +reg_class sflt_reg(R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7, R_S8, R_S9, R_S10, R_S11, R_S12, R_S13, + R_S16, R_S17, R_S18, R_S19, R_S20, R_S21, R_S22, R_S23, R_S24, R_S25, R_S26, R_S27, R_S28, R_S29, R_S30, R_S31); + +// Paired floating point registers--they show up in the same order as the floats, +// but they are used with the "Op_RegD" type, and always occur in even/odd pairs. 
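+// (For example, R_S0/R_S1 together back the Op_RegD view of d0; the upper bank
+// d16..d31 would be appended here once the VMReg aliasing TODO above is
+// resolved, as the commented-out R_D16.. entries below suggest.)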
+reg_class dflt_reg(R_S0,R_S1, R_S2,R_S3, R_S4,R_S5, R_S6,R_S7, R_S8,R_S9, R_S10,R_S11, R_S12,R_S13, + R_S16,R_S17, R_S18,R_S19, R_S20,R_S21, R_S22,R_S23, R_S24,R_S25, R_S26,R_S27, R_S28,R_S29, R_S30,R_S31 + // , + // R_D16,R_D16x, R_D17,R_D17x, R_D18,R_D18x, R_D19,R_D19x, R_D20,R_D20x, R_D21,R_D21x, R_D22,R_D22x, + // R_D23,R_D23x, R_D24,R_D24x, R_D25,R_D25x, R_D26,R_D26x, R_D27,R_D27x, R_D28,R_D28x, R_D29,R_D29x, + // R_D30,R_D30x, R_D31,R_D31x + ); + +reg_class dflt_low_reg(R_S0,R_S1, R_S2,R_S3, R_S4,R_S5, R_S6,R_S7, R_S8,R_S9, R_S10,R_S11, R_S12,R_S13, + R_S16,R_S17, R_S18,R_S19, R_S20,R_S21, R_S22,R_S23, R_S24,R_S25, R_S26,R_S27, R_S28,R_S29, R_S30,R_S31); + + +reg_class actual_dflt_reg %{ + if (/*VM_Version::features() & FT_VFPV3D32*/0) { // TODO verify and enable + return DFLT_REG_mask(); + } else { + return DFLT_LOW_REG_mask(); + } +%} + +reg_class f0_regF(R_S0); +reg_class D0_regD(R_S0,R_S1); +reg_class D1_regD(R_S2,R_S3); +reg_class D2_regD(R_S4,R_S5); +reg_class D3_regD(R_S6,R_S7); +reg_class D4_regD(R_S8,R_S9); +reg_class D5_regD(R_S10,R_S11); +reg_class D6_regD(R_S12,R_S13); +reg_class D7_regD(R_S14,R_S15); +reg_class D0D1_regD(R_S0,R_S1,R_S2,R_S3); +reg_class D2D3_regD(R_S4,R_S5,R_S6,R_S7); + +// reg_class D16_regD(R_D16,R_D16x); +// reg_class D17_regD(R_D17,R_D17x); +// reg_class D18_regD(R_D18,R_D18x); +// reg_class D19_regD(R_D19,R_D19x); +// reg_class D20_regD(R_D20,R_D20x); +// reg_class D21_regD(R_D21,R_D21x); +// reg_class D22_regD(R_D22,R_D22x); +// reg_class D23_regD(R_D23,R_D23x); +// reg_class D24_regD(R_D24,R_D24x); +// reg_class D25_regD(R_D25,R_D25x); +// reg_class D26_regD(R_D26,R_D26x); +// reg_class D27_regD(R_D27,R_D27x); +// reg_class D28_regD(R_D28,R_D28x); +// reg_class D29_regD(R_D29,R_D29x); +// reg_class D30_regD(R_D30,R_D30x); +// reg_class D31_regD(R_D31,R_D31x); + +reg_class vectorx_reg(R_S0,R_S1,R_S2,R_S3, R_S4,R_S5,R_S6,R_S7, + R_S8,R_S9,R_S10,R_S11, /* skip f14/f15 */ + R_S16,R_S17,R_S18,R_S19, R_S20,R_S21,R_S22,R_S23, + R_S24,R_S25,R_S26,R_S27, R_S28,R_S29,R_S30,R_S31 + // , + // R_D16,R_D16x,R_D17,R_D17x, R_D18,R_D18x,R_D19,R_D19x, + // R_D20,R_D20x,R_D21,R_D21x, R_D22,R_D22x,R_D23,R_D23x, + // R_D24,R_D24x,R_D25,R_D25x, R_D26,R_D26x,R_D27,R_D27x, + // R_D28,R_D28x,R_D29,R_D29x, R_D30,R_D30x,R_D31,R_D31x + ); + +%} + +source_hpp %{ +// FIXME +const MachRegisterNumbers R_mem_copy_lo_num = R_S14_num; +const MachRegisterNumbers R_mem_copy_hi_num = R_S15_num; +const FloatRegister Rmemcopy = f14; +const MachRegisterNumbers R_hf_ret_lo_num = R_S0_num; +const MachRegisterNumbers R_hf_ret_hi_num = R_S1_num; + +const MachRegisterNumbers R_Ricklass_num = R_R12_num; +const MachRegisterNumbers R_Rmethod_num = R_R8_num; + +#define LDR_DOUBLE "FLDD" +#define LDR_FLOAT "FLDS" +#define STR_DOUBLE "FSTD" +#define STR_FLOAT "FSTS" +#define LDR_64 "LDRD" +#define STR_64 "STRD" +#define LDR_32 "LDR" +#define STR_32 "STR" +#define MOV_DOUBLE "FCPYD" +#define MOV_FLOAT "FCPYS" +#define FMSR "FMSR" +#define FMRS "FMRS" +#define LDREX "ldrex " +#define STREX "strex " + +static inline bool is_memoryD(int offset) { + return offset < 1024 && offset > -1024; +} + +static inline bool is_memoryfp(int offset) { + return offset < 1024 && offset > -1024; +} + +static inline bool is_memoryI(int offset) { + return offset < 4096 && offset > -4096; +} + +static inline bool is_memoryP(int offset) { + return offset < 4096 && offset > -4096; +} + +static inline bool is_memoryHD(int offset) { + return offset < 256 && offset > -256; +} + +static inline bool is_aimm(int imm) { + return 
Assembler::is_valid_for_imm12(imm); +} + +static inline bool is_limmI(jint imm) { + return Assembler::is_valid_for_imm12(imm); +} + +static inline bool is_limmI_low(jint imm, int n) { + int imml = imm & right_n_bits(n); + return is_limmI(imml) || is_limmI(imm); +} + +static inline int limmI_low(jint imm, int n) { + int imml = imm & right_n_bits(n); + return is_limmI(imml) ? imml : imm; +} + +%} + +source %{ + +// Given a register encoding, produce a Integer Register object +static Register reg_to_register_object(int register_encoding) { + assert(r0->encoding() == R_R0_enc && r15->encoding() == R_R15_enc, "right coding"); + return as_Register(register_encoding); +} + +// Given a register encoding, produce a Float Register object +static FloatRegister reg_to_FloatRegister_object(int register_encoding) { + assert(f0->encoding() == R_S0_enc && f31->encoding() == R_S31_enc, "right coding"); + // [d16,d31] share FloatRegister encoding with [f1,f31] since it numericall equals to ARM insn parameter encoding + // in contrary OptoReg encoding for d16+ is different + return as_FloatRegister((register_encoding&0x1f)|(register_encoding>>5)); +} + +void Compile::pd_compiler2_init() { + // Umimplemented +} + +OptoRegPair c2::return_value(int ideal_reg) { + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, R_R0_num, R_R0_num, R_hf_ret_lo_num, R_hf_ret_lo_num, R_R0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_hf_ret_hi_num, R_R1_num }; +#ifndef HARD_FLOAT_CC + assert(hasFPU(), "non-VFP java ABI is not supported"); +#endif + return OptoRegPair( hi[ideal_reg], lo[ideal_reg]); +} + +#ifndef HARD_FLOAT_CC +OptoRegPair c2::c_return_value(int ideal_reg) { + assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); + static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, R_R0_num, R_R0_num, R_R0_num, R_R0_num, R_R0_num }; + static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_R1_num, R_R1_num }; + return OptoRegPair( hi[ideal_reg], lo[ideal_reg]); +} +#endif + +// !!!!! Special hack to get all type of calls to specify the byte offset +// from the start of the call to the point where the return address +// will point. + +static uint call_static_enc_size(const MachCallNode *n, ciMethod *_method, bool _method_handle_invoke) { + int call_sz = (_method == NULL) ? + (maybe_far_call(n) ? 3 : 1) : + (far_branches() ? NativeCall::instruction_size / NativeInstruction::arm_insn_sz : 1); + return (call_sz + (_method_handle_invoke ? 2 : 0)) * + NativeInstruction::arm_insn_sz; +} + +static uint call_dynamic_enc_size() { + return 2 * NativeInstruction::arm_insn_sz + + (far_branches() ? NativeCall::instruction_size : NativeInstruction::arm_insn_sz); +} + +static uint call_runtime_enc_size(const MachCallNode *n) { + // bl or movw; movt; blx + bool far = maybe_far_call(n); + return (far ? 3 : 1) * NativeInstruction::arm_insn_sz; +} + +int MachCallStaticJavaNode::ret_addr_offset() { + return call_static_enc_size(this, _method, _method_handle_invoke) - + (_method_handle_invoke ? 1 : 0) * NativeInstruction::arm_insn_sz; +} + +int MachCallDynamicJavaNode::ret_addr_offset() { + return call_dynamic_enc_size(); +} + +int MachCallRuntimeNode::ret_addr_offset() { + return call_runtime_enc_size(this); +} +%} + +// The intptr_t operand types, defined by textual substitution. +// (Cf. opto/type.hpp. 
This lets us avoid many, many other ifdefs.) +#define immX immI +#define immXRot immIRot +#define iRegX iRegI +#define aimmX aimmI +#define limmX limmI +#define immX10x2 immI10x2 +#define LShiftX LShiftI +#define shimmX immU5 + +// Compatibility interface +#define aimmP immPRot +#define immIMov immIRot + +#define store_RegL iRegL +#define store_RegLd iRegLd +#define store_RegI iRegI +#define store_ptr_RegP iRegP + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Operand Attributes------------------------------------------------- +op_attrib op_cost(1); // Required cost attribute + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. + +//----------Simple Operands---------------------------------------------------- +// Immediate Operands + +operand immIRot() %{ + predicate(Assembler::is_valid_for_imm12(n->get_int())); + match(ConI); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +operand immIRotn() %{ + predicate(n->get_int() != 0 && Assembler::is_valid_for_imm12(~n->get_int())); + match(ConI); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +operand immIRotneg() %{ + // if Assembler::is_valid_for_imm12() is true for this constant, it is + // a immIRot and an optimal instruction combination exists to handle the + // constant as an immIRot + predicate(!Assembler::is_valid_for_imm12(n->get_int()) && Assembler::is_valid_for_imm12(-n->get_int())); + match(ConI); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Non-negative integer immediate that is encodable using the rotation scheme, +// and that when expanded fits in 31 bits. 
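+// (Illustration, assuming Assembler::is_valid_for_imm12() is the usual A32
+// modified-immediate test: 0x00ff0000 is accepted -- an 8-bit value rotated by
+// an even amount and non-negative -- while 0xff000000 is rejected because, as
+// a jint, it is negative even though the rotation scheme could encode it.)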
+operand immU31Rot() %{ + predicate((0 <= n->get_int()) && Assembler::is_valid_for_imm12(n->get_int())); + match(ConI); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +operand immPRot() %{ + predicate(n->get_ptr() == 0 || (Assembler::is_valid_for_imm12(n->get_ptr()) && ((ConPNode*)n)->type()->reloc() == relocInfo::none)); + + match(ConP); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +operand immLlowRot() %{ + predicate(n->get_long() >> 32 == 0 && Assembler::is_valid_for_imm12((int)n->get_long())); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immLRot2() %{ + predicate(Assembler::is_valid_for_imm12((int)(n->get_long() >> 32)) && + Assembler::is_valid_for_imm12((int)(n->get_long()))); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 12-bit - for addressing mode +operand immI12() %{ + predicate((-4096 < n->get_int()) && (n->get_int() < 4096)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 10-bit disp and disp+4 - for addressing float pair +operand immI10x2() %{ + predicate((-1024 < n->get_int()) && (n->get_int() < 1024 - 4)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 12-bit disp and disp+4 - for addressing word pair +operand immI12x2() %{ + predicate((-4096 < n->get_int()) && (n->get_int() < 4096 - 4)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +//----------DEFINITION BLOCK--------------------------------------------------- +// Define name --> value mappings to inform the ADLC of an integer valued name +// Current support includes integer values in the range [0, 0x7FFFFFFF] +// Format: +// int_def ( , ); +// Generated Code in ad_.hpp +// #define () +// // value == +// Generated code in ad_.cpp adlc_verification() +// assert( == , "Expect () to equal "); +// +definitions %{ +// The default cost (of an ALU instruction). + int_def DEFAULT_COST ( 100, 100); + int_def HUGE_COST (1000000, 1000000); + +// Memory refs are twice as expensive as run-of-the-mill. + int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2); + +// Branches are even more expensive. + int_def BRANCH_COST ( 300, DEFAULT_COST * 3); + int_def CALL_COST ( 300, DEFAULT_COST * 3); +%} + + +//----------SOURCE BLOCK------------------------------------------------------- +// This is a block of C++ code which provides values, functions, and +// definitions necessary in the rest of the architecture description +source_hpp %{ +// Header information of the source block. +// Method declarations/definitions which are used outside +// the ad-scope can conveniently be defined here. +// +// To keep related declarations/definitions/uses close together, +// we switch between source %{ }% and source_hpp %{ }% freely as needed. + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) __ stop(error) +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#define STOP(error) __ block_comment(error); stop(error) +#endif + +#define BIND(label) __ bind(label); BLOCK_COMMENT(#label ":") + +// Does destination need to be loaded in a register then passed to a +// branch instruction? 
+extern bool maybe_far_call(const CallNode *n); +extern bool maybe_far_call(const MachCallNode *n); +static inline bool cache_reachable() { + return MacroAssembler::_cache_fully_reachable(); +} +static inline bool far_branches() { + return MacroAssembler::far_branches(); +} + +extern bool PrintOptoAssembly; + +class c2 { +public: + static OptoRegPair return_value(int ideal_reg); +#ifndef HARD_FLOAT_CC + static OptoRegPair c_return_value(int ideal_reg); +#endif +}; + +class CallStubImpl { + + //-------------------------------------------------------------- + //---< Used for optimization in Compile::Shorten_branches >--- + //-------------------------------------------------------------- + + public: + // Size of call trampoline stub. + static uint size_call_trampoline() { + return 0; // no call trampolines on this platform + } + + // number of relocations needed by a call trampoline stub + static uint reloc_call_trampoline() { + return 0; // no call trampolines on this platform + } +}; + +class HandlerImpl { + + public: + + static int emit_exception_handler(CodeBuffer &cbuf); + static int emit_deopt_handler(CodeBuffer& cbuf); + + static uint size_exception_handler() { + return ( 3 * 4 ); + } + + + static uint size_deopt_handler() { + return ( 9 * 4 ); + } + +}; + +%} + +source %{ +#define __ _masm. + +static FloatRegister reg_to_FloatRegister_object(int register_encoding); +static Register reg_to_register_object(int register_encoding); + + +// **************************************************************************** + +// REQUIRED FUNCTIONALITY + +// Indicate if the safepoint node needs the polling page as an input. +// Since ARM does not have absolute addressing, it does. +bool SafePointNode::needs_polling_address_input() { + return true; +} + +// emit an interrupt that is caught by the debugger (for debugging compiler) +void emit_break(CodeBuffer &cbuf) { + MacroAssembler _masm(&cbuf); + __ bkpt(0); +} + +#ifndef PRODUCT +void MachBreakpointNode::format( PhaseRegAlloc *, outputStream *st ) const { + st->print("TA"); +} +#endif + +void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + emit_break(cbuf); +} + +uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + + +void emit_nop(CodeBuffer &cbuf) { + MacroAssembler _masm(&cbuf); + __ nop(); +} + + +void emit_call_reloc(CodeBuffer &cbuf, const MachCallNode *n, MachOper *m, RelocationHolder const& rspec) { + int ret_addr_offset0 = n->as_MachCall()->ret_addr_offset(); + int call_site_offset = cbuf.insts()->mark_off(); + MacroAssembler _masm(&cbuf); + __ set_inst_mark(); // needed in emit_to_interp_stub() to locate the call + address target = (address)m->method(); + assert(n->as_MachCall()->entry_point() == target, "sanity"); + assert(maybe_far_call(n) == !__ reachable_from_cache(target), "sanity"); + assert(cache_reachable() == __ cache_fully_reachable(), "sanity"); + + assert(target != NULL, "need real address"); + + if (rspec.type() == relocInfo::runtime_call_type || + rspec.type() == relocInfo::none) { + __ call(target, rspec); + } else { + __ trampoline_call(Address(target, rspec), NULL); + } + int ret_addr_offset = __ offset(); + assert(ret_addr_offset - call_site_offset == ret_addr_offset0, "fix ret_addr_offset()"); +} + +//============================================================================= +// REQUIRED FUNCTIONALITY for encoding +void emit_lo(CodeBuffer &cbuf, int val) { } +void emit_hi(CodeBuffer &cbuf, int val) { } + + 
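+// (Editorial sketch, not part of the change: the maybe_far_call()/far_branches()
+// tests exist because an A32 B/BL instruction encodes a signed 24-bit word
+// offset, i.e. roughly +/-32MB around the branch. Assuming plain C++ and a
+// hypothetical helper -- the port's real test lives in MacroAssembler -- the
+// reachability idea is:
+//
+//   static inline bool bl_in_range(intptr_t from, intptr_t to) {
+//     intptr_t off = to - (from + 8);   // an A32 branch sees PC as insn + 8
+//     return -(intptr_t(1) << 25) <= off && off < (intptr_t(1) << 25);
+//   }
+//
+// Calls that may fall outside this window are emitted as movw/movt + blx, which
+// is why call_runtime_enc_size() earlier counts 3 instructions for the far case.)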
+//============================================================================= +const RegMask& MachConstantBaseNode::_out_RegMask = PTR_REG_mask(); + +int Compile::ConstantTable::calculate_table_base_offset() const { + int offset = -(size() / 2); + // vldr_f32, vldr_f64: 8-bit offset multiplied by 4: +/- 1024 + // ldr, ldrb : 12-bit offset: +/- 4096 + if (!Assembler::is_simm10(offset)) { + offset = Assembler::min_simm10(); + } + return offset; +} + +bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } +void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { + ShouldNotReachHere(); +} + +void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { + Compile* C = ra_->C; + Compile::ConstantTable& constant_table = C->constant_table(); + MacroAssembler _masm(&cbuf); + + Register r = as_Register(ra_->get_encode(this)); + CodeSection* consts_section = __ code()->consts(); + int consts_size = consts_section->align_at_start(consts_section->size()); + assert(constant_table.size() == consts_size, "must be: %d == %d", constant_table.size(), consts_size); + + // Materialize the constant table base. + address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); + RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); + __ mov_address(r, baseaddr, rspec); +} + +uint MachConstantBaseNode::size(PhaseRegAlloc*) const { + return 8; +} + +#ifndef PRODUCT +void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { + char reg[128]; + ra_->dump_register(this, reg); + st->print("MOV_SLOW &constanttable,%s\t! constant table base", reg); +} +#endif + +#ifndef PRODUCT +void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { + Compile* C = ra_->C; + + for (int i = 0; i < OptoPrologueNops; i++) { + st->print_cr("NOP"); st->print("\t"); + } + + size_t framesize = C->frame_size_in_bytes(); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + int bangsize = C->bang_size_in_bytes(); + // Remove two words for return addr and rbp, + framesize -= 2*wordSize; + bangsize -= 2*wordSize; + + // Calls to C2R adapters often do not accept exceptional returns. + // We require that their callers must bang for them. But be careful, because + // some VM calls (such as call site linkage) can use several kilobytes of + // stack. But the stack safety zone should account for that. + // See bugs 4446381, 4468289, 4497237. + if (C->need_stack_bang(bangsize)) { + st->print_cr("! stack bang (%d bytes)", bangsize); st->print("\t"); + } + st->print_cr("PUSH R_FP|R_LR_LR"); st->print("\t"); + if (framesize != 0) { + st->print ("SUB R_SP, R_SP, " SIZE_FORMAT,framesize); + } +} +#endif + +void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + Compile* C = ra_->C; + MacroAssembler _masm(&cbuf); + + // insert a nop at the start of the prolog so we can patch in a + // branch if we need to invalidate the method later + __ nop(); + + size_t framesize = C->frame_size_in_bytes(); + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + int bangsize = C->bang_size_in_bytes(); + // Remove two words for return addr and fp, + framesize -= 2*wordSize; + bangsize -= 2*wordSize; + + // Calls to C2R adapters often do not accept exceptional returns. + // We require that their callers must bang for them. But be careful, because + // some VM calls (such as call site linkage) can use several kilobytes of + // stack. 
But the stack safety zone should account for that. + // See bugs 4446381, 4468289, 4497237. + if (C->need_stack_bang(bangsize)) { + __ arm_stack_overflow_check(bangsize, r12); + } + + __ push(RegSet::of(rfp, lr), sp); + if (framesize != 0) { + __ sub(sp, sp, framesize); + } + + // offset from scratch buffer is not valid + if (strcmp(cbuf.name(), "Compile::Fill_buffer") == 0) { + C->set_frame_complete( __ offset() ); + } + + if (C->has_mach_constant_base_node()) { + // NOTE: We set the table base offset here because users might be + // emitted before MachConstantBaseNode. + Compile::ConstantTable& constant_table = C->constant_table(); + constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); + } +} + +uint MachPrologNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +int MachPrologNode::reloc() const { + return 10; // a large enough number +} + +//============================================================================= +#ifndef PRODUCT +void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { + Compile* C = ra_->C; + + size_t framesize = C->frame_size_in_bytes(); + framesize -= 2*wordSize; + + if (framesize != 0) { + st->print("ADD R_SP, R_SP, " SIZE_FORMAT "\n\t",framesize); + } + st->print("POP R_FP|R_LR_LR"); + + if (do_polling() && ra_->C->is_method_compilation()) { + st->print("\n\t"); + st->print("MOV r12, #PollAddr\t! Load Polling address\n\t"); + st->print("LDR r12,[r12]\t!Poll for Safepointing"); + } +} +#endif + +void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + Compile* C = ra_->C; + + size_t framesize = C->frame_size_in_bytes(); + framesize -= 2*wordSize; + if (framesize != 0) { + __ add(sp, sp, framesize); + } + __ pop(RegSet::of(rfp, lr), sp); + + if (StackReservedPages > 0 && C->has_reserved_stack_access()) { + __ reserved_stack_check(); + } + + // If this does safepoint polling, then do it here + if (do_polling() && ra_->C->is_method_compilation()) { + // mov here is usually one or two instruction + __ mov_address(r12, (address)os::get_polling_page(), RelocationHolder::none); + __ relocate(relocInfo::poll_return_type); + __ ldr(r12, Address(r12)); + } +} + +uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + +int MachEpilogNode::reloc() const { + return 16; // a large enough number +} + +const Pipeline * MachEpilogNode::pipeline() const { + return MachNode::pipeline_class(); +} + +int MachEpilogNode::safepoint_offset() const { + assert( do_polling(), "no return for this epilog node"); + // return MacroAssembler::size_of_sethi(os::get_polling_page()); + Unimplemented(); + return 0; +} + +//============================================================================= + +// Figure out which register class each belongs in: rc_int, rc_float, rc_stack +enum RC { rc_bad, rc_int, rc_float, rc_stack }; +static enum RC rc_class( OptoReg::Name reg ) { + if (!OptoReg::is_valid(reg)) return rc_bad; + if (OptoReg::is_stack(reg)) return rc_stack; + VMReg r = OptoReg::as_VMReg(reg); + if (r->is_Register()) return rc_int; + assert(r->is_FloatRegister(), "must be"); + return rc_float; +} + +static inline bool is_iRegLd_memhd(OptoReg::Name src_first, OptoReg::Name src_second, int offset) { + int rlo = Matcher::_regEncode[src_first]; + int rhi = Matcher::_regEncode[src_second]; + // if (!((rlo&1)==0 && (rlo+1 == rhi))) { + // tty->print_cr("CAUGHT BAD LDRD/STRD"); + // } + return (rlo&1)==0 && (rlo+1 == rhi) && 
is_memoryHD(offset); +} + +uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, + PhaseRegAlloc *ra_, + bool do_size, + outputStream* st ) const { + // Get registers to move + OptoReg::Name src_second = ra_->get_reg_second(in(1)); + OptoReg::Name src_first = ra_->get_reg_first(in(1)); + OptoReg::Name dst_second = ra_->get_reg_second(this ); + OptoReg::Name dst_first = ra_->get_reg_first(this ); + + enum RC src_second_rc = rc_class(src_second); + enum RC src_first_rc = rc_class(src_first); + enum RC dst_second_rc = rc_class(dst_second); + enum RC dst_first_rc = rc_class(dst_first); + + assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); + + // Generate spill code! + int size = 0; + + if (src_first == dst_first && src_second == dst_second) + return size; // Self copy, no move + +#ifdef TODO + if (bottom_type()->isa_vect() != NULL) { + } +#endif + + // Shared code does not expect instruction set capability based bailouts here. + // Handle offset unreachable bailout with minimal change in shared code. + // Bailout only for real instruction emit. + // This requires a single comment change in shared code. ( see output.cpp "Normal" instruction case ) + + MacroAssembler _masm(cbuf); + + // -------------------------------------- + // Check for mem-mem move. Load into unused float registers and fall into + // the float-store case. + if (src_first_rc == rc_stack && dst_first_rc == rc_stack) { + int offset = ra_->reg2offset(src_first); + if (cbuf && !is_memoryfp(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + if (src_second_rc != rc_bad) { + assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous"); + src_first = OptoReg::Name(R_mem_copy_lo_num); + src_second = OptoReg::Name(R_mem_copy_hi_num); + src_first_rc = rc_float; + src_second_rc = rc_float; + if (cbuf) { + __ vldr_f64(Rmemcopy, Address(sp, offset)); + } else if (!do_size) { + st->print(LDR_DOUBLE " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset); + } + } else { + src_first = OptoReg::Name(R_mem_copy_lo_num); + src_first_rc = rc_float; + if (cbuf) { + __ vldr_f32(Rmemcopy, Address(sp, offset)); + } else if (!do_size) { + st->print(LDR_FLOAT " R_%s,[R_SP + #%d]\t! 
spill",OptoReg::regname(src_first),offset); + } + } + size += 4; + } + } + + if (src_second_rc == rc_stack && dst_second_rc == rc_stack) { + Unimplemented(); + } + + // -------------------------------------- + // Check for integer reg-reg copy + if (src_first_rc == rc_int && dst_first_rc == rc_int) { + // Else normal reg-reg copy + assert( src_second != dst_first, "smashed second before evacuating it" ); + if (cbuf) { + __ mov(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else if (!do_size) { + st->print("MOV R_%s, R_%s\t# spill", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + size += 4; + } + + // Check for integer store + if (src_first_rc == rc_int && dst_first_rc == rc_stack) { + int offset = ra_->reg2offset(dst_first); + if (cbuf && !is_memoryI(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + if (src_second_rc != rc_bad && is_iRegLd_memhd(src_first, src_second, offset)) { + assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous"); + if (cbuf) { + __ strd(reg_to_register_object(Matcher::_regEncode[src_first]), Address(sp, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(STR_64 " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first), offset); +#endif + } + return size + 4; + } else { + if (cbuf) { + __ str(reg_to_register_object(Matcher::_regEncode[src_first]), Address(sp, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(STR_32 " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first), offset); +#endif + } + } + } + size += 4; + } + + // Check for integer load + if (dst_first_rc == rc_int && src_first_rc == rc_stack) { + int offset = ra_->reg2offset(src_first); + if (cbuf && !is_memoryI(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + if (src_second_rc != rc_bad && is_iRegLd_memhd(dst_first, dst_second, offset)) { + assert((src_first&1)==0 && src_first+1 == src_second, "pair of registers must be aligned/contiguous"); + if (cbuf) { + __ ldrd(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(sp, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(LDR_64 " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first), offset); +#endif + } + return size + 4; + } else { + if (cbuf) { + __ ldr(reg_to_register_object(Matcher::_regEncode[dst_first]), Address(sp, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(LDR_32 " R_%s,[R_SP + #%d]\t! 
spill",OptoReg::regname(dst_first), offset); +#endif + } + } + } + size += 4; + } + + // Check for float reg-reg copy + if (src_first_rc == rc_float && dst_first_rc == rc_float) { + if (src_second_rc != rc_bad) { + assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous"); + if (cbuf) { + __ vmov_f64(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else if (!do_size) { + st->print(MOV_DOUBLE " R_%s, R_%s\t# spill", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + return 4; + } + if (cbuf) { + __ vmov_f32(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else if (!do_size) { + st->print(MOV_FLOAT " R_%s, R_%s\t# spill", + Matcher::regName[dst_first], + Matcher::regName[src_first]); +#endif + } + size = 4; + } + + // Check for float store + if (src_first_rc == rc_float && dst_first_rc == rc_stack) { + int offset = ra_->reg2offset(dst_first); + if (cbuf && !is_memoryfp(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + // Further check for aligned-adjacent pair, so we can use a double store + if (src_second_rc != rc_bad) { + assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous"); + if (cbuf) { + __ vstr_f64(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(sp, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(STR_DOUBLE " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset); +#endif + } + return size + 4; + } else { + if (cbuf) { + __ vstr_f32(reg_to_FloatRegister_object(Matcher::_regEncode[src_first]), Address(sp, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(STR_FLOAT " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_first),offset); +#endif + } + } + } + size += 4; + } + + // Check for float load + if (dst_first_rc == rc_float && src_first_rc == rc_stack) { + int offset = ra_->reg2offset(src_first); + if (cbuf && !is_memoryfp(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + // Further check for aligned-adjacent pair, so we can use a double store + if (src_second_rc != rc_bad) { + assert((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers and stack slots must be aligned/contiguous"); + if (cbuf) { + __ vldr_f64(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(sp, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(LDR_DOUBLE " R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_first),offset); +#endif + } + return size + 4; + } else { + if (cbuf) { + __ vldr_f32(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), Address(sp, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(LDR_FLOAT " R_%s,[R_SP + #%d]\t! 
spill",OptoReg::regname(dst_first),offset); +#endif + } + } + } + size += 4; + } + + // check for int reg -> float reg move + if (src_first_rc == rc_int && dst_first_rc == rc_float) { + // Further check for aligned-adjacent pair, so we can use a single instruction + if (src_second_rc != rc_bad) { + assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous"); + assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous"); + assert(src_second_rc == rc_int && dst_second_rc == rc_float, "unsupported"); + if (cbuf) { + __ vmov_f64(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first]), reg_to_register_object(Matcher::_regEncode[src_second])); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("FMDRR R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first), OptoReg::regname(src_second)); +#endif + } + return size + 4; + } else { + if (cbuf) { + __ vmov_f32(reg_to_FloatRegister_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(FMSR " R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first)); +#endif + } + size += 4; + } + } + + // check for float reg -> int reg move + if (src_first_rc == rc_float && dst_first_rc == rc_int) { + // Further check for aligned-adjacent pair, so we can use a single instruction + if (src_second_rc != rc_bad) { + assert((src_first&1)==0 && src_first+1 == src_second, "pairs of registers must be aligned/contiguous"); + assert((dst_first&1)==0 && dst_first+1 == dst_second, "pairs of registers must be aligned/contiguous"); + assert(src_second_rc == rc_float && dst_second_rc == rc_int, "unsupported"); + if (cbuf) { + __ vmov_f64(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("FMRRD R_%s, R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(dst_second), OptoReg::regname(src_first)); +#endif + } + return size + 4; + } else { + if (cbuf) { + __ vmov_f32(reg_to_register_object(Matcher::_regEncode[dst_first]), reg_to_FloatRegister_object(Matcher::_regEncode[src_first])); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print(FMRS " R_%s, R_%s\t! spill",OptoReg::regname(dst_first), OptoReg::regname(src_first)); +#endif + } + size += 4; + } + } + + // -------------------------------------------------------------------- + // Check for hi bits still needing moving. Only happens for misaligned + // arguments to native calls. + if (src_second == dst_second) + return size; // Self copy; no move + assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); + + // Check for integer reg-reg copy. 
Hi bits are stuck up in the top + // 32-bits of a 64-bit register, but are needed in low bits of another + // register (else it's a hi-bits-to-hi-bits copy which should have + // happened already as part of a 64-bit move) + if (src_second_rc == rc_int && dst_second_rc == rc_int) { + if (cbuf) { + __ mov(reg_to_register_object(Matcher::_regEncode[dst_second]), reg_to_register_object(Matcher::_regEncode[src_second])); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("MOV R_%s, R_%s\t# spill high", + Matcher::regName[dst_second], + Matcher::regName[src_second]); +#endif + } + return size+4; + } + + // Check for high word integer store + if (src_second_rc == rc_int && dst_second_rc == rc_stack) { + int offset = ra_->reg2offset(dst_second); + + if (cbuf && !is_memoryP(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + if (cbuf) { + __ str(reg_to_register_object(Matcher::_regEncode[src_second]), Address(sp, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("STR R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(src_second), offset); +#endif + } + } + return size + 4; + } + + // Check for high word integer load + if (dst_second_rc == rc_int && src_second_rc == rc_stack) { + int offset = ra_->reg2offset(src_second); + if (cbuf && !is_memoryP(offset)) { + ra_->C->record_method_not_compilable("unable to handle large constant offsets"); + return 0; + } else { + if (cbuf) { + __ ldr(reg_to_register_object(Matcher::_regEncode[dst_second]), Address(sp, offset)); +#ifndef PRODUCT + } else if (!do_size) { + if (size != 0) st->print("\n\t"); + st->print("LDR R_%s,[R_SP + #%d]\t! spill",OptoReg::regname(dst_second), offset); +#endif + } + } + return size + 4; + } + + Unimplemented(); + return 0; // Mute compiler +} + +#ifndef PRODUCT +void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { + implementation( NULL, ra_, false, st ); +} +#endif + +void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + implementation( &cbuf, ra_, false, NULL ); +} + +uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { + return implementation( NULL, ra_, true, NULL ); +} + +//============================================================================= +#ifndef PRODUCT +void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const { + st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); +} +#endif + +void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { + MacroAssembler _masm(&cbuf); + for(int i = 0; i < _count; i += 1) { + __ nop(); + } +} + +uint MachNopNode::size(PhaseRegAlloc *ra_) const { + return 4 * _count; +} + + +//============================================================================= +#ifndef PRODUCT +void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_reg_first(this); + st->print("ADD %s,R_SP+#%d",Matcher::regName[reg], offset); +} +#endif + +void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); + int reg = ra_->get_encode(this); + Register dst = reg_to_register_object(reg); + + if (is_aimm(offset)) { + __ add(dst, sp, offset); + } else { + __ mov(dst, offset); + __ add(dst, sp, dst); + } +} + +uint BoxLockNode::size(PhaseRegAlloc *ra_) const { + // BoxLockNode is 
not a MachNode, so we can't just call MachNode::size(ra_) + assert(ra_ == ra_->C->regalloc(), "sanity"); + return ra_->C->scratch_emit_size(this); +} + +//============================================================================= +#ifndef PRODUCT +#define R_RTEMP "R_R12" +void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const { + st->print_cr("\nUEP:"); + if (UseCompressedClassPointers) { + st->print_cr("\tLDR_w " R_RTEMP ",[R_R0 + oopDesc::klass_offset_in_bytes]\t! Inline cache check"); + st->print_cr("\tdecode_klass " R_RTEMP); + } else { + st->print_cr("\tLDR " R_RTEMP ",[R_R0 + oopDesc::klass_offset_in_bytes]\t! Inline cache check"); + } + st->print_cr("\tCMP " R_RTEMP ",R_R12" ); + st->print ("\tB.NE SharedRuntime::handle_ic_miss_stub"); +} +#endif + +void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { + MacroAssembler _masm(&cbuf); + Register iCache = reg_to_register_object(Matcher::inline_cache_reg_encode()); + assert(iCache == rscratch2/*Ricklass*/, "should be"); + Register receiver = r0; + + __ load_klass(r9, receiver); + __ cmp(r9, iCache); + // r9 seems temporary here + __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, r9, Assembler::NE); +} + +uint MachUEPNode::size(PhaseRegAlloc *ra_) const { + return MachNode::size(ra_); +} + + +// REQUIRED EMIT CODE + +//============================================================================= + +// Emit exception handler code. +int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { + MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(size_exception_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + + // OK to trash LR, because exception blob will kill it + __ jump(OptoRuntime::exception_blob()->entry_point(), relocInfo::runtime_call_type, lr); + + assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); + + __ end_a_stub(); + + return offset; +} + +int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { + // Can't use any of the current frame's registers as we may have deopted + // at a poll and everything can be live. 
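+  // Editorial note on the sequence below: it only needs LR as a scratch
+  // register, so it makes a stack slot for the deopt return PC, saves LR,
+  // materializes the address of this handler (deopt_pc) into LR, stores it
+  // into that slot, restores LR, and tail-jumps to the deopt blob. The assert
+  // at the end checks that the whole sequence stays within the
+  // size_deopt_handler() budget of nine 4-byte instructions.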
+ MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(size_deopt_handler()); + if (base == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return 0; // CodeBuffer::expand failed + } + + int offset = __ offset(); + address deopt_pc = __ pc(); + + __ sub(sp, sp, wordSize); // make room for saved PC + __ push(lr); // save LR that may be live when we get here + __ mov_relative_address(lr, deopt_pc); + __ str(lr, Address(sp, wordSize)); // save deopt PC + __ pop(lr); // restore LR + // rscratch1 seems killed at deopt_blob + __ jump(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type, rscratch1); + + assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); + + __ end_a_stub(); + return offset; +} + +// REQUIRED MATCHER CODE + +//============================================================================= + +const bool Matcher::match_rule_supported(int opcode) { + if (!has_match_rule(opcode)) + return false; + + switch (opcode) { + case Op_PopCountI: + case Op_PopCountL: + if (!UsePopCountInstruction) + return false; + break; + case Op_LShiftCntV: + case Op_RShiftCntV: + case Op_AddVB: + case Op_AddVS: + case Op_AddVI: + case Op_AddVL: + case Op_SubVB: + case Op_SubVS: + case Op_SubVI: + case Op_SubVL: + case Op_MulVS: + case Op_MulVI: + case Op_LShiftVB: + case Op_LShiftVS: + case Op_LShiftVI: + case Op_LShiftVL: + case Op_RShiftVB: + case Op_RShiftVS: + case Op_RShiftVI: + case Op_RShiftVL: + case Op_URShiftVB: + case Op_URShiftVS: + case Op_URShiftVI: + case Op_URShiftVL: + case Op_AndV: + case Op_OrV: + case Op_XorV: + return VM_Version::features() & FT_AdvSIMD; + case Op_LoadVector: + case Op_StoreVector: + case Op_AddVF: + case Op_SubVF: + case Op_MulVF: + return VM_Version::features() & (FT_VFPV2 | FT_AdvSIMD); + case Op_AddVD: + case Op_SubVD: + case Op_MulVD: + case Op_DivVF: + case Op_DivVD: + return VM_Version::features() & FT_VFPV2; + } + + return true; // Per default match rules are supported. +} + +const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { + + // TODO + // identify extra cases that we might want to provide match rules for + // e.g. Op_ vector nodes and other intrinsics while guarding with vlen + bool ret_value = match_rule_supported(opcode); + // Add rules here. + + return ret_value; // Per default match rules are supported. +} + +const bool Matcher::has_predicated_vectors(void) { + return false; +} + +const int Matcher::float_pressure(int default_pressure_threshold) { + return default_pressure_threshold; +} + +int Matcher::regnum_to_fpu_offset(int regnum) { + return regnum - 32; // The FP registers are in the second chunk +} + +// Vector width in bytes +const int Matcher::vector_width_in_bytes(BasicType bt) { + return MaxVectorSize; +} + +// Vector ideal reg corresponding to specified size in bytes +const uint Matcher::vector_ideal_reg(int size) { + assert(MaxVectorSize >= size, ""); + switch(size) { + case 8: return Op_VecD; + case 16: return Op_VecX; + } + ShouldNotReachHere(); + return 0; +} + +const uint Matcher::vector_shift_count_ideal_reg(int size) { + return vector_ideal_reg(size); +} + +// Limits on vector size (number of elements) loaded into vector. 
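+// (Illustration: with MaxVectorSize == 16, i.e. one 128-bit NEON Q register,
+// max_vector_size(T_INT) below is 16/4 == 4 lanes and min_vector_size(T_INT)
+// is 8/4 == 2 lanes.)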
+const int Matcher::max_vector_size(const BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + return vector_width_in_bytes(bt)/type2aelembytes(bt); +} + +const int Matcher::min_vector_size(const BasicType bt) { + assert(is_java_primitive(bt), "only primitive type vectors"); + return 8/type2aelembytes(bt); +} + +// ARM doesn't support misaligned vectors store/load. +const bool Matcher::misaligned_vectors_ok() { + return false; +} + +// ARM doesn't support AES intrinsics +const bool Matcher::pass_original_key_for_aes() { + return false; +} + +const bool Matcher::convL2FSupported(void) { + return false; // TODO why not? +} + +// Is this branch offset short enough that a short branch can be used? +// +// NOTE: If the platform does not provide any short branch variants, then +// this method should return false for offset 0. +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + // The passed offset is relative to address of the branch. + // On ARM a branch displacement is calculated relative to address + // of the branch + 8. + // + // offset -= 8; + // return (Assembler::is_simm24(offset)); + return false; +} + +const bool Matcher::isSimpleConstant64(jlong value) { + // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. + return false; +} + +// No scaling for the parameter the ClearArray node. +const bool Matcher::init_array_count_is_in_bytes = true; + +// Needs 2 CMOV's for longs. +const int Matcher::long_cmove_cost() { return 2; } + +// CMOVF/CMOVD are expensive on ARM. +const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } + +// Does the CPU require late expand (see block.cpp for description of late expand)? +const bool Matcher::require_postalloc_expand = false; + +// Do we need to mask the count passed to shift instructions or does +// the cpu only look at the lower 5/6 bits anyway? +// FIXME: does this handle vector shifts as well? +const bool Matcher::need_masked_shift_count = true; + +const bool Matcher::convi2l_type_required = true; + +// Should the Matcher clone shifts on addressing modes, expecting them +// to be subsumed into complex addressing expressions or compute them +// into registers? +bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { + return clone_base_plus_offset_address(m, mstack, address_visited); +} + +void Compile::reshape_address(AddPNode* addp) { +} + +bool Matcher::narrow_oop_use_complex_address() { + ShouldNotCallThis(); + return false; +} + +bool Matcher::narrow_klass_use_complex_address() { + ShouldNotCallThis(); + return false; +} + +bool Matcher::const_oop_prefer_decode() { + ShouldNotCallThis(); + return true; +} + +bool Matcher::const_klass_prefer_decode() { + ShouldNotCallThis(); + return true; +} + +// Is it better to copy float constants, or load them directly from memory? +// Intel can load a float constant from a direct address, requiring no +// extra registers. Most RISCs will have to materialize an address into a +// register first, so they would do better to copy the constant from stack. +const bool Matcher::rematerialize_float_constants = false; + +// If CPU can load and store mis-aligned doubles directly then no fixup is +// needed. Else we split the double into 2 integer pieces and move it +// piece-by-piece. Only happens when passing doubles into C code as the +// Java calling convention forces doubles to be aligned. +const bool Matcher::misaligned_doubles_ok = false; + +// No-op on ARM. 
+void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { +} + +// Advertise here if the CPU requires explicit rounding operations +// to implement the UseStrictFP mode. +const bool Matcher::strict_fp_requires_explicit_rounding = false; + +// Are floats converted to double when stored to stack during deoptimization? +// ARM does not handle callee-save floats. +bool Matcher::float_in_double() { + return false; +} + +// Do ints take an entire long register or just half? +// Note that we if-def off of _LP64. +// The relevant question is how the int is callee-saved. In _LP64 +// the whole long is written but de-opt'ing will have to extract +// the relevant 32 bits, in not-_LP64 only the low 32 bits is written. +const bool Matcher::int_in_long = false; + +// Return whether or not this register is ever used as an argument. This +// function is used on startup to build the trampoline stubs in generateOptoStub. +// Registers not mentioned will be killed by the VM call in the trampoline, and +// arguments in those registers not be available to the callee. +bool Matcher::can_be_java_arg( int reg ) { + if (reg == R_R0_num || + reg == R_R1_num || + reg == R_R2_num || + reg == R_R3_num) return true; + + if (reg >= R_S0_num && + reg <= R_S15_num) return true; + return false; +} + +bool Matcher::is_spillable_arg( int reg ) { + return can_be_java_arg(reg); +} + +bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { + return false; +} + +// Register for DIVI projection of divmodI +RegMask Matcher::divI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODI projection of divmodI +RegMask Matcher::modI_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for DIVL projection of divmodL +RegMask Matcher::divL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +// Register for MODL projection of divmodL +RegMask Matcher::modL_proj_mask() { + ShouldNotReachHere(); + return RegMask(); +} + +const RegMask Matcher::method_handle_invoke_SP_save_mask() { + return FP_REGP_mask(); +} + +bool maybe_far_call(const CallNode *n) { + return !MacroAssembler::_reachable_from_cache(n->as_Call()->entry_point()); +} + +bool maybe_far_call(const MachCallNode *n) { + return !MacroAssembler::_reachable_from_cache(n->as_MachCall()->entry_point()); +} + +%} + +//----------ENCODING BLOCK----------------------------------------------------- +// This block specifies the encoding classes used by the compiler to output +// byte streams. Encoding classes are parameterized macros used by +// Machine Instruction Nodes in order to generate the bit encoding of the +// instruction. Operands specify their base encoding interface with the +// interface keyword. There are currently supported four interfaces, +// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an +// operand to generate a function which returns its register number when +// queried. CONST_INTER causes an operand to generate a function which +// returns the value of the constant when queried. MEMORY_INTER causes an +// operand to generate four functions which return the Base Register, the +// Index Register, the Scale Value, and the Offset Value of the operand when +// queried. COND_INTER causes an operand to generate six functions which +// return the encoding code (ie - encoding bits for the instruction) +// associated with each basic boolean condition for a conditional instruction. +// +// Instructions specify two basic values for encoding. 
Again, a function +// is available to check if the constant displacement is an oop. They use the +// ins_encode keyword to specify their encoding classes (which must be +// a sequence of enc_class names, and their parameters, specified in +// the encoding block), and they use the +// opcode keyword to specify, in order, their primary, secondary, and +// tertiary opcode. Only the opcode sections which a particular instruction +// needs for encoding need to be specified. +encode %{ + enc_class call_epilog %{ + // nothing + %} + + enc_class Java_To_Runtime (method meth) %{ + // CALL directly to the runtime + emit_call_reloc(cbuf, as_MachCall(), $meth, runtime_call_Relocation::spec()); + %} + + enc_class Java_Static_Call (method meth) %{ + // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine + // who we intended to call. + + if ( !_method) { + emit_call_reloc(cbuf, as_MachCall(), $meth, runtime_call_Relocation::spec()); + } else { + int method_index = resolved_method_index(cbuf); + RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) + : static_call_Relocation::spec(method_index); + emit_call_reloc(cbuf, as_MachCall(), $meth, rspec); + + // Emit stubs for static call. + address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); + if (stub == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + } + %} + + enc_class save_last_PC %{ + // preserve mark + address mark = cbuf.insts()->mark(); + debug_only(int off0 = cbuf.insts_size()); + MacroAssembler _masm(&cbuf); + int ret_addr_offset = as_MachCall()->ret_addr_offset(); + __ adr(lr, mark + ret_addr_offset); + __ str(lr, Address(Rthread, JavaThread::last_Java_pc_offset())); + debug_only(int off1 = cbuf.insts_size()); + assert(off1 - off0 == 2 * Assembler::InstructionSize, "correct size prediction"); + // restore mark + cbuf.insts()->set_mark(mark); + %} + + enc_class preserve_SP %{ + // preserve mark + address mark = cbuf.insts()->mark(); + debug_only(int off0 = cbuf.insts_size()); + MacroAssembler _masm(&cbuf); + // FP is preserved across all calls, even compiled calls. + // Use it to preserve SP in places where the callee might change the SP. + __ mov(Rmh_SP_save, sp); + debug_only(int off1 = cbuf.insts_size()); + assert(off1 - off0 == 4, "correct size prediction"); + // restore mark + cbuf.insts()->set_mark(mark); + %} + + enc_class restore_SP %{ + MacroAssembler _masm(&cbuf); + __ mov(sp, Rmh_SP_save); + %} + + enc_class Java_Dynamic_Call (method meth) %{ + MacroAssembler _masm(&cbuf); + Register R12_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode()); + assert(R12_ic_reg == rscratch2/*Ricklass*/, "should be"); + __ set_inst_mark(); + __ movw_i(R12_ic_reg, ((unsigned int)Universe::non_oop_word()) & 0xffff); + __ movt_i(R12_ic_reg, ((unsigned int)Universe::non_oop_word()) >> 16); + address virtual_call_oop_addr = __ inst_mark(); + // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine + // who we intended to call. + int method_index = resolved_method_index(cbuf); + emit_call_reloc(cbuf, as_MachCall(), $meth, virtual_call_Relocation::spec(virtual_call_oop_addr, method_index)); + %} + + enc_class LdReplImmI(immI src, regD dst, iRegI tmp, int cnt, int wth) %{ + // FIXME: load from constant table? 
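+    // (Editorial worked example: for cnt=4, wth=1, src=0x12 the loop below
+    // masks to the element width and ORs in shifted copies,
+    //   0x12 -> 0x1212 -> 0x121212 -> 0x12121212,
+    // and the final vmov_f64 (FMDRR) then copies that 32-bit pattern into both
+    // halves of the destination D register.)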
+ // Load a constant replicated "count" times with width "width" + int count = $cnt$$constant; + int width = $wth$$constant; + assert(count*width == 4, "sanity"); + int val = $src$$constant; + if (width < 4) { + int bit_width = width * 8; + val &= (((int)1) << bit_width) - 1; // mask off sign bits + for (int i = 0; i < count - 1; i++) { + val |= (val << bit_width); + } + } + MacroAssembler _masm(&cbuf); + + if (val == -1) { + __ mvn_i($tmp$$Register, 0); + } else if (val == 0) { + __ mov_i($tmp$$Register, 0); + } else { + __ movw_i($tmp$$Register, val & 0xffff); + __ movt_i($tmp$$Register, (unsigned int)val >> 16); + } + __ vmov_f64($dst$$FloatRegister, $tmp$$Register, $tmp$$Register); + %} + + enc_class LdReplImmF(immF src, regD dst, iRegI tmp) %{ + // Replicate float con 2 times and pack into vector (8 bytes) in regD. + float fval = $src$$constant; + int val = *((int*)&fval); + MacroAssembler _masm(&cbuf); + + if (val == -1) { + __ mvn_i($tmp$$Register, 0); + } else if (val == 0) { + __ mov_i($tmp$$Register, 0); + } else { + __ movw_i($tmp$$Register, val & 0xffff); + __ movt_i($tmp$$Register, (unsigned int)val >> 16); + } + __ vmov_f64($dst$$FloatRegister, $tmp$$Register, $tmp$$Register); + %} + + enc_class enc_String_Compare(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result, + iRegI tmp1, iRegI tmp2, Q0_regD ftmp1, Q1_regD ftmp2, + int bytes_per_char1, int bytes_per_char2) %{ + MacroAssembler _masm(&cbuf); + + Register str1 = $str1$$Register; + Register str2 = $str2$$Register; + Register cnt1 = $cnt1$$Register; + Register cnt2 = $cnt2$$Register; + Register tmp1 = $tmp1$$Register; + Register tmp2 = $tmp2$$Register; + FloatRegister ftmp1 = $ftmp1$$FloatRegister; + FloatRegister ftmp2 = $ftmp2$$FloatRegister; + Register result = $result$$Register; + int bytes_per_char1 = $bytes_per_char1; + int bytes_per_char2 = $bytes_per_char2; + + typedef void (Assembler::*ldfp)(Register, const Address &, Assembler::Condition); + typedef void (Assembler::*usubp)(Register, Register, Register, Assembler::Condition); + ldfp ldf_16 = &Assembler::ldrh; + ldfp ldf_8 = &Assembler::ldrb; + + // slow path: single char load + int cnt_per_char = bytes_per_char1==2 && bytes_per_char2==2 ? 2 : 1; + ldfp lds1 = bytes_per_char1 == 2 ? ldf_16 : ldf_8; + ldfp lds2 = bytes_per_char2 == 2 ? ldf_16 : ldf_8; + usubp usub = bytes_per_char1 == 1 ? (usubp)&Assembler::usub8 : (usubp)&Assembler::usub16; + + assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result); + + Label Llength_diff, Ldone, Lshort_loop; + + BLOCK_COMMENT("string_compare {"); + + // for UU we count bytes (saves 1 insn) for others count in chars + if (cnt_per_char == 1 && bytes_per_char1 == 2) + __ lsr(cnt1, cnt1, 1); + if (cnt_per_char == 1 && bytes_per_char2 == 2) + __ lsr(cnt2, cnt2, 1); + + // Compute the minimum of the string lengths and save the difference. + __ subs(tmp1, cnt1, cnt2); + __ mov(cnt2, cnt1, Assembler::LE); // min + + // Check if the strings start at the same location. + __ cmp(str1, str2); + __ b(Llength_diff, Assembler::EQ); + + // without NEON only for UU and LL fast path is available + if ((VM_Version::features() & FT_AdvSIMD) || bytes_per_char1 == bytes_per_char2) { + Label Lshort_string, Lnext_word, Ldifference; + + // A very short string + __ cmp(cnt2, 8+4); + __ b(Lshort_string, Assembler::LT); + + // Compare words + { + const int bits_per_char = bytes_per_char1==1 && bytes_per_char2==1 ? 
8 : 16; + // Check first few chars to avoid excessive processing + if (bytes_per_char1 == 1 && bytes_per_char2 == 1) { + Label Lfull_speed; + __ ldr(tmp2, __ post(str1, wordSize)); + __ ldr(result, __ post(str2, wordSize)); + (_masm.*usub)(result, tmp2, result, Assembler::AL); + __ tst(result, result); + __ b(Lfull_speed, Assembler::EQ); + + __ rbit(cnt1, result); + __ clz(cnt1, cnt1); + __ bic(cnt1, cnt1, bits_per_char-1); + __ lsr(result, result, cnt1); + __ lsr(tmp2, tmp2, cnt1); + __ ubfx(result, result, 0, bits_per_char); + __ ubfx(tmp2, tmp2, 0, bits_per_char); + __ cmp(result, tmp2); + __ sub(result, result, 1< Matcher::_in_arg_limit, unaligned +// h ^ | in | 5 +// | | args | 4 Holes in incoming args owned by SELF +// | | | | 3 +// | | +--------+ +// V | | old out| Empty on Intel, window on Sparc +// | old |preserve| Must be even aligned. +// | SP-+--------+----> Matcher::_old_SP, 8 (or 16 in LP64)-byte aligned +// | | in | 3 area for Intel ret address +// Owned by |preserve| Empty on Sparc. +// SELF +--------+ +// | | pad2 | 2 pad to align old SP +// | +--------+ 1 +// | | locks | 0 +// | +--------+----> VMRegImpl::stack0, 8 (or 16 in LP64)-byte aligned +// | | pad1 | 11 pad to align new SP +// | +--------+ +// | | | 10 +// | | spills | 9 spills +// V | | 8 (pad0 slot for callee) +// -----------+--------+----> Matcher::_out_arg_limit, unaligned +// ^ | out | 7 +// | | args | 6 Holes in outgoing args owned by CALLEE +// Owned by +--------+ +// CALLEE | new out| 6 Empty on Intel, window on Sparc +// | new |preserve| Must be even-aligned. +// | SP-+--------+----> Matcher::_new_SP, even aligned +// | | | +// +// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is +// known from SELF's arguments and the Java calling convention. +// Region 6-7 is determined per call site. +// Note 2: If the calling convention leaves holes in the incoming argument +// area, those holes are owned by SELF. Holes in the outgoing area +// are owned by the CALLEE. Holes should not be nessecary in the +// incoming area, as the Java calling convention is completely under +// the control of the AD file. Doubles can be sorted and packed to +// avoid holes. Holes in the outgoing arguments may be nessecary for +// varargs C calling conventions. +// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is +// even aligned with pad0 as needed. +// Region 6 is even aligned. Region 6-7 is NOT even aligned; +// region 6-11 is even aligned; it may be padded out more so that +// the region from SP to FP meets the minimum stack alignment. + +frame %{ + // What direction does stack grow in (assumed to be same for native & Java) + stack_direction(TOWARDS_LOW); + + // These two registers define part of the calling convention + // between compiled code and the interpreter. 
+ inline_cache_reg(R_Ricklass); // Inline Cache Register or Method* for I2C + interpreter_method_oop_reg(R_Rmethod); // Method Oop Register when calling interpreter + + // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] + cisc_spilling_operand_name(indOffset); + + // Number of stack slots consumed by a Monitor enter + sync_stack_slots(1 * VMRegImpl::slots_per_word); + + // Compiled code's Frame Pointer + frame_pointer(R_R13); + + // Stack alignment requirement + stack_alignment(StackAlignmentInBytes); + // LP64: Alignment size in bytes (128-bit -> 16 bytes) + // !LP64: Alignment size in bytes (64-bit -> 8 bytes) + + // Number of stack slots between incoming argument block and the start of + // a new frame. The PROLOG must add this many slots to the stack. The + // EPILOG must remove this many slots. + // FP + LR + in_preserve_stack_slots(2 * VMRegImpl::slots_per_word); + + // Number of outgoing stack slots killed above the out_preserve_stack_slots + // for calls to C. Supports the var-args backing area for register parms. + // ADLC doesn't support parsing expressions, so I folded the math by hand. + varargs_C_out_slots_killed( 0); + + // The after-PROLOG location of the return address. Location of + // return address specifies a type (REG or STACK) and a number + // representing the register number (i.e. - use a register name) or + // stack slot. + // Ret Addr is on stack in slot 0 if no locks or verification or alignment. + // Otherwise, it is above the locks and verification slot and alignment word + return_addr(STACK - 1*VMRegImpl::slots_per_word + + align_up((Compile::current()->in_preserve_stack_slots() + + Compile::current()->fixed_slots()), + stack_alignment_in_slots())); + + // Body of function which returns an OptoRegs array locating + // arguments either in registers or in stack slots for calling + // java + calling_convention %{ + (void) SharedRuntime::java_calling_convention(sig_bt, regs, length, is_outgoing); + + %} + + // Body of function which returns an OptoRegs array locating + // arguments either in registers or in stack slots for callin + // C. + c_calling_convention %{ + // This is obviously always outgoing + (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); + %} + + // Location of compiled Java return values. + return_value %{ + return c2::return_value(ideal_reg); + %} + + // Location of C return values. + c_return_value %{ +#ifndef HARD_FLOAT_CC + return c2::c_return_value(ideal_reg); +#else + return c2::return_value(ideal_reg); +#endif + %} + +%} + +//----------ATTRIBUTES--------------------------------------------------------- +//----------Instruction Attributes--------------------------------------------- +ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute +ins_attrib ins_size(32); // Required size attribute (in bits) +ins_attrib ins_short_branch(0); // Required flag: is this instruction a + // non-matching short branch variant of some + // long branch? + +//----------OPERANDS----------------------------------------------------------- +// Operand definitions must precede instruction definitions for correct parsing +// in the ADLC because operands constitute user defined types which are used in +// instruction definitions. 
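
Before the operand definitions begin, here is an illustrative sketch (editorial example, not part of the patch) of how these pieces fit together: a CONST_INTER operand is consumed by an instruction's match rule, and the encoding block reads it back through the $-syntax, while REG_INTER operands yield registers. The rule name is hypothetical and simplified from the real add rules in this file, and the MacroAssembler add(Register, Register, imm) call is assumed by analogy with the sub/mov calls used elsewhere in this port.

instruct addI_reg_aimm_sketch(iRegI dst, iRegI src1, aimmI src2) %{
  match(Set dst (AddI src1 src2));         // ideal-graph pattern this rule covers
  size(4);
  format %{ "ADD    $dst,$src1,$src2" %}   // shown in PrintOptoAssembly-style output
  ins_encode %{
    // $src2$$constant queries the CONST_INTER operand for its value;
    // $dst$$Register / $src1$$Register query the REG_INTER operands.
    __ add($dst$$Register, $src1$$Register, $src2$$constant);
  %}
  ins_pipe(ialu_reg_imm);                  // pipeline class defined further below
%}

The load rules later in this file follow the same shape, with a memory opclass operand (e.g. memoryB) in place of the immediate.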
+ +//----------Simple Operands---------------------------------------------------- +// Immediate Operands +// Integer Immediate: 32-bit +operand immI() %{ + match(ConI); + + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 8-bit unsigned - for VMOV +operand immU8() %{ + predicate(0 <= n->get_int() && (n->get_int() <= 255)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 16-bit +operand immI16() %{ + predicate((n->get_int() >> 16) == 0 && (VM_Version::features() & FT_ARMV6T2)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: offset for half and double word loads and stores +operand immIHD() %{ + predicate(is_memoryHD(n->get_int())); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: offset for fp loads and stores +operand immIFP() %{ + predicate(is_memoryfp(n->get_int()) && ((n->get_int() & 3) == 0)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Valid scale values for addressing modes and shifts +operand immU5() %{ + predicate(0 <= n->get_int() && (n->get_int() <= 31)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 6-bit +operand immU6Big() %{ + predicate(n->get_int() >= 32 && n->get_int() <= 63); + match(ConI); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: 0-bit +operand immI0() %{ + predicate(n->get_int() == 0); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 1 +operand immI_1() %{ + predicate(n->get_int() == 1); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 2 +operand immI_2() %{ + predicate(n->get_int() == 2); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 3 +operand immI_3() %{ + predicate(n->get_int() == 3); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 4 +operand immI_4() %{ + predicate(n->get_int() == 4); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 8 +operand immI_8() %{ + predicate(n->get_int() == 8); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Int Immediate non-negative +operand immU31() +%{ + predicate(n->get_int() >= 0); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the values 32-63 +operand immI_32_63() %{ + predicate(n->get_int() >= 32 && n->get_int() <= 63); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Immediates for special shifts (sign extend) + +// Integer Immediate: the value 16 +operand immI_16() %{ + predicate(n->get_int() == 16); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 24 +operand immI_24() %{ + predicate(n->get_int() == 24); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 255 +operand immI_255() %{ + predicate( n->get_int() == 255 ); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediate: the value 65535 +operand immI_65535() %{ + predicate(n->get_int() == 
65535); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediates for arithmetic instructions + +operand aimmI() %{ + predicate(is_aimm(n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand aimmIneg() %{ + predicate(is_aimm(-n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand aimmU31() %{ + predicate((0 <= n->get_int()) && is_aimm(n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Integer Immediates for logical instructions + +operand limmI() %{ + predicate(is_limmI(n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand limmIlow8() %{ + predicate(is_limmI_low(n->get_int(), 8)); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand limmU31() %{ + predicate(0 <= n->get_int() && is_limmI(n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand limmIn() %{ + predicate(is_limmI(~n->get_int())); + match(ConI); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: the value FF +operand immL_FF() %{ + predicate( n->get_long() == 0xFFL ); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: the value FFFF +operand immL_FFFF() %{ + predicate( n->get_long() == 0xFFFFL ); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate: 32 or 64-bit +operand immP() %{ + match(ConP); + + op_cost(5); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +operand immP0() %{ + predicate(n->get_ptr() == 0); + match(ConP); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +operand immP_poll() %{ + predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); + match(ConP); + + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Pointer Immediate +operand immN() +%{ + match(ConN); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +operand immNKlass() +%{ + match(ConNKlass); + + op_cost(10); + format %{ %} + interface(CONST_INTER); +%} + +// NULL Pointer Immediate +operand immN0() +%{ + predicate(n->get_narrowcon() == 0); + match(ConN); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immL() %{ + match(ConL); + op_cost(40); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +operand immL0() %{ + predicate(n->get_long() == 0L); + match(ConL); + op_cost(0); + // formats are generated automatically for constants and base registers + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: 16-bit +operand immL16() %{ + predicate(n->get_long() >= 0 && n->get_long() < (1<<16) && (VM_Version::features() & FT_ARMV6T2)); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Long Immediate: low 32-bit mask +operand immL_32bits() %{ + predicate(n->get_long() == 0xFFFFFFFFL); + match(ConL); + op_cost(0); + + format %{ %} + interface(CONST_INTER); +%} + +// Double Immediate +operand immD() %{ + match(ConD); + + op_cost(40); + format %{ %} + interface(CONST_INTER); +%} + +// Double Immediate: +0.0d. 
+operand immD0() %{ + predicate(jlong_cast(n->getd()) == 0); + + match(ConD); + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand imm8D() %{ + predicate(Assembler::operand_valid_for_double_immediate(n->getd())); + match(ConD); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate +operand immF() %{ + match(ConF); + + op_cost(20); + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate: +0.0f +operand immF0() %{ + predicate(jint_cast(n->getf()) == 0); + match(ConF); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Float Immediate: encoded as 8 bits +operand imm8F() %{ + predicate(Assembler::operand_valid_for_float_immediate(n->getf())); + match(ConF); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +// Integer Register Operands +// Integer Register +operand iRegI() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegI); + match(R0RegI); + match(R1RegI); + match(R2RegI); + match(R3RegI); + match(R12RegI); + + format %{ %} + interface(REG_INTER); +%} + +// Pointer Register +operand iRegP() %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(RegP); + match(R0RegP); + match(R1RegP); + match(R2RegP); + match(RExceptionRegP); + match(RmethodRegP); // R8 + match(R9RegP); + match(RthreadRegP); // R10, TODO Oracle FIXME: move to sp_ptr_RegP? + match(R12RegP); + match(LRRegP); + + match(sp_ptr_RegP); + match(store_ptr_RegP); + + format %{ %} + interface(REG_INTER); +%} + +// GPRs + Rmethod + Rthread + SP +operand sp_ptr_RegP() %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(RegP); + match(iRegP); + match(SPRegP); // FIXME: check cost + + format %{ %} + interface(REG_INTER); +%} + +operand R0RegP() %{ + constraint(ALLOC_IN_RC(R0_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand R1RegP() %{ + constraint(ALLOC_IN_RC(R1_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand R2RegP() %{ + constraint(ALLOC_IN_RC(R2_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand RExceptionRegP() %{ + constraint(ALLOC_IN_RC(Rexception_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand RthreadRegP() %{ + constraint(ALLOC_IN_RC(Rthread_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand RmethodRegP() %{ + constraint(ALLOC_IN_RC(Rmethod_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand IPRegP() %{ + constraint(ALLOC_IN_RC(IP_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand LRRegP() %{ + constraint(ALLOC_IN_RC(LR_regP)); + match(iRegP); + + format %{ %} + interface(REG_INTER); +%} + +operand R0RegI() %{ + constraint(ALLOC_IN_RC(R0_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand R1RegI() %{ + constraint(ALLOC_IN_RC(R1_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand R2RegI() %{ + constraint(ALLOC_IN_RC(R2_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand R3RegI() %{ + constraint(ALLOC_IN_RC(R3_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand R9RegI() %{ + constraint(ALLOC_IN_RC(R9_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +operand R12RegI() %{ + constraint(ALLOC_IN_RC(R12_regI)); + match(iRegI); + + format %{ %} + interface(REG_INTER); +%} + +// Long Register +operand iRegL() %{ + constraint(ALLOC_IN_RC(long_reg)); + match(RegL); + match(R0R1RegL); + match(R2R3RegL); + + format %{ %} + 
interface(REG_INTER); +%} + +operand iRegLd() %{ + constraint(ALLOC_IN_RC(long_reg_align)); + match(iRegL); // FIXME: allows unaligned R11/R12? + + format %{ %} + interface(REG_INTER); +%} + +// first long arg, or return value +operand R0R1RegL() %{ + constraint(ALLOC_IN_RC(R0R1_regL)); + match(iRegL); + + format %{ %} + interface(REG_INTER); +%} + +operand R2R3RegL() %{ + constraint(ALLOC_IN_RC(R2R3_regL)); + match(iRegL); + + format %{ %} + interface(REG_INTER); +%} + +// Condition Code Flag Register +operand flagsReg() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "apsr" %} + interface(REG_INTER); +%} + +// Result of compare to 0 (TST) +operand flagsReg_EQNELTGE() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "apsr_EQNELTGE" %} + interface(REG_INTER); +%} + +// Condition Code Register, unsigned comparisons. +operand flagsRegU() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); +#ifdef TODO + match(RegFlagsP); +#endif + + format %{ "apsr_U" %} + interface(REG_INTER); +%} + +// Condition Code Register, pointer comparisons. +operand flagsRegP() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "apsr_P" %} + interface(REG_INTER); +%} + +// Condition Code Register, long comparisons. +operand flagsRegL_LTGE() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "apsr_L_LTGE" %} + interface(REG_INTER); +%} + +operand flagsRegUL() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "apsr_UL" %} + interface(REG_INTER); +%} + +operand flagsRegL_EQNE() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "apsr_L_EQNE" %} + interface(REG_INTER); +%} + +operand flagsRegL_LEGT() %{ + constraint(ALLOC_IN_RC(int_flags)); + match(RegFlags); + + format %{ "apsr_L_LEGT" %} + interface(REG_INTER); +%} + +// Condition Code Register, floating comparisons, unordered same as "less". 
+operand flagsRegF() %{ + constraint(ALLOC_IN_RC(float_flags)); + match(RegFlags); + + format %{ "fpscr_F" %} + interface(REG_INTER); +%} + +// Vectors +operand vecD() %{ + constraint(ALLOC_IN_RC(actual_dflt_reg)); + match(VecD); + + format %{ %} + interface(REG_INTER); +%} + +operand vecX() %{ + constraint(ALLOC_IN_RC(vectorx_reg)); + match(VecX); + + format %{ %} + interface(REG_INTER); +%} + +operand regD() %{ + constraint(ALLOC_IN_RC(actual_dflt_reg)); + match(RegD); + match(regD_low); + + format %{ %} + interface(REG_INTER); +%} + +operand Q0_regD() %{ + constraint(ALLOC_IN_RC(D0D1_regD)); + match(RegD); + match(regD_low); + + format %{ %} + interface(REG_INTER); +%} + +operand Q1_regD() %{ + constraint(ALLOC_IN_RC(D2D3_regD)); + match(RegD); + match(regD_low); + + format %{ %} + interface(REG_INTER); +%} + +operand regF() %{ + constraint(ALLOC_IN_RC(sflt_reg)); + match(RegF); + + format %{ %} + interface(REG_INTER); +%} + +operand regD_low() %{ + constraint(ALLOC_IN_RC(dflt_low_reg)); + match(RegD); + + format %{ %} + interface(REG_INTER); +%} + +// Special Registers + +// Method Register +operand inline_cache_regP(iRegP reg) %{ + constraint(ALLOC_IN_RC(Ricklass_regP)); + match(reg); + format %{ %} + interface(REG_INTER); +%} + +operand interpreter_method_oop_regP(iRegP reg) %{ + constraint(ALLOC_IN_RC(Rmethod_regP)); + match(reg); + format %{ %} + interface(REG_INTER); +%} + + +//----------Complex Operands--------------------------------------------------- +// Indirect Memory Reference +operand indirect(sp_ptr_RegP reg) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(reg); + + op_cost(100); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xf); // PC => no index + scale(0x0); + disp(0x0); + %} +%} + +// Indirect with Offset in ]-4096, 4096[ +operand indOffset12(sp_ptr_RegP reg, immI12 offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xf); // PC => no index + scale(0x0); + disp($offset); + %} +%} + +// Indirect with offset for float load/store +operand indOffsetFP(sp_ptr_RegP reg, immIFP offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xf); // PC => no index + scale(0x0); + disp($offset); + %} +%} + +// Indirect with Offset for half and double words +operand indOffsetHD(sp_ptr_RegP reg, immIHD offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xf); // PC => no index + scale(0x0); + disp($offset); + %} +%} + +// Indirect with Offset and Offset+4 in ]-1024, 1024[ +operand indOffsetFPx2(sp_ptr_RegP reg, immX10x2 offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xf); // PC => no index + scale(0x0); + disp($offset); + %} +%} + +// Indirect with Offset and Offset+4 in ]-4096, 4096[ +operand indOffset12x2(sp_ptr_RegP reg, immI12x2 offset) %{ + constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(AddP reg offset); + + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0xf); // PC => no index + scale(0x0); + disp($offset); + %} +%} + +// Indirect with Register Index +operand indIndex(iRegP addr, iRegX index) %{ + 
constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP addr index); + + op_cost(100); + format %{ "[$addr + $index]" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale(0x0); + disp(0x0); + %} +%} + +// Indirect Memory Times Scale Plus Index Register +operand indIndexScale(iRegP addr, iRegX index, immU5 scale) %{ + constraint(ALLOC_IN_RC(ptr_reg)); + match(AddP addr (LShiftX index scale)); + + op_cost(100); + format %{"[$addr + $index << $scale]" %} + interface(MEMORY_INTER) %{ + base($addr); + index($index); + scale($scale); + disp(0x0); + %} +%} + +// Operands for expressing Control Flow +// NOTE: Label is a predefined operand which should not be redefined in +// the AD file. It is generically handled within the ADLC. + +//----------Conditional Branch Operands---------------------------------------- +// Comparison Op - This is the operation of the comparison, and is limited to +// the following set of codes: +// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) +// +// Other attributes of the comparison, such as unsignedness, are specified +// by the comparison instruction that sets a condition code flags register. +// That result is represented by a flags operand whose subtype is appropriate +// to the unsignedness (etc.) of the comparison. +// +// Later, the instruction which matches both the Comparison Op (a Bool) and +// the flags (produced by the Cmp) specifies the coding of the comparison op +// by matching a specific subtype of Bool operand below, such as cmpOpU. + +operand cmpOp() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x0); + not_equal(0x1); + less(0xb); + greater_equal(0xa); + less_equal(0xd); + greater(0xc); + overflow(0x0); // unsupported/unimplemented + no_overflow(0x0); // unsupported/unimplemented + %} +%} + +// integer comparison with 0, signed +operand cmpOp0() %{ + match(Bool); + + format %{ "" %} + interface(COND_INTER) %{ + equal(0x0); + not_equal(0x1); + less(0x4); + greater_equal(0x5); + less_equal(0xd); // unsupported + greater(0xc); // unsupported + overflow(0x0); // unsupported/unimplemented + no_overflow(0x0); // unsupported/unimplemented + %} +%} + +// Comparison Op, unsigned +operand cmpOpU() %{ + match(Bool); + + format %{ "u" %} + interface(COND_INTER) %{ + equal(0x0); + not_equal(0x1); + less(0x3); + greater_equal(0x2); + less_equal(0x9); + greater(0x8); + overflow(0x0); // unsupported/unimplemented + no_overflow(0x0); // unsupported/unimplemented + %} +%} + +// Comparison Op, pointer (same as unsigned) +operand cmpOpP() %{ + match(Bool); + + format %{ "p" %} + interface(COND_INTER) %{ + equal(0x0); + not_equal(0x1); + less(0x3); + greater_equal(0x2); + less_equal(0x9); + greater(0x8); + overflow(0x0); // unsupported/unimplemented + no_overflow(0x0); // unsupported/unimplemented + %} +%} + +operand cmpOpL() %{ + match(Bool); + + format %{ "L" %} + interface(COND_INTER) %{ + equal(0x0); + not_equal(0x1); + less(0xb); + greater_equal(0xa); + less_equal(0xd); + greater(0xc); + overflow(0x0); // unsupported/unimplemented + no_overflow(0x0); // unsupported/unimplemented + %} +%} + +operand cmpOpL_commute() %{ + match(Bool); + + format %{ "L" %} + interface(COND_INTER) %{ + equal(0x0); + not_equal(0x1); + less(0xc); + greater_equal(0xd); + less_equal(0xa); + greater(0xb); + overflow(0x0); // unsupported/unimplemented + no_overflow(0x0); // unsupported/unimplemented + %} +%} + +//----------OPERAND CLASSES---------------------------------------------------- +// Operand Classes are groups of operands that are used to 
simplify +// instruction definitions by not requiring the AD writer to specify separate +// instructions for every form of operand when the instruction accepts +// multiple operand types with the same basic encoding and format. The classic +// case of this is memory operands. +opclass memoryI ( indirect, indOffset12, indIndex, indIndexScale ); +opclass memoryP ( indirect, indOffset12, indIndex, indIndexScale ); +opclass memoryF ( indirect, indOffsetFP ); +opclass memoryF2 ( indirect, indOffsetFPx2 ); +opclass memoryD ( indirect, indOffsetFP ); +opclass memoryfp( indirect, indOffsetFP ); +opclass memoryB ( indirect, indIndex, indOffsetHD ); +opclass memoryS ( indirect, indIndex, indOffsetHD ); +opclass memoryL ( indirect, indIndex, indOffsetHD ); + +opclass memoryScaledI(indIndexScale); +opclass memoryScaledP(indIndexScale); + +// when ldrex/strex is used: +opclass memoryex ( indirect ); +opclass indIndexMemory( indIndex ); +opclass memorylong ( indirect, indOffset12x2 ); +opclass memoryvld ( indirect /* , write back mode not implemented */ ); + +//----------PIPELINE----------------------------------------------------------- +pipeline %{ + +//----------ATTRIBUTES--------------------------------------------------------- +attributes %{ + fixed_size_instructions; // Fixed size instructions + max_instructions_per_bundle = 4; // Up to 4 instructions per bundle + instruction_unit_size = 4; // An instruction is 4 bytes long + instruction_fetch_unit_size = 16; // The processor fetches one line + instruction_fetch_units = 1; // of 16 bytes + + // List of nop instructions + nops( Nop_A0, Nop_A1, Nop_MS, Nop_FA, Nop_BR ); +%} + +//----------RESOURCES---------------------------------------------------------- +// Resources are the functional units available to the machine +resources(A0, A1, MS, BR, FA, FM, IDIV, FDIV, IALU = A0 | A1); + +//----------PIPELINE DESCRIPTION----------------------------------------------- +// Pipeline Description specifies the stages in the machine's pipeline + +pipe_desc(A, P, F, B, I, J, S, R, E, C, M, W, X, T, D); + +//----------PIPELINE CLASSES--------------------------------------------------- +// Pipeline Classes describe the stages in which input and output are +// referenced by the hardware pipeline. 
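
The pipe_class bodies that follow use a compact notation: an entry such as `dst : E(write)` says that the operand named dst is written at stage E of the pipe_desc declared above (optionally with a `+n` cycle offset), and an entry such as `IALU : R` reserves the IALU resource (declared above as A0 | A1) during stage R. As a reading aid, here is a small annotated sketch with a hypothetical name (illustrative only, not part of the patch):

pipe_class ialu_shift_imm_sketch(iRegI dst, iRegI src) %{
  single_instruction;   // bundling hint: this class covers one machine instruction
  dst  : E(write);      // result register becomes available in stage E
  src  : R(read);       // source register is read in stage R
  IALU : R;             // one of the integer ALUs (A0 or A1) is busy in stage R
%}

The concrete classes below follow this pattern, adding multiple_bundles, instruction_count(n), or fixed_latency(n) where a rule expands to several machine instructions.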
+ +// Integer ALU reg-reg operation +pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + single_instruction; + dst : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU reg-reg long operation +pipe_class ialu_reg_reg_2(iRegL dst, iRegL src1, iRegL src2) %{ + instruction_count(2); + dst : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; + IALU : R; +%} + +// Integer ALU reg-reg long dependent operation +pipe_class ialu_reg_reg_2_dep(iRegL dst, iRegL src1, iRegL src2, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : E(write); + src1 : R(read); + src2 : R(read); + cr : E(write); + IALU : R(2); +%} + +// Integer ALU reg-imm operaion +pipe_class ialu_reg_imm(iRegI dst, iRegI src1) %{ + single_instruction; + dst : E(write); + src1 : R(read); + IALU : R; +%} + +// Integer ALU reg-reg operation with condition code +pipe_class ialu_cc_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{ + single_instruction; + dst : E(write); + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU zero-reg operation +pipe_class ialu_zero_reg(iRegI dst, immI0 zero, iRegI src2) %{ + single_instruction; + dst : E(write); + src2 : R(read); + IALU : R; +%} + +// Integer ALU zero-reg operation with condition code only +pipe_class ialu_cconly_zero_reg(flagsReg cr, iRegI src) %{ + single_instruction; + cr : E(write); + src : R(read); + IALU : R; +%} + +// Integer ALU reg-reg operation with condition code only +pipe_class ialu_cconly_reg_reg(flagsReg cr, iRegI src1, iRegI src2) %{ + single_instruction; + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU reg-imm operation with condition code only +pipe_class ialu_cconly_reg_imm(flagsReg cr, iRegI src1) %{ + single_instruction; + cr : E(write); + src1 : R(read); + IALU : R; +%} + +// Integer ALU reg-reg-zero operation with condition code only +pipe_class ialu_cconly_reg_reg_zero(flagsReg cr, iRegI src1, iRegI src2, immI0 zero) %{ + single_instruction; + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU reg-imm-zero operation with condition code only +pipe_class ialu_cconly_reg_imm_zero(flagsReg cr, iRegI src1, immI0 zero) %{ + single_instruction; + cr : E(write); + src1 : R(read); + IALU : R; +%} + +// Integer ALU reg-reg operation with condition code, src1 modified +pipe_class ialu_cc_rwreg_reg(flagsReg cr, iRegI src1, iRegI src2) %{ + single_instruction; + cr : E(write); + src1 : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +pipe_class cmpL_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg cr ) %{ + multiple_bundles; + dst : E(write)+4; + cr : E(write); + src1 : R(read); + src2 : R(read); + IALU : R(3); + BR : R(2); +%} + +// Integer ALU operation +pipe_class ialu_none(iRegI dst) %{ + single_instruction; + dst : E(write); + IALU : R; +%} + +// Integer ALU reg operation +pipe_class ialu_reg(iRegI dst, iRegI src) %{ + single_instruction; may_have_no_code; + dst : E(write); + src : R(read); + IALU : R; +%} + +// Integer ALU reg conditional operation +// This instruction has a 1 cycle stall, and cannot execute +// in the same cycle as the instruction setting the condition +// code. We kludge this by pretending to read the condition code +// 1 cycle earlier, and by marking the functional units as busy +// for 2 cycles with the result available 1 cycle later than +// is really the case. 
+pipe_class ialu_reg_flags( iRegI op2_out, iRegI op2_in, iRegI op1, flagsReg cr ) %{ + single_instruction; + op2_out : C(write); + op1 : R(read); + cr : R(read); // This is really E, with a 1 cycle stall + BR : R(2); + MS : R(2); +%} + +// Integer ALU reg operation +pipe_class ialu_move_reg_L_to_I(iRegI dst, iRegL src) %{ + single_instruction; may_have_no_code; + dst : E(write); + src : R(read); + IALU : R; +%} +pipe_class ialu_move_reg_I_to_L(iRegL dst, iRegI src) %{ + single_instruction; may_have_no_code; + dst : E(write); + src : R(read); + IALU : R; +%} + +// Two integer ALU reg operations +pipe_class ialu_reg_2(iRegL dst, iRegL src) %{ + instruction_count(2); + dst : E(write); + src : R(read); + A0 : R; + A1 : R; +%} + +// Two integer ALU reg operations +pipe_class ialu_move_reg_L_to_L(iRegL dst, iRegL src) %{ + instruction_count(2); may_have_no_code; + dst : E(write); + src : R(read); + A0 : R; + A1 : R; +%} + +// Integer ALU imm operation +pipe_class ialu_imm(iRegI dst) %{ + single_instruction; + dst : E(write); + IALU : R; +%} + +pipe_class ialu_imm_n(iRegI dst) %{ + single_instruction; + dst : E(write); + IALU : R; +%} + +// Integer ALU reg-reg with carry operation +pipe_class ialu_reg_reg_cy(iRegI dst, iRegI src1, iRegI src2, iRegI cy) %{ + single_instruction; + dst : E(write); + src1 : R(read); + src2 : R(read); + IALU : R; +%} + +// Integer ALU cc operation +pipe_class ialu_cc(iRegI dst, flagsReg cc) %{ + single_instruction; + dst : E(write); + cc : R(read); + IALU : R; +%} + +// Integer ALU cc / second IALU operation +pipe_class ialu_reg_ialu( iRegI dst, iRegI src ) %{ + instruction_count(1); multiple_bundles; + dst : E(write)+1; + src : R(read); + IALU : R; +%} + +// Integer ALU cc / second IALU operation +pipe_class ialu_reg_reg_ialu( iRegI dst, iRegI p, iRegI q ) %{ + instruction_count(1); multiple_bundles; + dst : E(write)+1; + p : R(read); + q : R(read); + IALU : R; +%} + +// Integer ALU hi-lo-reg operation +pipe_class ialu_hi_lo_reg(iRegI dst, immI src) %{ + instruction_count(1); multiple_bundles; + dst : E(write)+1; + IALU : R(2); +%} + +// Long Constant +pipe_class loadConL( iRegL dst, immL src ) %{ + instruction_count(2); multiple_bundles; + dst : E(write)+1; + IALU : R(2); + IALU : R(2); +%} + +// Pointer Constant +pipe_class loadConP( iRegP dst, immP src ) %{ + instruction_count(0); multiple_bundles; + fixed_latency(6); +%} + +// Polling Address +pipe_class loadConP_poll( iRegP dst, immP_poll src ) %{ + dst : E(write); + IALU : R; +%} + +// Long Constant small +pipe_class loadConLlo( iRegL dst, immL src ) %{ + instruction_count(2); + dst : E(write); + IALU : R; + IALU : R; +%} + +// [PHH] This is wrong for 64-bit. See LdImmF/D. 
+pipe_class loadConFD(regF dst, immF src, iRegP tmp) %{ + instruction_count(1); multiple_bundles; + src : R(read); + dst : M(write)+1; + IALU : R; + MS : E; +%} + +// Integer ALU nop operation +pipe_class ialu_nop() %{ + single_instruction; + IALU : R; +%} + +// Integer ALU nop operation +pipe_class ialu_nop_A0() %{ + single_instruction; + A0 : R; +%} + +// Integer ALU nop operation +pipe_class ialu_nop_A1() %{ + single_instruction; + A1 : R; +%} + +// Integer Multiply reg-reg operation +pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + single_instruction; + dst : E(write); + src1 : R(read); + src2 : R(read); + MS : R(5); +%} + +pipe_class mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + single_instruction; + dst : E(write)+4; + src1 : R(read); + src2 : R(read); + MS : R(6); +%} + +// Integer Divide reg-reg +pipe_class sdiv_reg_reg_IDIV(iRegI dst, iRegI src1, iRegI src2, iRegI temp, flagsReg cr) %{ + single_instruction; + dst : E(write); + temp : E(write); + src1 : R(read); + src2 : R(read); + temp : R(read); + MS : R(10); +%} + +pipe_class sdiv_reg_reg_SW(iRegI dst, iRegI src1, iRegI src2, iRegI temp1, iRegI temp2, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : E(write); + temp1 : E(write); + temp2 : E(write); + src1 : R(read); + src2 : R(read); + temp1 : R(read); + temp2 : R(read); + MS : R(38); +%} + +// Long Divide +pipe_class divL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + dst : E(write)+71; + src1 : R(read); + src2 : R(read)+1; + MS : R(70); +%} + +// Floating Point Add Float +pipe_class faddF_reg_reg(regF dst, regF src1, regF src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FA : R; +%} + +// Floating Point Add Double +pipe_class faddD_reg_reg(regD dst, regD src1, regD src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FA : R; +%} + +// Floating Point Conditional Move based on integer flags +pipe_class int_conditional_float_move (cmpOp cmp, flagsReg cr, regF dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + cr : R(read); + FA : R(2); + BR : R(2); +%} + +// Floating Point Conditional Move based on integer flags +pipe_class int_conditional_double_move (cmpOp cmp, flagsReg cr, regD dst, regD src) %{ + single_instruction; + dst : X(write); + src : E(read); + cr : R(read); + FA : R(2); + BR : R(2); +%} + +// Floating Point Multiply Float +pipe_class fmulF_reg_reg(regF dst, regF src1, regF src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FM : R; +%} + +// Floating Point Multiply Double +pipe_class fmulD_reg_reg(regD dst, regD src1, regD src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FM : R; +%} + +// Floating Point Divide Float +pipe_class fdivF_reg_reg(regF dst, regF src1, regF src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FM : R; + FDIV : C(14); +%} + +// Floating Point Divide Double +pipe_class fdivD_reg_reg(regD dst, regD src1, regD src2) %{ + single_instruction; + dst : X(write); + src1 : E(read); + src2 : E(read); + FM : R; + FDIV : C(17); +%} + +// Floating Point Move/Negate/Abs Float +pipe_class faddF_reg(regF dst, regF src) %{ + single_instruction; + dst : W(write); + src : E(read); + FA : R(1); +%} + +// Floating Point Move/Negate/Abs Double +pipe_class faddD_reg(regD dst, regD src) %{ + single_instruction; + dst : W(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert F->D +pipe_class fcvtF2D(regD dst, regF 
src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert I->D +pipe_class fcvtI2D(regD dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert LHi->D +pipe_class fcvtLHi2D(regD dst, regD src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert L->D +pipe_class fcvtL2D(regD dst, iRegL src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert L->F +pipe_class fcvtL2F(regF dst, iRegL src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert D->F +pipe_class fcvtD2F(regD dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert I->L +pipe_class fcvtI2L(regD dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Convert D->F +pipe_class fcvtD2I(iRegI dst, regD src, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : X(write)+6; + src : E(read); + FA : R; +%} + +// Floating Point Convert D->L +pipe_class fcvtD2L(regD dst, regD src, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : X(write)+6; + src : E(read); + FA : R; +%} + +// Floating Point Convert F->I +pipe_class fcvtF2I(regF dst, regF src, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : X(write)+6; + src : E(read); + FA : R; +%} + +// Floating Point Convert F->L +pipe_class fcvtF2L(regD dst, regF src, flagsReg cr) %{ + instruction_count(1); multiple_bundles; + dst : X(write)+6; + src : E(read); + FA : R; +%} + +// Floating Point Convert I->F +pipe_class fcvtI2F(regF dst, regF src) %{ + single_instruction; + dst : X(write); + src : E(read); + FA : R; +%} + +// Floating Point Compare +pipe_class faddF_fcc_reg_reg_zero(flagsRegF cr, regF src1, regF src2, immI0 zero) %{ + single_instruction; + cr : X(write); + src1 : E(read); + src2 : E(read); + FA : R; +%} + +// Floating Point Compare +pipe_class faddD_fcc_reg_reg_zero(flagsRegF cr, regD src1, regD src2, immI0 zero) %{ + single_instruction; + cr : X(write); + src1 : E(read); + src2 : E(read); + FA : R; +%} + +// Floating Add Nop +pipe_class fadd_nop() %{ + single_instruction; + FA : R; +%} + +// Integer Store to Memory +pipe_class istore_mem_reg(memoryI mem, iRegI src) %{ + single_instruction; + mem : R(read); + src : C(read); + MS : R; +%} + +// Integer Store to Memory +pipe_class istore_mem_spORreg(memoryI mem, sp_ptr_RegP src) %{ + single_instruction; + mem : R(read); + src : C(read); + MS : R; +%} + +// Float Store +pipe_class fstoreF_mem_reg(memoryF mem, RegF src) %{ + single_instruction; + mem : R(read); + src : C(read); + MS : R; +%} + +// Float Store +pipe_class fstoreF_mem_zero(memoryF mem, immF0 src) %{ + single_instruction; + mem : R(read); + MS : R; +%} + +// Double Store +pipe_class fstoreD_mem_reg(memoryD mem, RegD src) %{ + instruction_count(1); + mem : R(read); + src : C(read); + MS : R; +%} + +// Double Store +pipe_class fstoreD_mem_zero(memoryD mem, immD0 src) %{ + single_instruction; + mem : R(read); + MS : R; +%} + +// Integer Load (when sign bit propagation not needed) +pipe_class iload_mem(iRegI dst, memoryI mem) %{ + single_instruction; + mem : R(read); + dst : C(write); + MS : R; +%} + +// Integer Load (when sign bit propagation or masking is needed) +pipe_class iload_mask_mem(iRegI dst, memoryI mem) %{ + single_instruction; + mem : R(read); + dst : 
M(write); + MS : R; +%} + +// Float Load +pipe_class floadF_mem(regF dst, memoryF mem) %{ + single_instruction; + mem : R(read); + dst : M(write); + MS : R; +%} + +// Float Load +pipe_class floadD_mem(regD dst, memoryD mem) %{ + instruction_count(1); multiple_bundles; // Again, unaligned argument is only multiple case + mem : R(read); + dst : M(write); + MS : R; +%} + +// Memory Nop +pipe_class mem_nop() %{ + single_instruction; + MS : R; +%} + +pipe_class sethi(iRegP dst, immI src) %{ + single_instruction; + dst : E(write); + IALU : R; +%} + +pipe_class loadPollP(iRegP poll) %{ + single_instruction; + poll : R(read); + MS : R; +%} + +pipe_class br(Universe br, label labl) %{ + single_instruction_with_delay_slot; + BR : R; +%} + +pipe_class br_cc(Universe br, cmpOp cmp, flagsReg cr, label labl) %{ + single_instruction_with_delay_slot; + cr : E(read); + BR : R; +%} + +pipe_class br_reg(Universe br, cmpOp cmp, iRegI op1, label labl) %{ + single_instruction_with_delay_slot; + op1 : E(read); + BR : R; + MS : R; +%} + +pipe_class br_nop() %{ + single_instruction; + BR : R; +%} + +pipe_class simple_call(method meth) %{ + instruction_count(2); multiple_bundles; force_serialization; + fixed_latency(100); + BR : R(1); + MS : R(1); + A0 : R(1); +%} + +pipe_class compiled_call(method meth) %{ + instruction_count(1); multiple_bundles; force_serialization; + fixed_latency(100); + MS : R(1); +%} + +pipe_class call(method meth) %{ + instruction_count(0); multiple_bundles; force_serialization; + fixed_latency(100); +%} + +pipe_class tail_call(Universe ignore, label labl) %{ + single_instruction; has_delay_slot; + fixed_latency(100); + BR : R(1); + MS : R(1); +%} + +pipe_class ret(Universe ignore) %{ + single_instruction; has_delay_slot; + BR : R(1); + MS : R(1); +%} + +// The real do-nothing guy +pipe_class empty( ) %{ + instruction_count(0); +%} + +pipe_class long_memory_op() %{ + instruction_count(0); multiple_bundles; force_serialization; + fixed_latency(25); + MS : R(1); +%} + +// Check-cast +pipe_class partial_subtype_check_pipe(Universe ignore, iRegP array, iRegP match ) %{ + array : R(read); + match : R(read); + IALU : R(2); + BR : R(2); + MS : R; +%} + +// Convert FPU flags into +1,0,-1 +pipe_class floating_cmp( iRegI dst, regF src1, regF src2 ) %{ + src1 : E(read); + src2 : E(read); + dst : E(write); + FA : R; + MS : R(2); + BR : R(2); +%} + +// Compare for p < q, and conditionally add y +pipe_class cadd_cmpltmask( iRegI p, iRegI q, iRegI y ) %{ + p : E(read); + q : E(read); + y : E(read); + IALU : R(3) +%} + +// Perform a compare, then move conditionally in a branch delay slot. 
+pipe_class min_max( iRegI src2, iRegI srcdst ) %{ + src2 : E(read); + srcdst : E(read); + IALU : R; + BR : R; +%} + +// Define the class for the Nop node +define %{ + MachNop = ialu_nop; +%} + +%} + +//----------INSTRUCTIONS------------------------------------------------------- + +//------------Special Nop instructions for bundling - no match rules----------- +// Nop using the A0 functional unit +instruct Nop_A0() %{ + ins_pipe(ialu_nop_A0); +%} + +// Nop using the A1 functional unit +instruct Nop_A1( ) %{ + ins_pipe(ialu_nop_A1); +%} + +// Nop using the memory functional unit +instruct Nop_MS( ) %{ + ins_pipe(mem_nop); +%} + +// Nop using the floating add functional unit +instruct Nop_FA( ) %{ + ins_pipe(fadd_nop); +%} + +// Nop using the branch functional unit +instruct Nop_BR( ) %{ + ins_pipe(br_nop); +%} + +//----------Load/Store/Move Instructions--------------------------------------- +//----------Load Instructions-------------------------------------------------- +// Load Byte (8bit signed) +instruct loadB(iRegI dst, memoryB mem) %{ + match(Set dst (LoadB mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRSB $dst,$mem\t! byte -> int" %} + ins_encode %{ + __ ldrsb($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Byte (8bit signed) into a Long Register +instruct loadB2L(iRegL dst, memoryB mem) %{ + match(Set dst (ConvI2L (LoadB mem))); + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "LDRSB $dst.lo,$mem\t! byte -> long\n\t" + "ASR $dst.hi,$dst.lo,31" %} + ins_encode %{ + __ ldrsb($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), $dst$$Register, asr(31)); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Unsigned Byte (8bit UNsigned) into an int reg +instruct loadUB(iRegI dst, memoryB mem) %{ + match(Set dst (LoadUB mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRB $dst,$mem\t! ubyte -> int" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Byte (8bit UNsigned) into a Long Register +instruct loadUB2L(iRegL dst, memoryB mem) %{ + match(Set dst (ConvI2L (LoadUB mem))); + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "LDRB $dst.lo,$mem\t! ubyte -> long\n\t" + "MOV $dst.hi,0" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Byte (8 bit UNsigned) with immediate mask into Long Register +instruct loadUB2L_limmI(iRegL dst, memoryB mem, limmIlow8 mask) %{ + match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); + + ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST); + size(12); + format %{ "LDRB $dst.lo,$mem\t! ubyte -> long\n\t" + "MOV $dst.hi,0\n\t" + "AND $dst.lo,$dst.lo,$mask" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + __ andr($dst$$Register, $dst$$Register, limmI_low($mask$$constant, 8)); + %} + ins_pipe(iload_mem); +%} + +// Load Short (16bit signed) + +instruct loadS(iRegI dst, memoryS mem) %{ + match(Set dst (LoadS mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRSH $dst,$mem\t! short" %} + ins_encode %{ + __ ldrsh($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Short (16 bit signed) to Byte (8 bit signed) +instruct loadS2B(iRegI dst, memoryS mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); + ins_cost(MEMORY_REF_COST); + + size(4); + + format %{ "LDRSB $dst,$mem\t! 
short -> byte" %} + ins_encode %{ + // High 32 bits are harmlessly set on Aarch64 + __ ldrsb($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Short (16bit signed) into a Long Register +instruct loadS2L(iRegL dst, memoryS mem) %{ + match(Set dst (ConvI2L (LoadS mem))); + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "LDRSH $dst.lo,$mem\t! short -> long\n\t" + "ASR $dst.hi,$dst.lo,31" %} + ins_encode %{ + __ ldrsh($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), $dst$$Register, asr(31)); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) + + +instruct loadUS(iRegI dst, memoryS mem) %{ + match(Set dst (LoadUS mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRH $dst,$mem\t! ushort/char" %} + ins_encode %{ + __ ldrh($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) +instruct loadUS2B(iRegI dst, memoryB mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRSB $dst,$mem\t! ushort -> byte" %} + ins_encode %{ + __ ldrsb($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) into a Long Register +instruct loadUS2L(iRegL dst, memoryS mem) %{ + match(Set dst (ConvI2L (LoadUS mem))); + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "LDRH $dst.lo,$mem\t! short -> long\n\t" + "MOV $dst.hi, 0" %} + ins_encode %{ + __ ldrh($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) with mask 0xFF into a Long Register +instruct loadUS2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{ + match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "LDRB $dst.lo,$mem\t! \n\t" + "MOV $dst.hi, 0" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Short/Char (16bit UNsigned) with a immediate mask into a Long Register +instruct loadUS2L_limmI(iRegL dst, memoryS mem, limmI mask) %{ + match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); + ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST); + + size(12); + format %{ "LDRH $dst,$mem\t! ushort/char & mask -> long\n\t" + "MOV $dst.hi, 0\n\t" + "AND $dst,$dst,$mask" %} + ins_encode %{ + __ ldrh($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + __ andr($dst$$Register, $dst$$Register, $mask$$constant); + %} + ins_pipe(iload_mem); +%} + +// Load Integer + +instruct loadI(iRegI dst, memoryI mem) %{ + match(Set dst (LoadI mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "ldr $dst,$mem\t! int" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +// Load Integer to Byte (8 bit signed) +instruct loadI2B(iRegI dst, memoryS mem, immI_24 twentyfour) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); + ins_cost(MEMORY_REF_COST); + + size(4); + + format %{ "LDRSB $dst,$mem\t! 
int -> byte" %} + ins_encode %{ + __ ldrsb($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer to Unsigned Byte (8 bit UNsigned) +instruct loadI2UB(iRegI dst, memoryB mem, immI_255 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + ins_cost(MEMORY_REF_COST); + + size(4); + + format %{ "LDRB $dst,$mem\t! int -> ubyte" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer to Short (16 bit signed) +instruct loadI2S(iRegI dst, memoryS mem, immI_16 sixteen) %{ + match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRSH $dst,$mem\t! int -> short" %} + ins_encode %{ + __ ldrsh($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer to Unsigned Short (16 bit UNsigned) +instruct loadI2US(iRegI dst, memoryS mem, immI_65535 mask) %{ + match(Set dst (AndI (LoadI mem) mask)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDRH $dst,$mem\t! int -> ushort/char" %} + ins_encode %{ + __ ldrh($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer into a Long Register +instruct loadI2L(iRegL dst, memoryI mem) %{ + match(Set dst (ConvI2L (LoadI mem))); + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "LDR $dst.lo,$mem\t! int -> long\n\t" + "ASR $dst.hi,$dst.lo,31\t! int->long" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), $dst$$Register, asr(31)); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer with mask 0xFF into a Long Register +instruct loadI2L_immI_255(iRegL dst, memoryB mem, immI_255 mask) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "LDRB $dst.lo,$mem\t! int & 0xFF -> long\n\t" + "MOV $dst.hi, 0" %} + ins_encode %{ + __ ldrb($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + %} + ins_pipe(iload_mem); +%} + +// Load Integer with mask 0xFFFF into a Long Register +instruct loadI2L_immI_65535(iRegL dst, memoryS mem, immI_65535 mask) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "LDRH $dst,$mem\t! int & 0xFFFF -> long\n\t" + "MOV $dst.hi, 0" %} + ins_encode %{ + __ ldrh($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + %} + ins_pipe(iload_mask_mem); +%} + +// Load Integer with a 31-bit immediate mask into a Long Register +instruct loadI2L_limmU31(iRegL dst, memoryI mem, limmU31 mask) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST); + + size(12); + format %{ "LDR $dst.lo,$mem\t! int -> long\n\t" + "MOV $dst.hi, 0\n\t" + "AND $dst,$dst,$mask" %} + + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + __ andr($dst$$Register, $dst$$Register, $mask$$constant); + %} + ins_pipe(iload_mem); +%} + +// Load Integer with a 31-bit mask into a Long Register +// FIXME: use iRegI mask, remove tmp? +instruct loadI2L_immU31(iRegL dst, memoryI mem, immU31 mask, iRegI tmp) %{ + match(Set dst (ConvI2L (AndI (LoadI mem) mask))); + effect(TEMP dst, TEMP tmp); + + ins_cost(MEMORY_REF_COST + 4*DEFAULT_COST); + size(20); + format %{ "LDR $mem,$dst\t! 
int & 31-bit mask -> long\n\t" + "MOV $dst.hi, 0\n\t" + "MOV_SLOW $tmp,$mask\n\t" + "AND $dst,$tmp,$dst" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + __ mov($tmp$$Register, $mask$$constant); + __ andr($dst$$Register, $dst$$Register, $tmp$$Register); + %} + ins_pipe(iload_mem); +%} + +// Load Unsigned Integer into a Long Register +instruct loadUI2L(iRegL dst, memoryI mem, immL_32bits mask) %{ + match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "LDR $dst.lo,$mem\t! uint -> long\n\t" + "MOV $dst.hi,0" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + __ mov($dst$$Register->successor(), 0); + %} + ins_pipe(iload_mem); +%} + +// Load Long + +instruct loadL(iRegLd dst, memoryL mem ) %{ + predicate(!((LoadLNode*)n)->require_atomic_access()); + match(Set dst (LoadL mem)); + effect(TEMP dst); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "ldrd $dst,$mem\t! long" %} + ins_encode %{ + __ ldrd($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +instruct loadL_2instr(iRegL dst, memorylong mem ) %{ + predicate(!((LoadLNode*)n)->require_atomic_access()); + match(Set dst (LoadL mem)); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); + + size(8); + format %{ "LDR $dst.lo,$mem \t! long order of instrs reversed if $dst.lo == base($mem)\n\t" + "LDR $dst.hi,$mem+4 or $mem" %} + ins_encode %{ + Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); + + if ($dst$$Register == reg_to_register_object($mem$$base)) { + __ ldr($dst$$Register->successor(), Amemhi); + __ ldr($dst$$Register, Amemlo); + } else { + __ ldr($dst$$Register, Amemlo); + __ ldr($dst$$Register->successor(), Amemhi); + } + %} + ins_pipe(iload_mem); +%} + +instruct loadL_volatile(iRegL dst, indirect mem ) %{ + predicate(((LoadLNode*)n)->require_atomic_access()); + match(Set dst (LoadL mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDREXD $dst,$mem\t! long" %} + ins_encode %{ + __ atomic_ldrd($dst$$Register, reg_to_register_object($dst$$reg + 1), reg_to_register_object($mem$$base)); + %} + ins_pipe(iload_mem); +%} + +instruct loadL_volatile_fp(iRegL dst, memoryD mem ) %{ + predicate(((LoadLNode*)n)->require_atomic_access()); + match(Set dst (LoadL mem)); + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "FLDD S14, $mem" + "FMRRD $dst, S14\t! long \n't" %} + ins_encode %{ + __ vldr_f64(f14, $mem$$Address); + __ vmov_f64($dst$$Register, $dst$$Register->successor(), f14); + %} + ins_pipe(iload_mem); +%} + +instruct loadL_unaligned(iRegL dst, memorylong mem ) %{ + match(Set dst (LoadL_unaligned mem)); + ins_cost(MEMORY_REF_COST); + + size(8); + format %{ "LDR $dst.lo,$mem\t! 
long order of instrs reversed if $dst.lo == base($mem)\n\t" + "LDR $dst.hi,$mem+4" %} + ins_encode %{ + Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); + + if ($dst$$Register == reg_to_register_object($mem$$base)) { + __ ldr($dst$$Register->successor(), Amemhi); + __ ldr($dst$$Register, Amemlo); + } else { + __ ldr($dst$$Register, Amemlo); + __ ldr($dst$$Register->successor(), Amemhi); + } + %} + ins_pipe(iload_mem); +%} + +// Load Range +instruct loadRange(iRegI dst, memoryI mem) %{ + match(Set dst (LoadRange mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "LDR_u32 $dst,$mem\t! range" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +// Load Pointer + +instruct loadP(iRegP dst, memoryP mem) %{ + match(Set dst (LoadP mem)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "LDR $dst,$mem\t! ptr" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +// Load Klass Pointer +instruct loadKlass(iRegP dst, memoryI mem) %{ + match(Set dst (LoadKlass mem)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "LDR $dst,$mem\t! klass ptr" %} + ins_encode %{ + __ ldr($dst$$Register, $mem$$Address); + %} + ins_pipe(iload_mem); +%} + +instruct loadD(regD dst, memoryD mem) %{ + match(Set dst (LoadD mem)); + ins_cost(MEMORY_REF_COST); + + size(4); + // FIXME: needs to be atomic, but ARMv7 A.R.M. guarantees + // only LDREXD and STREXD are 64-bit single-copy atomic + format %{ "FLDD $dst,$mem" %} + ins_encode %{ + __ vldr_f64($dst$$FloatRegister, $mem$$Address); + %} + ins_pipe(floadD_mem); +%} + +// Load Double - UNaligned +instruct loadD_unaligned(regD_low dst, memoryF2 mem ) %{ + match(Set dst (LoadD_unaligned mem)); + ins_cost(MEMORY_REF_COST*2+DEFAULT_COST); + size(8); + format %{ "FLDS $dst.lo,$mem\t! misaligned double\n" + "\tFLDS $dst.hi,$mem+4\t!" 
%} + ins_encode %{ + Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); + __ vldr_f32($dst$$FloatRegister, Amemlo); + __ vldr_f32($dst$$FloatRegister->successor(FloatRegisterImpl::SINGLE), Amemhi); + %} + ins_pipe(iload_mem); +%} + +instruct loadF(regF dst, memoryF mem) %{ + match(Set dst (LoadF mem)); + + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "FLDS $dst,$mem" %} + ins_encode %{ + __ vldr_f32($dst$$FloatRegister, $mem$$Address); + %} + ins_pipe(floadF_mem); +%} + +// // Load Constant +instruct loadConI( iRegI dst, immI src ) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 3/2); + format %{ "MOV_SLOW $dst, $src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_hi_lo_reg); +%} + +instruct loadConIMov( iRegI dst, immIMov src ) %{ + match(Set dst src); + size(4); + format %{ "MOV $dst, $src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_imm); +%} + +instruct loadConIMovn( iRegI dst, immIRotn src ) %{ + match(Set dst src); + size(4); + format %{ "MVN $dst, ~$src" %} + ins_encode %{ + __ mvn_i($dst$$Register, ~$src$$constant); + %} + ins_pipe(ialu_imm_n); +%} + +instruct loadConI16( iRegI dst, immI16 src ) %{ + match(Set dst src); + size(4); + format %{ "MOVW $dst, $src" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant); + %} + ins_pipe(ialu_imm_n); +%} + +instruct loadConP(iRegP dst, immP src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 3/2); + format %{ "MOV_SLOW $dst,$src\t!ptr" %} + ins_encode %{ + relocInfo::relocType constant_reloc = _opnds[1]->constant_reloc(); + intptr_t val = $src$$constant; + if (constant_reloc == relocInfo::oop_type) { + __ movoop($dst$$Register, (jobject)val, true); + } else if (constant_reloc == relocInfo::metadata_type) { + __ mov_metadata($dst$$Register, (Metadata*)val); + } else { + __ mov($dst$$Register, val); + } + %} + ins_pipe(loadConP); +%} + + +instruct loadConP_poll(iRegP dst, immP_poll src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + format %{ "MOV_SLOW $dst,$src\t!ptr" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant); + %} + ins_pipe(loadConP_poll); +%} + +instruct loadConL(iRegL dst, immL src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 4); + format %{ "MOV_SLOW $dst.lo, $src & 0x0FFFFFFFFL \t! 
long\n\t" + "MOV_SLOW $dst.hi, $src >> 32" %} + ins_encode %{ + __ mov(reg_to_register_object($dst$$reg), $src$$constant & 0x0FFFFFFFFL); + __ mov(reg_to_register_object($dst$$reg + 1), ((julong)($src$$constant)) >> 32); + %} + ins_pipe(loadConL); +%} + +instruct loadConL16( iRegL dst, immL16 src ) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 2); + + size(8); + format %{ "MOVW $dst.lo, $src \n\t" + "MOVW $dst.hi, 0 \n\t" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant); + __ movw_i(reg_to_register_object($dst$$reg + 1), 0); + %} + ins_pipe(ialu_imm); +%} + +instruct loadConF_imm8(regF dst, imm8F src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + size(4); + + format %{ "FCONSTS $dst, $src"%} + + ins_encode %{ + __ vmov_f32($dst$$FloatRegister, $src$$constant); + %} + ins_pipe(loadConFD); // FIXME +%} + +instruct loadConF(regF dst, immF src, iRegI tmp) %{ + match(Set dst src); + ins_cost(DEFAULT_COST * 2); + effect(TEMP tmp); + size(3*4); + + format %{ "MOV_SLOW $tmp, $src\n\t" + "FMSR $dst, $tmp"%} + + ins_encode %{ + // FIXME revisit once 6961697 is in + union { + jfloat f; + int i; + } v; + v.f = $src$$constant; + __ mov($tmp$$Register, v.i); + __ vmov_f32($dst$$FloatRegister, $tmp$$Register); + %} + ins_pipe(loadConFD); // FIXME +%} + +instruct loadConD_imm8(regD dst, imm8D src) %{ + match(Set dst src); + ins_cost(DEFAULT_COST); + size(4); + + format %{ "FCONSTD $dst, $src"%} + + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$constant); + %} + ins_pipe(loadConFD); // FIXME +%} + +instruct loadConD(regD dst, immD src, iRegP tmp) %{ + match(Set dst src); + effect(TEMP tmp); + ins_cost(MEMORY_REF_COST); + format %{ "FLDD $dst, [$constanttablebase + $constantoffset]\t! load from constant table: double=$src" %} + + ins_encode %{ + Register r = $constanttablebase; + int offset = $constantoffset($src); + if (!is_memoryD(offset)) { // can't use a predicate + // in load constant instructs + __ add($tmp$$Register, r, offset); + r = $tmp$$Register; + offset = 0; + } + __ vldr_f64($dst$$FloatRegister, Address(r, offset)); + %} + ins_pipe(loadConFD); +%} + +// Prefetch instructions. +// Must be safe to execute with invalid address (cannot fault). + +instruct prefetchAlloc_mp( memoryP mem ) %{ + predicate(VM_Version::features() & FT_MP_EXT); + match( PrefetchAllocation mem ); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "PLDW $mem\t! Prefetch allocation" %} + ins_encode %{ + __ pldw($mem$$Address); + %} + ins_pipe(iload_mem); +%} + +instruct prefetchAlloc_sp( memoryP mem ) %{ + predicate(!(VM_Version::features() & FT_MP_EXT)); + match( PrefetchAllocation mem ); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "PLD $mem\t! Prefetch allocation" %} + ins_encode %{ + __ pld($mem$$Address); + %} + ins_pipe(iload_mem); +%} + +//----------Store Instructions------------------------------------------------- +// Store Byte +instruct storeB(memoryB mem, store_RegI src) %{ + match(Set mem (StoreB mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "STRB $src,$mem\t! byte" %} + ins_encode %{ + __ strb($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} + +instruct storeCM(memoryB mem, store_RegI src) %{ + match(Set mem (StoreCM mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "STRB $src,$mem\t! 
CMS card-mark byte" %} + ins_encode %{ + __ strb($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} + +// Store Char/Short + +instruct storeC(memoryS mem, store_RegI src) %{ + match(Set mem (StoreC mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "STRH $src,$mem\t! short" %} + ins_encode %{ + __ strh($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} + +// Store Integer + +instruct storeI(memoryI mem, store_RegI src) %{ + match(Set mem (StoreI mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "str $src,$mem" %} + ins_encode %{ + __ str($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} + +// Store Long + +instruct storeL(memoryL mem, store_RegLd src) %{ + predicate(!((StoreLNode*)n)->require_atomic_access()); + match(Set mem (StoreL mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "strd $src,$mem\t! long\n\t" %} + + ins_encode %{ + __ strd($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} + +instruct storeL_2instr(memorylong mem, iRegL src) %{ + predicate(!((StoreLNode*)n)->require_atomic_access()); + match(Set mem (StoreL mem src)); + ins_cost(MEMORY_REF_COST + DEFAULT_COST); + + size(8); + format %{ "STR $src.lo,$mem\t! long\n\t" + "STR $src.hi,$mem+4" %} + + ins_encode %{ + Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); + Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); + __ str($src$$Register, Amemlo); + __ str($src$$Register->successor(), Amemhi); + %} + ins_pipe(istore_mem_reg); +%} + +instruct storeL_volatile(indirect mem, iRegL src) %{ + predicate(((StoreLNode*)n)->require_atomic_access()); + match(Set mem (StoreL mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "STMIA $src,$mem\t! long" %} + ins_encode %{ + // FIXME: why is stmia considered atomic? Should be strexd + // TODO: need 3 temp registers to use atomic_strd + __ stmia(reg_to_register_object($mem$$base), RegSet::of($src$$Register, reg_to_register_object($src$$reg + 1)).bits(), /*wb*/false); + %} + ins_pipe(istore_mem_reg); +%} + +instruct storeL_volatile_fp(memoryD mem, iRegL src) %{ + predicate(((StoreLNode*)n)->require_atomic_access()); + match(Set mem (StoreL mem src)); + ins_cost(MEMORY_REF_COST); + size(8); + format %{ "FMDRR S14, $src\t! long \n\t" + "FSTD S14, $mem" %} + ins_encode %{ + __ vmov_f64(f14, $src$$Register, $src$$Register->successor()); + __ vstr_f64(f14, $mem$$Address); + %} + ins_pipe(istore_mem_reg); +%} + +// Store Pointer + +instruct storeP(memoryP mem, store_ptr_RegP src) %{ + match(Set mem (StoreP mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + + format %{ "STR $src,$mem\t! ptr" %} + ins_encode %{ + __ str($src$$Register, $mem$$Address); + %} + ins_pipe(istore_mem_spORreg); +%} + +// Store Double + +instruct storeD(memoryD mem, regD src) %{ + match(Set mem (StoreD mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + // FIXME: needs to be atomic, but ARMv7 A.R.M. 
guarantees + // only LDREXD and STREXD are 64-bit single-copy atomic + format %{ "FSTD $src,$mem" %} + ins_encode %{ + __ vstr_f64($src$$FloatRegister, $mem$$Address); + %} + ins_pipe(fstoreD_mem_reg); +%} + +// Store Float + +instruct storeF( memoryF mem, regF src) %{ + match(Set mem (StoreF mem src)); + ins_cost(MEMORY_REF_COST); + + size(4); + format %{ "FSTS $src,$mem" %} + ins_encode %{ + __ vstr_f32($src$$FloatRegister, $mem$$Address); + %} + ins_pipe(fstoreF_mem_reg); +%} + +//----------MemBar Instructions----------------------------------------------- +// Memory barrier flavors + +// TODO: take advantage of Aarch64 load-acquire, store-release, etc +// pattern-match out unnecessary membars +instruct membar_storestore() %{ + match(MemBarStoreStore); + ins_cost(4*MEMORY_REF_COST); + + size(4); + format %{ "MEMBAR-storestore" %} + ins_encode %{ + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore)); + %} + ins_pipe(long_memory_op); +%} + +instruct membar_acquire() %{ + match(MemBarAcquire); + match(LoadFence); + ins_cost(4*MEMORY_REF_COST); + + size(4); + format %{ "MEMBAR-acquire" %} + ins_encode %{ + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadLoad | MacroAssembler::LoadStore)); + %} + ins_pipe(long_memory_op); +%} + +instruct membar_acquire_lock() %{ + match(MemBarAcquireLock); + ins_cost(0); + + size(0); + format %{ "!MEMBAR-acquire (CAS in prior FastLock so empty encoding)" %} + ins_encode( ); + ins_pipe(empty); +%} + +instruct membar_release() %{ + match(MemBarRelease); + match(StoreFence); + ins_cost(4*MEMORY_REF_COST); + + size(4); + format %{ "MEMBAR-release" %} + ins_encode %{ + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore | MacroAssembler::LoadStore)); + %} + ins_pipe(long_memory_op); +%} + +instruct membar_release_lock() %{ + match(MemBarReleaseLock); + ins_cost(0); + + size(0); + format %{ "!MEMBAR-release (CAS in succeeding FastUnlock so empty encoding)" %} + ins_encode( ); + ins_pipe(empty); +%} + +instruct membar_volatile() %{ + match(MemBarVolatile); + ins_cost(4*MEMORY_REF_COST); + + size(4); + format %{ "MEMBAR-volatile" %} + ins_encode %{ + __ membar(MacroAssembler::StoreLoad); + %} + ins_pipe(long_memory_op); +%} + +instruct unnecessary_membar_volatile() %{ + match(MemBarVolatile); + predicate(Matcher::post_store_load_barrier(n)); + ins_cost(0); + + size(0); + format %{ "!MEMBAR-volatile (unnecessary so empty encoding)" %} + ins_encode( ); + ins_pipe(empty); +%} + +//----------Register Move Instructions----------------------------------------- +// instruct roundDouble_nop(regD dst) %{ +// match(Set dst (RoundDouble dst)); +// ins_pipe(empty); +// %} + + +// instruct roundFloat_nop(regF dst) %{ +// match(Set dst (RoundFloat dst)); +// ins_pipe(empty); +// %} + + +// Cast Index to Pointer for unsafe natives +instruct castX2P(iRegX src, iRegP dst) %{ + match(Set dst (CastX2P src)); + + format %{ "MOV $dst,$src\t! IntX->Ptr if $dst != $src" %} + ins_encode %{ + if ($dst$$Register != $src$$Register) { + __ mov($dst$$Register, $src$$Register); + } + %} + ins_pipe(ialu_reg); +%} + +// Cast Pointer to Index for unsafe natives +instruct castP2X(iRegP src, iRegX dst) %{ + match(Set dst (CastP2X src)); + + format %{ "MOV $dst,$src\t! 
Ptr->IntX if $dst != $src" %} + ins_encode %{ + if ($dst$$Register != $src$$Register) { + __ mov($dst$$Register, $src$$Register); + } + %} + ins_pipe(ialu_reg); +%} + +//----------Conditional Move--------------------------------------------------- +// Conditional move +instruct cmovIP_reg(cmpOpP cmp, flagsRegP pcc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src))); + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src\t! int" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovIP_immMov(cmpOpP cmp, flagsRegP pcc, iRegI dst, immIMov src) %{ + match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src))); + ins_cost(140); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovIP_imm16(cmpOpP cmp, flagsRegP pcc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src))); + ins_cost(140); + size(4); + format %{ "MOVw$cmp $dst,$src" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovI_reg(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovI_immMov(cmpOp cmp, flagsReg icc, iRegI dst, immIMov src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + ins_cost(140); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovII_imm16(cmpOp cmp, flagsReg icc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + ins_cost(140); + size(4); + format %{ "MOVw$cmp $dst,$src" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovII_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovII_immMov_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, immIMov src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(140); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct 
cmovII_imm16_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(140); + size(4); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovIIu_reg(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovIIu_immMov(cmpOpU cmp, flagsRegU icc, iRegI dst, immIMov src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + ins_cost(140); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovIIu_imm16(cmpOpU cmp, flagsRegU icc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp icc) (Binary dst src))); + ins_cost(140); + size(4); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +// Conditional move +instruct cmovPP_reg(cmpOpP cmp, flagsRegP pcc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src))); + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPP_imm(cmpOpP cmp, flagsRegP pcc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src))); + ins_cost(140); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +// This instruction also works with CmpN so we don't need cmovPN_reg. +instruct cmovPI_reg(cmpOp cmp, flagsReg icc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(4); + format %{ "MOV$cmp $dst,$src\t! ptr" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(150); + + size(4); + format %{ "MOV$cmp $dst,$src\t! ptr" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPIu_reg(cmpOpU cmp, flagsRegU icc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(4); + format %{ "MOV$cmp $dst,$src\t! 
ptr" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPI_imm(cmpOp cmp, flagsReg icc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + ins_cost(140); + + size(4); + format %{ "MOV$cmp $dst,$src\t! ptr" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovPI_imm_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(140); + + size(4); + format %{ "MOV$cmp $dst,$src\t! ptr" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovPIu_imm(cmpOpU cmp, flagsRegU icc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp icc) (Binary dst src))); + ins_cost(140); + + size(4); + format %{ "MOV$cmp $dst,$src\t! ptr" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +// Conditional move +instruct cmovFP_reg(cmpOpP cmp, flagsRegP pcc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp pcc) (Binary dst src))); + ins_cost(150); + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovFI_reg(cmpOp cmp, flagsReg icc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovFI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(150); + + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovFIu_reg(cmpOpU cmp, flagsRegU icc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +// Conditional move +instruct cmovDP_reg(cmpOpP cmp, flagsRegP pcc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp pcc) (Binary dst src))); + ins_cost(150); + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, 
(Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_double_move); +%} + +instruct cmovDI_reg(cmpOp cmp, flagsReg icc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_double_move); +%} + +instruct cmovDI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(150); + + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_double_move); +%} + +instruct cmovDIu_reg(cmpOpU cmp, flagsRegU icc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_double_move); +%} + +// Conditional move +instruct cmovLP_reg(cmpOpP cmp, flagsRegP pcc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src))); + ins_cost(150); + + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t" + "MOV$cmp $dst.hi,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct cmovLP_immRot(cmpOpP cmp, flagsRegP pcc, iRegL dst, immLlowRot src) %{ + match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src))); + ins_cost(140); + + size(8); + format %{ "MOV$cmp $dst.lo,$src\t! long\n\t" + "MOV$cmp $dst.hi,0" %} + ins_encode %{ + __ mov($dst$$Register, (long)$src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLP_imm16(cmpOpP cmp, flagsRegP pcc, iRegL dst, immL16 src) %{ + match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src))); + ins_cost(140); + + size(8); + format %{ "MOV$cmp $dst.lo,$src\t! long\n\t" + "MOV$cmp $dst.hi,0" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLI_reg(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! 
long\n\t" + "MOV$cmp $dst.hi,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovLI_reg_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(150); + + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t" + "MOV$cmp $dst.hi,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct cmovLI_immRot(cmpOp cmp, flagsReg icc, iRegL dst, immLlowRot src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + ins_cost(140); + + size(8); + format %{ "MOV$cmp $dst.lo,$src\t! long\n\t" + "MOV$cmp $dst.hi,0" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct cmovLI_immRot_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, immLlowRot src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(140); + + size(8); + format %{ "MOV$cmp $dst.lo,$src\t! long\n\t" + "MOV$cmp $dst.hi,0" %} + ins_encode %{ + __ mov($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLI_imm16(cmpOp cmp, flagsReg icc, iRegL dst, immL16 src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + ins_cost(140); + + size(8); + format %{ "MOV$cmp $dst.lo,$src\t! long\n\t" + "MOV$cmp $dst.hi,0" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + __ movw_i($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLI_imm16_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, iRegL dst, immL16 src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || + _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + ins_cost(140); + + size(8); + format %{ "MOV$cmp $dst.lo,$src\t! 
long\n\t" + "MOV$cmp $dst.hi,0" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + __ movw_i($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLIu_reg(cmpOpU cmp, flagsRegU icc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp icc) (Binary dst src))); + ins_cost(150); + + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t" + "MOV$cmp $dst.hi,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + + +//----------OS and Locking Instructions---------------------------------------- + +// This name is KNOWN by the ADLC and cannot be changed. +// The ADLC forces a 'TypeRawPtr::BOTTOM' output type +// for this guy. +instruct tlsLoadP(RthreadRegP dst) %{ + match(Set dst (ThreadLocal)); + + size(0); + ins_cost(0); + format %{ "! TLS is in $dst" %} + ins_encode( /*empty encoding*/ ); + ins_pipe(ialu_none); +%} + +instruct checkCastPP( iRegP dst ) %{ + match(Set dst (CheckCastPP dst)); + + size(0); + format %{ "! checkcastPP of $dst" %} + ins_encode( /*empty encoding*/ ); + ins_pipe(empty); +%} + + +instruct castPP( iRegP dst ) %{ + match(Set dst (CastPP dst)); + format %{ "! castPP of $dst" %} + ins_encode( /*empty encoding*/ ); + ins_pipe(empty); +%} + +instruct castII( iRegI dst ) %{ + match(Set dst (CastII dst)); + format %{ "! castII of $dst" %} + ins_encode( /*empty encoding*/ ); + ins_cost(0); + ins_pipe(empty); +%} + +//----------Arithmetic Instructions-------------------------------------------- +// Addition Instructions +// Register Addition +instruct addI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (AddI src1 src2)); + + size(4); + format %{ "add_32 $dst,$src1,$src2\t! int" %} + ins_encode %{ + __ add($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct addshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (AddI (LShiftI src1 src2) src3)); + + size(4); + format %{ "add_32 $dst,$src3,$src1<<$src2\t! int" %} + ins_encode %{ + __ add($dst$$Register, $src3$$Register, $src1$$Register, lsl($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct addshlI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ + match(Set dst (AddI (LShiftI src1 src2) src3)); + + size(4); + format %{ "add_32 $dst,$src3,$src1<<$src2\t! int" %} + ins_encode %{ + __ add($dst$$Register, $src3$$Register, $src1$$Register, lsl($src2$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct addsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (AddI (RShiftI src1 src2) src3)); + + size(4); + format %{ "add_32 $dst,$src3,$src1>>$src2\t! int" %} + ins_encode %{ + __ add($dst$$Register, $src3$$Register, $src1$$Register, asr($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct addsarI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ + match(Set dst (AddI (RShiftI src1 src2) src3)); + + size(4); + format %{ "add_32 $dst,$src3,$src1>>$src2\t! 
int" %} + ins_encode %{ + __ add($dst$$Register, $src3$$Register, $src1$$Register, asr($src2$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct addshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (AddI (URShiftI src1 src2) src3)); + + size(4); + format %{ "add_32 $dst,$src3,$src1>>>$src2\t! int" %} + ins_encode %{ + __ add($dst$$Register, $src3$$Register, $src1$$Register, lsr($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct addshrI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ + match(Set dst (AddI (URShiftI src1 src2) src3)); + + size(4); + format %{ "add_32 $dst,$src3,$src1>>>$src2\t! int" %} + ins_encode %{ + __ add($dst$$Register, $src3$$Register, $src1$$Register, lsr($src2$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Immediate Addition +instruct addI_reg_aimmI(iRegI dst, iRegI src1, aimmI src2) %{ + match(Set dst (AddI src1 src2)); + + size(4); + format %{ "add_32 $dst,$src1,$src2\t! int" %} + ins_encode %{ + __ add($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +// Pointer Register Addition +instruct addP_reg_reg(iRegP dst, iRegP src1, iRegX src2) %{ + match(Set dst (AddP src1 src2)); + + size(4); + format %{ "ADD $dst,$src1,$src2\t! ptr" %} + ins_encode %{ + __ add($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +// shifted iRegX operand +operand shiftedX(iRegX src2, shimmX src3) %{ +//constraint(ALLOC_IN_RC(sp_ptr_reg)); + match(LShiftX src2 src3); + + op_cost(1); + format %{ "$src2 << $src3" %} + interface(MEMORY_INTER) %{ + base($src2); + index(0xff); + scale($src3); + disp(0x0); + %} +%} + +instruct addshlP_reg_reg_imm(iRegP dst, iRegP src1, shiftedX src2) %{ + match(Set dst (AddP src1 src2)); + + ins_cost(DEFAULT_COST * 3/2); + size(4); + format %{ "ADD $dst,$src1,$src2\t! ptr" %} + ins_encode %{ + Register base = reg_to_register_object($src2$$base); + __ add($dst$$Register, $src1$$Register, base, lsl($src2$$scale)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Pointer Immediate Addition +instruct addP_reg_aimmX(iRegP dst, iRegP src1, aimmX src2) %{ + match(Set dst (AddP src1 src2)); + + size(4); + format %{ "ADD $dst,$src1,$src2\t! ptr" %} + ins_encode %{ + __ add($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +// Long Addition +instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg ccr) %{ + match(Set dst (AddL src1 src2)); + effect(KILL ccr); + ins_cost(DEFAULT_COST*2); + size(8); + format %{ "ADDS $dst.lo,$src1.lo,$src2.lo\t! long\n\t" + "ADC $dst.hi,$src1.hi,$src2.hi" %} + ins_encode %{ + __ adds($dst$$Register, $src1$$Register, $src2$$Register); + __ adc($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); + %} + ins_pipe(ialu_reg_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct addL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con, flagsReg ccr) %{ + match(Set dst (AddL src1 con)); + effect(KILL ccr); + size(8); + format %{ "ADDS $dst.lo,$src1.lo,$con\t! long\n\t" + "ADC $dst.hi,$src1.hi,0" %} + ins_encode %{ + __ adds($dst$$Register, $src1$$Register, (long)$con$$constant); + __ adc($dst$$Register->successor(), $src1$$Register->successor(), 0); + %} + ins_pipe(ialu_reg_imm); +%} + +//----------Conditional_store-------------------------------------------------- +// Conditional-store of the updated heap-top. 
+// Used during allocation of the shared heap. +// Sets flags (EQ) on success. + +// TODO: optimize out barriers with AArch64 load-acquire/store-release +// LoadP-locked. +instruct loadPLocked(iRegP dst, memoryex mem) %{ + match(Set dst (LoadPLocked mem)); + size(4); + format %{ "LDREX $dst,$mem" %} + ins_encode %{ + __ ldrex($dst$$Register,$mem$$Address); + %} + ins_pipe(iload_mem); +%} + +instruct storePConditional( memoryex heap_top_ptr, iRegP oldval, iRegP newval, iRegI tmp, flagsRegP pcc ) %{ + predicate(_kids[1]->_kids[0]->_leaf->Opcode() == Op_LoadPLocked); // only works in conjunction with a LoadPLocked node + match(Set pcc (StorePConditional heap_top_ptr (Binary oldval newval))); + effect( TEMP tmp ); + size(8); + format %{ "STREX $tmp,$newval,$heap_top_ptr\n\t" + "CMP $tmp, 0" %} + ins_encode %{ + __ strex($tmp$$Register, $newval$$Register, $heap_top_ptr$$Address); + __ cmp($tmp$$Register, 0); + %} + ins_pipe( long_memory_op ); +%} + +// Conditional-store of an intx value. +instruct storeXConditional( memoryex mem, iRegX oldval, iRegX newval, iRegX tmp, flagsReg icc ) %{ + match(Set icc (StoreIConditional mem (Binary oldval newval))); + effect( TEMP tmp ); + size(28); + format %{ "loop: \n\t" + "LDREX $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem], DOESN'T set $newval=[$mem] in any case\n\t" + "XORS $tmp,$tmp, $oldval\n\t" + "STREX.eq $tmp, $newval, $mem\n\t" + "CMP.eq $tmp, 1 \n\t" + "B.eq loop \n\t" + "TEQ $tmp, 0\n\t" + "membar LoadStore|LoadLoad" %} + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($tmp$$Register, $mem$$Address); + __ eors($tmp$$Register, $tmp$$Register, $oldval$$Register); + __ strex($tmp$$Register, $newval$$Register, $mem$$Address, Assembler::EQ); + __ cmp($tmp$$Register, 1, Assembler::EQ); + __ b(loop, Assembler::EQ); + __ teq($tmp$$Register, 0); + // used by biased locking only. Requires a membar. + __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::LoadStore | MacroAssembler::LoadLoad)); + %} + ins_pipe( long_memory_op ); +%} + +// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them + +instruct compareAndSwapL_bool(memoryex mem, iRegL oldval, iRegLd newval, iRegI res, iRegLd tmp, flagsReg ccr ) %{ + match(Set res (CompareAndSwapL mem (Binary oldval newval))); + effect( KILL ccr, TEMP tmp); + size(32); + format %{ "loop: \n\t" + "LDREXD $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t" + "CMP $tmp.lo, $oldval.lo\n\t" + "CMP.eq $tmp.hi, $oldval.hi\n\t" + "STREXD.eq $tmp, $newval, $mem\n\t" + "MOV.ne $tmp, 0 \n\t" + "XORS.eq $tmp,$tmp, 1 \n\t" + "B.eq loop \n\t" + "MOV $res, $tmp" %} + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrexd($tmp$$Register, $mem$$Address); + __ cmp($tmp$$Register, $oldval$$Register); + __ cmp($tmp$$Register->successor(), $oldval$$Register->successor(), Assembler::EQ); + __ strexd($tmp$$Register, $newval$$Register, $mem$$Address, Assembler::EQ); + __ mov($tmp$$Register, 0, Assembler::NE); + __ eors($tmp$$Register, $tmp$$Register, 1, Assembler::EQ); + __ b(loop, Assembler::EQ); + __ mov($res$$Register, $tmp$$Register); + %} + ins_pipe( long_memory_op ); +%} + + +instruct compareAndSwapI_bool(memoryex mem, iRegI oldval, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr ) %{ + match(Set res (CompareAndSwapI mem (Binary oldval newval))); + effect( KILL ccr, TEMP tmp); + size(28); + format %{ "loop: \n\t" + "LDREX $tmp, $mem\t! 
If $oldval==[$mem] Then store $newval into [$mem]\n\t" + "CMP $tmp, $oldval\n\t" + "STREX.eq $tmp, $newval, $mem\n\t" + "MOV.ne $tmp, 0 \n\t" + "XORS.eq $tmp,$tmp, 1 \n\t" + "B.eq loop \n\t" + "MOV $res, $tmp" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($tmp$$Register,$mem$$Address); + __ cmp($tmp$$Register, $oldval$$Register); + __ strex($tmp$$Register, $newval$$Register, $mem$$Address, Assembler::EQ); + __ mov($tmp$$Register, 0, Assembler::NE); + __ eors($tmp$$Register, $tmp$$Register, 1, Assembler::EQ); + __ b(loop, Assembler::EQ); + __ mov($res$$Register, $tmp$$Register); + %} + ins_pipe( long_memory_op ); +%} + +instruct compareAndSwapP_bool(memoryex mem, iRegP oldval, iRegP newval, iRegI res, iRegI tmp, flagsReg ccr ) %{ + match(Set res (CompareAndSwapP mem (Binary oldval newval))); + effect( KILL ccr, TEMP tmp); + size(28); + format %{ "loop: \n\t" + "LDREX $tmp, $mem\t! If $oldval==[$mem] Then store $newval into [$mem]\n\t" + "CMP $tmp, $oldval\n\t" + "STREX.eq $tmp, $newval, $mem\n\t" + "MOV.ne $tmp, 0 \n\t" + "EORS.eq $tmp,$tmp, 1 \n\t" + "B.eq loop \n\t" + "MOV $res, $tmp" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($tmp$$Register,$mem$$Address); + __ cmp($tmp$$Register, $oldval$$Register); + __ strex($tmp$$Register, $newval$$Register, $mem$$Address, Assembler::EQ); + __ mov($tmp$$Register, 0, Assembler::NE); + __ eors($tmp$$Register, $tmp$$Register, 1, Assembler::EQ); + __ b(loop, Assembler::EQ); + __ mov($res$$Register, $tmp$$Register); + %} + ins_pipe( long_memory_op ); +%} + +instruct xaddI_aimmI_no_res(memoryex mem, aimmI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddI mem add)); + effect(KILL ccr, TEMP tmp1, TEMP tmp2); + size(20); + format %{ "loop: \n\t" + "LDREX $tmp1, $mem\n\t" + "ADD $tmp1, $tmp1, $add\n\t" + "STREX $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($tmp1$$Register,$mem$$Address); + __ add($tmp1$$Register, $tmp1$$Register, $add$$constant); + __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, Assembler::NE); + %} + ins_pipe( long_memory_op ); +%} + +instruct xaddI_reg_no_res(memoryex mem, iRegI add, Universe dummy, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddI mem add)); + effect(KILL ccr, TEMP tmp1, TEMP tmp2); + size(20); + format %{ "loop: \n\t" + "LDREX $tmp1, $mem\n\t" + "ADD $tmp1, $tmp1, $add\n\t" + "STREX $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($tmp1$$Register,$mem$$Address); + __ add($tmp1$$Register, $tmp1$$Register, $add$$Register); + __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, Assembler::NE); + %} + ins_pipe( long_memory_op ); +%} + +instruct xaddI_aimmI(memoryex mem, aimmI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{ + match(Set res (GetAndAddI mem add)); + effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res); + size(20); + format %{ "loop: \n\t" + "LDREX $res, $mem\n\t" + "ADD $tmp1, $res, $add\n\t" + "STREX $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($res$$Register,$mem$$Address); + __ add($tmp1$$Register, $res$$Register, $add$$constant); + __ strex($tmp2$$Register, 
$tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, Assembler::NE); + %} + ins_pipe( long_memory_op ); +%} + +instruct xaddI_reg(memoryex mem, iRegI add, iRegI res, iRegI tmp1, iRegI tmp2, flagsReg ccr) %{ + match(Set res (GetAndAddI mem add)); + effect(KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res); + size(20); + format %{ "loop: \n\t" + "LDREX $res, $mem\n\t" + "ADD $tmp1, $res, $add\n\t" + "STREX $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($res$$Register,$mem$$Address); + __ add($tmp1$$Register, $res$$Register, $add$$Register); + __ strex($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, Assembler::NE); + %} + ins_pipe( long_memory_op ); +%} + +instruct xaddL_reg_no_res(memoryex mem, iRegL add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddL mem add)); + effect( KILL ccr, TEMP tmp1, TEMP tmp2); + size(24); + format %{ "loop: \n\t" + "LDREXD $tmp1, $mem\n\t" + "ADDS $tmp1.lo, $tmp1.lo, $add.lo\n\t" + "ADC $tmp1.hi, $tmp1.hi, $add.hi\n\t" + "STREXD $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrexd($tmp1$$Register, $mem$$Address); + __ adds($tmp1$$Register, $tmp1$$Register, $add$$Register); + __ adc($tmp1$$Register->successor(), $tmp1$$Register->successor(), $add$$Register->successor()); + __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, Assembler::NE); + %} + ins_pipe( long_memory_op ); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct xaddL_immRot_no_res(memoryex mem, immLlowRot add, Universe dummy, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{ + predicate(n->as_LoadStore()->result_not_used()); + match(Set dummy (GetAndAddL mem add)); + effect( KILL ccr, TEMP tmp1, TEMP tmp2); + size(24); + format %{ "loop: \n\t" + "LDREXD $tmp1, $mem\n\t" + "ADDS $tmp1.lo, $tmp1.lo, $add\n\t" + "ADC $tmp1.hi, $tmp1.hi, 0\n\t" + "STREXD $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrexd($tmp1$$Register, $mem$$Address); + __ adds($tmp1$$Register, $tmp1$$Register, (long)$add$$constant); + __ adc($tmp1$$Register->successor(), $tmp1$$Register->successor(), 0); + __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, Assembler::NE); + %} + ins_pipe( long_memory_op ); +%} + +instruct xaddL_reg(memoryex mem, iRegL add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{ + match(Set res (GetAndAddL mem add)); + effect( KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res); + size(24); + format %{ "loop: \n\t" + "LDREXD $res, $mem\n\t" + "ADDS $tmp1.lo, $res.lo, $add.lo\n\t" + "ADC $tmp1.hi, $res.hi, $add.hi\n\t" + "STREXD $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrexd($res$$Register, $mem$$Address); + __ adds($tmp1$$Register, $res$$Register, $add$$Register); + __ adc($tmp1$$Register->successor(), $res$$Register->successor(), $add$$Register->successor()); + __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, Assembler::NE); + %} + ins_pipe( long_memory_op ); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// 
(hi($con$$constant), lo($con$$constant)) becomes +instruct xaddL_immRot(memoryex mem, immLlowRot add, iRegLd res, iRegLd tmp1, iRegI tmp2, flagsReg ccr) %{ + match(Set res (GetAndAddL mem add)); + effect( KILL ccr, TEMP tmp1, TEMP tmp2, TEMP res); + size(24); + format %{ "loop: \n\t" + "LDREXD $res, $mem\n\t" + "ADDS $tmp1.lo, $res.lo, $add\n\t" + "ADC $tmp1.hi, $res.hi, 0\n\t" + "STREXD $tmp2, $tmp1, $mem\n\t" + "CMP $tmp2, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrexd($res$$Register, $mem$$Address); + __ adds($tmp1$$Register, $res$$Register, (long)$add$$constant); + __ adc($tmp1$$Register->successor(), $res$$Register->successor(), 0); + __ strexd($tmp2$$Register, $tmp1$$Register, $mem$$Address); + __ cmp($tmp2$$Register, 0); + __ b(loop, Assembler::NE); + %} + ins_pipe( long_memory_op ); +%} + +instruct xchgI(memoryex mem, iRegI newval, iRegI res, iRegI tmp, flagsReg ccr) %{ + match(Set res (GetAndSetI mem newval)); + effect(KILL ccr, TEMP tmp, TEMP res); + size(16); + format %{ "loop: \n\t" + "LDREX $res, $mem\n\t" + "STREX $tmp, $newval, $mem\n\t" + "CMP $tmp, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($res$$Register,$mem$$Address); + __ strex($tmp$$Register, $newval$$Register, $mem$$Address); + __ cmp($tmp$$Register, 0); + __ b(loop, Assembler::NE); + %} + ins_pipe( long_memory_op ); +%} + +instruct xchgL(memoryex mem, iRegLd newval, iRegLd res, iRegI tmp, flagsReg ccr) %{ + match(Set res (GetAndSetL mem newval)); + effect( KILL ccr, TEMP tmp, TEMP res); + size(16); + format %{ "loop: \n\t" + "LDREXD $res, $mem\n\t" + "STREXD $tmp, $newval, $mem\n\t" + "CMP $tmp, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrexd($res$$Register, $mem$$Address); + __ strexd($tmp$$Register, $newval$$Register, $mem$$Address); + __ cmp($tmp$$Register, 0); + __ b(loop, Assembler::NE); + %} + ins_pipe( long_memory_op ); +%} + +instruct xchgP(memoryex mem, iRegP newval, iRegP res, iRegI tmp, flagsReg ccr) %{ + match(Set res (GetAndSetP mem newval)); + effect(KILL ccr, TEMP tmp, TEMP res); + size(16); + format %{ "loop: \n\t" + "LDREX $res, $mem\n\t" + "STREX $tmp, $newval, $mem\n\t" + "CMP $tmp, 0 \n\t" + "B.ne loop \n\t" %} + + ins_encode %{ + Label loop; + __ bind(loop); + __ ldrex($res$$Register,$mem$$Address); + __ strex($tmp$$Register, $newval$$Register, $mem$$Address); + __ cmp($tmp$$Register, 0); + __ b(loop, Assembler::NE); + %} + ins_pipe( long_memory_op ); +%} + +//--------------------- +// Subtraction Instructions +// Register Subtraction +instruct subI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (SubI src1 src2)); + + size(4); + format %{ "sub_32 $dst,$src1,$src2\t! int" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct subshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (SubI src1 (LShiftI src2 src3))); + + size(4); + format %{ "SUB $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct subshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (SubI src1 (LShiftI src2 src3))); + + size(4); + format %{ "sub_32 $dst,$src1,$src2<<$src3\t! 
int" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct subsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (SubI src1 (RShiftI src2 src3))); + + size(4); + format %{ "SUB $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct subsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (SubI src1 (RShiftI src2 src3))); + + size(4); + format %{ "sub_32 $dst,$src1,$src2>>$src3\t! int" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct subshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (SubI src1 (URShiftI src2 src3))); + + size(4); + format %{ "SUB $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct subshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (SubI src1 (URShiftI src2 src3))); + + size(4); + format %{ "sub_32 $dst,$src1,$src2>>>$src3\t! int" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rsbshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (SubI (LShiftI src1 src2) src3)); + + size(4); + format %{ "RSB $dst,$src3,$src1<<$src2" %} + ins_encode %{ + __ rsb($dst$$Register, $src3$$Register, $src1$$Register, lsl($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rsbshlI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ + match(Set dst (SubI (LShiftI src1 src2) src3)); + + size(4); + format %{ "RSB $dst,$src3,$src1<<$src2" %} + ins_encode %{ + __ rsb($dst$$Register, $src3$$Register, $src1$$Register, lsl($src2$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rsbsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (SubI (RShiftI src1 src2) src3)); + + size(4); + format %{ "RSB $dst,$src3,$src1>>$src2" %} + ins_encode %{ + __ rsb($dst$$Register, $src3$$Register, $src1$$Register, asr($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rsbsarI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ + match(Set dst (SubI (RShiftI src1 src2) src3)); + + size(4); + format %{ "RSB $dst,$src3,$src1>>$src2" %} + ins_encode %{ + __ rsb($dst$$Register, $src3$$Register, $src1$$Register, asr($src2$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rsbshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (SubI (URShiftI src1 src2) src3)); + + size(4); + format %{ "RSB $dst,$src3,$src1>>>$src2" %} + ins_encode %{ + __ rsb($dst$$Register, $src3$$Register, $src1$$Register, lsr($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct rsbshrI_reg_imm_reg(iRegI dst, iRegI src1, immU5 src2, iRegI src3) %{ + match(Set dst (SubI (URShiftI src1 src2) src3)); + + size(4); + format %{ "RSB $dst,$src3,$src1>>>$src2" %} + ins_encode %{ + __ rsb($dst$$Register, $src3$$Register, $src1$$Register, lsr($src2$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Immediate Subtraction +instruct subI_reg_aimmI(iRegI dst, iRegI src1, aimmI src2) %{ + match(Set dst (SubI src1 src2)); + + size(4); + format %{ 
"sub_32 $dst,$src1,$src2\t! int" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +instruct subI_reg_immRotneg(iRegI dst, iRegI src1, aimmIneg src2) %{ + match(Set dst (AddI src1 src2)); + + size(4); + format %{ "sub_32 $dst,$src1,-($src2)\t! int" %} + ins_encode %{ + __ sub($dst$$Register, $src1$$Register, -$src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +instruct subI_immRot_reg(iRegI dst, immIRot src1, iRegI src2) %{ + match(Set dst (SubI src1 src2)); + + size(4); + format %{ "RSB $dst,$src2,src1" %} + ins_encode %{ + __ rsb($dst$$Register, $src2$$Register, $src1$$constant); + %} + ins_pipe(ialu_zero_reg); +%} + +// Register Subtraction +instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2, flagsReg icc ) %{ + match(Set dst (SubL src1 src2)); + effect (KILL icc); + + size(8); + format %{ "SUBS $dst.lo,$src1.lo,$src2.lo\t! long\n\t" + "SBC $dst.hi,$src1.hi,$src2.hi" %} + ins_encode %{ + __ subs($dst$$Register, $src1$$Register, $src2$$Register); + __ sbc($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); + %} + ins_pipe(ialu_reg_reg); +%} + +// Immediate Subtraction +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct subL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con, flagsReg icc) %{ + match(Set dst (SubL src1 con)); + effect (KILL icc); + + size(8); + format %{ "SUB $dst.lo,$src1.lo,$con\t! long\n\t" + "SBC $dst.hi,$src1.hi,0" %} + ins_encode %{ + __ subs($dst$$Register, $src1$$Register, (long)$con$$constant); + __ sbc($dst$$Register->successor(), $src1$$Register->successor(), 0); + %} + ins_pipe(ialu_reg_imm); +%} + +// Long negation +instruct negL_reg_reg(iRegL dst, immL0 zero, iRegL src2, flagsReg icc) %{ + match(Set dst (SubL zero src2)); + effect (KILL icc); + + size(8); + format %{ "RSBS $dst.lo,$src2.lo,0\t! long\n\t" + "RSC $dst.hi,$src2.hi,0" %} + ins_encode %{ + __ rsbs($dst$$Register, $src2$$Register, 0); + __ rsc($dst$$Register->successor(), $src2$$Register->successor(), 0); + %} + ins_pipe(ialu_zero_reg); +%} + +// Multiplication Instructions +// Integer Multiplication +// Register Multiplication +instruct mulI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (MulI src1 src2)); + + ins_cost(DEFAULT_COST); + size(4); + format %{ "mul_32 $dst,$src1,$src2" %} + ins_encode %{ + __ mul($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(imul_reg_reg); +%} + +instruct mulL_lo1_hi2(iRegL dst, iRegL src1, iRegL src2) %{ + effect(DEF dst, USE src1, USE src2); + ins_cost(DEFAULT_COST); + size(4); + format %{ "MUL $dst.hi,$src1.lo,$src2.hi\t! long" %} + ins_encode %{ + __ mul($dst$$Register->successor(), $src1$$Register, $src2$$Register->successor()); + %} + ins_pipe(imul_reg_reg); +%} + +instruct mulL_hi1_lo2(iRegL dst, iRegL src1, iRegL src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + ins_cost(DEFAULT_COST*3/2); + size(8); + format %{ "MLA $dst.hi,$src1.hi,$src2.lo,$dst.hi\t! long\n\t" + "MOV $dst.lo, 0"%} + ins_encode %{ + __ mla($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register, $dst$$Register->successor()); + __ mov($dst$$Register, 0); + %} + ins_pipe(imul_reg_reg); +%} + +instruct mulL_lo1_lo2(iRegL dst, iRegL src1, iRegL src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + ins_cost(DEFAULT_COST*3/2); + size(4); + format %{ "UMLAL $dst.lo,$dst.hi,$src1,$src2\t! 
long" %} + ins_encode %{ + __ umlal($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register); + %} + ins_pipe(imul_reg_reg); +%} + +instruct mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (MulL src1 src2)); + ins_cost(DEFAULT_COST*8/2); + + expand %{ + mulL_lo1_hi2(dst, src1, src2); + mulL_hi1_lo2(dst, src1, src2); + mulL_lo1_lo2(dst, src1, src2); + %} +%} + +instruct mla_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI srcA) %{ + match(Set dst (AddI (MulI src1 src2) srcA)); + + ins_cost(DEFAULT_COST*3/2); + size(4); + format %{ "MLA $dst,$src1,$src2,$srcA" %} + ins_encode %{ + __ mla($dst$$Register, $src1$$Register, $src2$$Register, $srcA$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct mls_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI srcA) %{ + match(Set dst (SubI srcA (MulI src1 src2))); + + ins_cost(DEFAULT_COST*3/2); + size(4); + format %{ "MLS $dst,$src1,$src2,$srcA" %} + ins_encode %{ + __ mls($dst$$Register, $src1$$Register, $src2$$Register, $srcA$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct smlal_reg_reg_reg(iRegL dst, iRegI src1, iRegI src2) %{ + match(Set dst (AddL (MulL (ConvI2L src1) (ConvI2L src2)) dst)); + + ins_cost(DEFAULT_COST*3/2); + size(4); + format %{ "SMLAL $dst.lo,$dst.hi,$src1,$src2" %} + ins_encode %{ + __ smlal($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct smull_reg_reg_reg(iRegL dst, iRegI src1, iRegI src2) %{ + match(Set dst (MulL (ConvI2L src1) (ConvI2L src2))); + + ins_cost(DEFAULT_COST*3/2); + size(4); + format %{ "SMULL $dst.lo,$dst.hi,$src1,$src2" %} + ins_encode %{ + __ smull($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +// Integer Division +// Register Division +instruct divI_reg_reg_IDIV(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (DivI src1 src2)); + predicate(VM_Version::features() & FT_HW_DIVIDE); + ins_cost(2*DEFAULT_COST); + + format %{ "SDIV $dst,$src1,$src2"%} + ins_encode %{ + __ sdiv($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(sdiv_reg_reg_IDIV); +%} + +instruct divI_reg_reg_SW(R0RegI dst, R1RegI src1, R2RegI src2, R9RegI temp1, R12RegI temp2, LRRegP lr, flagsReg ccr) %{ + match(Set dst (DivI src1 src2)); + predicate(!(VM_Version::features() & FT_HW_DIVIDE)); + effect( KILL ccr, TEMP temp1, TEMP temp2, USE_KILL src1,USE_KILL src2, KILL lr); + ins_cost((2+71)*DEFAULT_COST); + + format %{ "DIV $dst,$src1,$src2 ! call to StubRoutines::aarch32::idiv_entry()" %} + ins_encode %{ + __ call(StubRoutines::aarch32::idiv_entry(), relocInfo::runtime_call_type); + %} + ins_pipe(sdiv_reg_reg_SW); +%} + +// Register Long Division +instruct divL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{ + match(Set dst (DivL src1 src2)); + effect(CALL); + ins_cost(DEFAULT_COST*71); + format %{ "DIVL $src1,$src2,$dst\t! long ! 
call to SharedRuntime::ldiv" %} + ins_encode %{ + address target = CAST_FROM_FN_PTR(address, SharedRuntime::ldiv); + __ call(target, relocInfo::runtime_call_type); + %} + ins_pipe(divL_reg_reg); +%} + +// Integer Remainder +// Register Remainder +instruct modI_reg_reg_IDIV(iRegI dst, iRegI src1, iRegI src2, iRegI temp) %{ + match(Set dst (ModI src1 src2)); + predicate(VM_Version::features() & FT_HW_DIVIDE); + effect( TEMP temp); + + format %{ "SDIV $temp,$src1,$src2\n\t" + "MLS $dst, $temp, $src2, $src1"%} + ins_encode %{ + __ sdiv($temp$$Register, $src1$$Register, $src2$$Register); + __ mls($dst$$Register, $temp$$Register, $src2$$Register, $src1$$Register); + %} + ins_pipe(sdiv_reg_reg_IDIV); +%} + +instruct modI_reg_reg_SW(R0RegI dst, R1RegI src1, R2RegI src2, R9RegI temp1, R12RegI temp2, LRRegP lr, flagsReg ccr ) %{ + match(Set dst (ModI src1 src2)); + predicate(!(VM_Version::features() & FT_HW_DIVIDE)); + effect( KILL ccr, TEMP temp1, TEMP temp2, KILL lr, USE_KILL src1, USE_KILL src2); + + format %{ "MODI $dst,$src1,$src2\t ! call to StubRoutines::aarch32::irem_entry" %} + ins_encode %{ + __ call(StubRoutines::aarch32::irem_entry(), relocInfo::runtime_call_type); + %} + ins_pipe(sdiv_reg_reg_SW); +%} + +// Register Long Remainder +instruct modL_reg_reg(R0R1RegL dst, R2R3RegL src1, R0R1RegL src2) %{ + match(Set dst (ModL src1 src2)); + effect(CALL); + ins_cost(MEMORY_REF_COST); // FIXME + format %{ "modL $dst,$src1,$src2\t ! call to SharedRuntime::lrem" %} + ins_encode %{ + address target = CAST_FROM_FN_PTR(address, SharedRuntime::lrem); + __ call(target, relocInfo::runtime_call_type); + %} + ins_pipe(divL_reg_reg); +%} + +// Integer Shift Instructions + +// Register Shift Left +instruct shlI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (LShiftI src1 src2)); + + size(4); + format %{ "LSL $dst,$src1,$src2 \n\t" %} + ins_encode %{ + __ mov($dst$$Register, $src1$$Register, lsl($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Register Shift Left Immediate +instruct shlI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{ + match(Set dst (LShiftI src1 src2)); + + size(4); + format %{ "LSL $dst,$src1,$src2\t! 
int" %} + ins_encode %{ + __ lsl($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +instruct shlL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(4); + format %{"OR $dst.hi,$dst.hi,($src1.hi << $src2)" %} + ins_encode %{ + __ orr($dst$$Register->successor(), $dst$$Register->successor(), $src1$$Register->successor(), lsl($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct shlL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(4); + format %{ "LSL $dst.lo,$src1.lo,$src2 \n\t" %} + ins_encode %{ + __ mov($dst$$Register, $src1$$Register, lsl($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct shlL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{ + effect(DEF dst, USE src1, USE src2, KILL ccr); + size(16); + format %{ "SUBS $dst.hi,$src2,32 \n\t" + "LSLpl $dst.hi,$src1.lo,$dst.hi \n\t" + "RSBmi $dst.hi,$dst.hi,0 \n\t" + "LSRmi $dst.hi,$src1.lo,$dst.hi" %} + + ins_encode %{ + // $src1$$Register and $dst$$Register->successor() can't be the same + __ subs($dst$$Register->successor(), $src2$$Register, 32); + __ mov($dst$$Register->successor(), $src1$$Register, lsl($dst$$Register->successor()), Assembler::PL); + __ rsb($dst$$Register->successor(), $dst$$Register->successor(), 0, Assembler::MI); + __ mov($dst$$Register->successor(), $src1$$Register, lsr($dst$$Register->successor()), Assembler::MI); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct shlL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{ + match(Set dst (LShiftL src1 src2)); + + expand %{ + flagsReg ccr; + shlL_reg_reg_overlap(dst, src1, src2, ccr); + shlL_reg_reg_merge_hi(dst, src1, src2); + shlL_reg_reg_merge_lo(dst, src1, src2); + %} +%} + +// Register Shift Left Immediate +instruct shlL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{ + match(Set dst (LShiftL src1 src2)); + + size(8); + format %{ "LSL $dst.hi,$src1.lo,$src2-32\t! or mov if $src2==32\n\t" + "MOV $dst.lo, 0" %} + ins_encode %{ + if ($src2$$constant == 32) { + __ mov($dst$$Register->successor(), $src1$$Register); + } else { + __ mov($dst$$Register->successor(), $src1$$Register, lsl($src2$$constant-32)); + } + __ mov($dst$$Register, 0); + %} + ins_pipe(ialu_reg_imm); +%} + +instruct shlL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{ + match(Set dst (LShiftL src1 src2)); + + size(12); + format %{ "LSL $dst.hi,$src1.lo,$src2\n\t" + "OR $dst.hi, $dst.hi, $src1.lo >> 32-$src2\n\t" + "LSL $dst.lo,$src1.lo,$src2" %} + ins_encode %{ + // The order of the following 3 instructions matters: src1.lo and + // dst.hi can't overlap but src.hi and dst.hi can. + __ mov($dst$$Register->successor(), $src1$$Register->successor(), lsl($src2$$constant)); + __ orr($dst$$Register->successor(), $dst$$Register->successor(), $src1$$Register, lsr(32-$src2$$constant)); + __ mov($dst$$Register, $src1$$Register, lsl($src2$$constant)); + %} + ins_pipe(ialu_reg_imm); +%} + +// Register Arithmetic Shift Right +instruct sarI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (RShiftI src1 src2)); + size(4); + format %{ "ASR $dst,$src1,$src2\t! 
int" %} + ins_encode %{ + __ mov($dst$$Register, $src1$$Register, asr($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Register Arithmetic Shift Right Immediate +instruct sarI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{ + match(Set dst (RShiftI src1 src2)); + + size(4); + format %{ "ASR $dst,$src1,$src2" %} + ins_encode %{ + __ mov($dst$$Register, $src1$$Register, asr($src2$$constant)); + %} + ins_pipe(ialu_reg_imm); +%} + +// Register Shift Right Arithmetic Long +instruct sarL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(4); + format %{ "OR $dst.lo,$dst.lo,($src1.lo >> $src2)" %} + ins_encode %{ + __ orr($dst$$Register, $dst$$Register, $src1$$Register, lsr($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct sarL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(4); + format %{ "ASR $dst.hi,$src1.hi,$src2 \n\t" %} + ins_encode %{ + __ mov($dst$$Register->successor(), $src1$$Register->successor(), asr($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct sarL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{ + effect(DEF dst, USE src1, USE src2, KILL ccr); + size(16); + format %{ "SUBS $dst.lo,$src2,32 \n\t" + "ASRpl $dst.lo,$src1.hi,$dst.lo \n\t" + "RSBmi $dst.lo,$dst.lo,0 \n\t" + "LSLmi $dst.lo,$src1.hi,$dst.lo" %} + + ins_encode %{ + // $src1$$Register->successor() and $dst$$Register can't be the same + __ subs($dst$$Register, $src2$$Register, 32); + __ mov($dst$$Register, $src1$$Register->successor(), asr($dst$$Register), Assembler::PL); + __ rsb($dst$$Register, $dst$$Register, 0, Assembler::MI); + __ mov($dst$$Register, $src1$$Register->successor(), lsl($dst$$Register), Assembler::MI); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct sarL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{ + match(Set dst (RShiftL src1 src2)); + + expand %{ + flagsReg ccr; + sarL_reg_reg_overlap(dst, src1, src2, ccr); + sarL_reg_reg_merge_lo(dst, src1, src2); + sarL_reg_reg_merge_hi(dst, src1, src2); + %} +%} + +// Register Shift Left Immediate +instruct sarL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{ + match(Set dst (RShiftL src1 src2)); + + size(8); + format %{ "ASR $dst.lo,$src1.hi,$src2-32\t! or mov if $src2==32\n\t" + "ASR $dst.hi,$src1.hi, $src2" %} + ins_encode %{ + if ($src2$$constant == 32) { + __ mov($dst$$Register, $src1$$Register->successor()); + } else{ + __ mov($dst$$Register, $src1$$Register->successor(), asr($src2$$constant-32)); + } + __ mov($dst$$Register->successor(), $src1$$Register->successor(), asr(32)); + %} + + ins_pipe(ialu_reg_imm); +%} + +instruct sarL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{ + match(Set dst (RShiftL src1 src2)); + size(12); + format %{ "LSR $dst.lo,$src1.lo,$src2\n\t" + "OR $dst.lo, $dst.lo, $src1.hi << 32-$src2\n\t" + "ASR $dst.hi,$src1.hi,$src2" %} + ins_encode %{ + // The order of the following 3 instructions matters: src1.lo and + // dst.hi can't overlap but src.hi and dst.hi can. + __ mov($dst$$Register, $src1$$Register, lsr($src2$$constant)); + __ orr($dst$$Register, $dst$$Register, $src1$$Register->successor(), lsl(32-$src2$$constant)); + __ mov($dst$$Register->successor(), $src1$$Register->successor(), asr($src2$$constant)); + %} + ins_pipe(ialu_reg_imm); +%} + +// Register Shift Right +instruct shrI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (URShiftI src1 src2)); + size(4); + format %{ "LSR $dst,$src1,$src2\t! 
int" %} + ins_encode %{ + __ mov($dst$$Register, $src1$$Register, lsr($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Register Shift Right Immediate +instruct shrI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{ + match(Set dst (URShiftI src1 src2)); + + size(4); + format %{ "LSR $dst,$src1,$src2" %} + ins_encode %{ + __ mov($dst$$Register, $src1$$Register, lsr($src2$$constant)); + %} + ins_pipe(ialu_reg_imm); +%} + +// Register Shift Right +instruct shrL_reg_reg_merge_lo(iRegL dst, iRegL src1, iRegI src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(4); + format %{ "OR $dst.lo,$dst,($src1.lo >>> $src2)" %} + ins_encode %{ + __ orr($dst$$Register, $dst$$Register, $src1$$Register, lsr($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct shrL_reg_reg_merge_hi(iRegL dst, iRegL src1, iRegI src2) %{ + effect(USE_DEF dst, USE src1, USE src2); + size(4); + format %{ "LSR $dst.hi,$src1.hi,$src2 \n\t" %} + ins_encode %{ + __ mov($dst$$Register->successor(), $src1$$Register->successor(), lsr($src2$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct shrL_reg_reg_overlap(iRegL dst, iRegL src1, iRegI src2, flagsReg ccr) %{ + effect(DEF dst, USE src1, USE src2, KILL ccr); + size(16); + format %{ "SUBS $dst,$src2,32 \n\t" + "LSRpl $dst,$src1.hi,$dst \n\t" + "RSBmi $dst,$dst,0 \n\t" + "LSLmi $dst,$src1.hi,$dst" %} + + ins_encode %{ + // $src1$$Register->successor() and $dst$$Register can't be the same + __ subs($dst$$Register, $src2$$Register, 32); + __ mov($dst$$Register, $src1$$Register->successor(), lsr($dst$$Register), Assembler::PL); + __ rsb($dst$$Register, $dst$$Register, 0, Assembler::MI); + __ mov($dst$$Register, $src1$$Register->successor(), lsl($dst$$Register), Assembler::MI); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct shrL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{ + match(Set dst (URShiftL src1 src2)); + + expand %{ + flagsReg ccr; + shrL_reg_reg_overlap(dst, src1, src2, ccr); + shrL_reg_reg_merge_lo(dst, src1, src2); + shrL_reg_reg_merge_hi(dst, src1, src2); + %} +%} + +// Register Shift Right Immediate +instruct shrL_reg_imm6(iRegL dst, iRegL src1, immU6Big src2) %{ + match(Set dst (URShiftL src1 src2)); + + size(8); + format %{ "LSR $dst.lo,$src1.hi,$src2-32\t! or mov if $src2==32\n\t" + "MOV $dst.hi, 0" %} + ins_encode %{ + if ($src2$$constant == 32) { + __ mov($dst$$Register, $src1$$Register->successor()); + } else { + __ mov($dst$$Register, $src1$$Register->successor(), lsr($src2$$constant-32)); + } + __ mov($dst$$Register->successor(), 0); + %} + + ins_pipe(ialu_reg_imm); +%} + +instruct shrL_reg_imm5(iRegL dst, iRegL src1, immU5 src2) %{ + match(Set dst (URShiftL src1 src2)); + + size(12); + format %{ "LSR $dst.lo,$src1.lo,$src2\n\t" + "OR $dst.lo, $dst.lo, $src1.hi << 32-$src2\n\t" + "LSR $dst.hi,$src1.hi,$src2" %} + ins_encode %{ + // The order of the following 3 instructions matters: src1.lo and + // dst.hi can't overlap but src.hi and dst.hi can. + __ mov($dst$$Register, $src1$$Register, lsr($src2$$constant)); + __ orr($dst$$Register, $dst$$Register, $src1$$Register->successor(), lsl(32-$src2$$constant)); + __ mov($dst$$Register->successor(), $src1$$Register->successor(), lsr($src2$$constant)); + %} + ins_pipe(ialu_reg_imm); +%} + + +instruct shrP_reg_imm5(iRegX dst, iRegP src1, immU5 src2) %{ + match(Set dst (URShiftI (CastP2X src1) src2)); + size(4); + format %{ "LSR $dst,$src1,$src2\t! 
Cast ptr $src1 to int and shift" %} + ins_encode %{ + __ lsr($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +// Overcomplicated unsigned math +instruct umull_lreg32_lreg32(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (MulL src1 src2)); + predicate(n->in(1)->Opcode() == Op_AndL && (((unsigned long long)n->in(1)->in(2)->find_long_con(-1))>>32)==0 && + n->in(2)->Opcode() == Op_AndL && (((unsigned long long)n->in(2)->in(2)->find_long_con(-1))>>32)==0); + + ins_cost(DEFAULT_COST*3/2); + size(4); + format %{ "UMULL $dst.lo,$dst.hi,$src1.lo,$src2.lo" %} + ins_encode %{ + __ umull($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register); + %} + ins_pipe(imul_reg_reg); +%} + +instruct umlal_reg32_reg32(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (AddL dst (MulL src1 src2))); + predicate( + n->in(2)->Opcode() == Op_MulL ? + n->in(2)->in(1)->Opcode() == Op_AndL && (((unsigned long long)n->in(2)->in(1)->in(2)->find_long_con(-1))>>32)==0 && + n->in(2)->in(2)->Opcode() == Op_AndL && (((unsigned long long)n->in(2)->in(2)->in(2)->find_long_con(-1))>>32)==0 : + n->in(1)->in(1)->Opcode() == Op_AndL && (((unsigned long long)n->in(1)->in(1)->in(2)->find_long_con(-1))>>32)==0 && + n->in(1)->in(2)->Opcode() == Op_AndL && (((unsigned long long)n->in(1)->in(2)->in(2)->find_long_con(-1))>>32)==0 + ); + + ins_cost(DEFAULT_COST*3/2); + size(4); + format %{ "UMLAL $dst.lo,$dst.hi,$src1.lo,$src2.lo" %} + ins_encode %{ + __ umlal($dst$$Register, $dst$$Register->successor(), $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +//----------Floating Point Arithmetic Instructions----------------------------- + +// Add float single precision +instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (AddF src1 src2)); + + size(4); + format %{ "FADDS $dst,$src1,$src2" %} + ins_encode %{ + __ vadd_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + + ins_pipe(faddF_reg_reg); +%} + +// Add float double precision +instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (AddD src1 src2)); + + size(4); + format %{ "FADDD $dst,$src1,$src2" %} + ins_encode %{ + __ vadd_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + + ins_pipe(faddD_reg_reg); +%} + +// Sub float single precision +instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (SubF src1 src2)); + + size(4); + format %{ "FSUBS $dst,$src1,$src2" %} + ins_encode %{ + __ vsub_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(faddF_reg_reg); +%} + +// Sub float double precision +instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (SubD src1 src2)); + + size(4); + format %{ "FSUBD $dst,$src1,$src2" %} + ins_encode %{ + __ vsub_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(faddD_reg_reg); +%} + +// Mul float single precision +instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (MulF src1 src2)); + + size(4); + format %{ "FMULS $dst,$src1,$src2" %} + ins_encode %{ + __ vmul_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + + ins_pipe(fmulF_reg_reg); +%} + +// Mul float double precision +instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (MulD src1 src2)); + + size(4); + format %{ "FMULD $dst,$src1,$src2" %} + ins_encode %{ + __ vmul_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + + 
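// --------------------------------------------------------------------------
// [Editorial aside, not part of the patch] The umull/umlal rules earlier in
// this hunk only match a MulL (or an AddL of a MulL) whose inputs are AndL
// nodes with a constant mask whose upper 32 bits are all zero, i.e. operands
// that are provably zero-extended 32-bit values; only then does one
// UMULL/UMLAL yield the exact 64-bit product. A minimal C++ sketch of that
// mask test follows; the helper name and the reliance on find_long_con's -1
// fallback for non-constant masks are illustrative assumptions.

#include <cstdint>

// True when an AndL mask clears bits 32..63, so (x & mask) fits in 32 bits
// and the long multiply can be lowered to a single 32x32->64 unsigned
// multiply. A non-constant mask reported as -1 (all bits set) fails the test.
static bool zero_extends_to_32_bits(uint64_t mask) {
  return (mask >> 32) == 0;
}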
ins_pipe(fmulD_reg_reg); +%} + +// Div float single precision +instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ + match(Set dst (DivF src1 src2)); + + size(4); + format %{ "FDIVS $dst,$src1,$src2" %} + ins_encode %{ + __ vdiv_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + + ins_pipe(fdivF_reg_reg); +%} + +// Div float double precision +instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ + match(Set dst (DivD src1 src2)); + + size(4); + format %{ "FDIVD $dst,$src1,$src2" %} + ins_encode %{ + __ vdiv_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + + ins_pipe(fdivD_reg_reg); +%} + +// Absolute float double precision +instruct absD_reg(regD dst, regD src) %{ + match(Set dst (AbsD src)); + + size(4); + format %{ "FABSd $dst,$src" %} + ins_encode %{ + __ vabs_f64($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(faddD_reg); +%} + +// Absolute float single precision +instruct absF_reg(regF dst, regF src) %{ + match(Set dst (AbsF src)); + format %{ "FABSs $dst,$src" %} + ins_encode %{ + __ vabs_f32($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(faddF_reg); +%} + +instruct negF_reg(regF dst, regF src) %{ + match(Set dst (NegF src)); + + size(4); + format %{ "FNEGs $dst,$src" %} + ins_encode %{ + __ vneg_f32($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(faddF_reg); +%} + +instruct negD_reg(regD dst, regD src) %{ + match(Set dst (NegD src)); + + format %{ "FNEGd $dst,$src" %} + ins_encode %{ + __ vneg_f64($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(faddD_reg); +%} + +// Sqrt float double precision +instruct sqrtF_reg_reg(regF dst, regF src) %{ + match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); + + size(4); + format %{ "FSQRTS $dst,$src" %} + ins_encode %{ + __ vsqrt_f32($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(fdivF_reg_reg); +%} + +// Sqrt float double precision +instruct sqrtD_reg_reg(regD dst, regD src) %{ + match(Set dst (SqrtD src)); + + size(4); + format %{ "FSQRTD $dst,$src" %} + ins_encode %{ + __ vsqrt_f64($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(fdivD_reg_reg); +%} + +//----------Logical Instructions----------------------------------------------- +// And Instructions +// Register And +instruct andI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (AndI src1 src2)); + + size(4); + format %{ "and_32 $dst,$src1,$src2" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct andshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (AndI src1 (LShiftI src2 src3))); + + size(4); + format %{ "AND $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct andshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (AndI src1 (LShiftI src2 src3))); + + size(4); + format %{ "and_32 $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct andsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (AndI src1 (RShiftI src2 src3))); + + size(4); + format %{ "AND $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct andsarI_reg_reg_imm(iRegI 
dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (AndI src1 (RShiftI src2 src3))); + + size(4); + format %{ "and_32 $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct andshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (AndI src1 (URShiftI src2 src3))); + + size(4); + format %{ "AND $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct andshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (AndI src1 (URShiftI src2 src3))); + + size(4); + format %{ "and_32 $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Immediate And +instruct andI_reg_limm(iRegI dst, iRegI src1, limmI src2) %{ + match(Set dst (AndI src1 src2)); + + size(4); + format %{ "and_32 $dst,$src1,$src2\t! int" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +instruct andI_reg_limmn(iRegI dst, iRegI src1, limmIn src2) %{ + match(Set dst (AndI src1 src2)); + + size(4); + format %{ "bic $dst,$src1,~$src2\t! int" %} + ins_encode %{ + __ bic($dst$$Register, $src1$$Register, ~$src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +// Register And Long +instruct andL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (AndL src1 src2)); + + ins_cost(DEFAULT_COST); + size(8); + format %{ "AND $dst,$src1,$src2\t! long" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, $src2$$Register); + __ andr($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); + %} + ins_pipe(ialu_reg_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct andL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{ + match(Set dst (AndL src1 con)); + ins_cost(DEFAULT_COST); + size(8); + format %{ "AND $dst,$src1,$con\t! long" %} + ins_encode %{ + __ andr($dst$$Register, $src1$$Register, $con$$constant); + __ andr($dst$$Register->successor(), $src1$$Register->successor(), 0u); + %} + ins_pipe(ialu_reg_imm); +%} + +// Or Instructions +// Register Or +instruct orI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (OrI src1 src2)); + + size(4); + format %{ "orr_32 $dst,$src1,$src2\t! 
int" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct orshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (OrI src1 (LShiftI src2 src3))); + + size(4); + format %{ "OR $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct orshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (OrI src1 (LShiftI src2 src3))); + + size(4); + format %{ "orr_32 $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct orsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (OrI src1 (RShiftI src2 src3))); + + size(4); + format %{ "OR $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct orsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (OrI src1 (RShiftI src2 src3))); + + size(4); + format %{ "orr_32 $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct orshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (OrI src1 (URShiftI src2 src3))); + + size(4); + format %{ "OR $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct orshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (OrI src1 (URShiftI src2 src3))); + + size(4); + format %{ "orr_32 $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Immediate Or +instruct orI_reg_limm(iRegI dst, iRegI src1, limmI src2) %{ + match(Set dst (OrI src1 src2)); + + size(4); + format %{ "orr_32 $dst,$src1,$src2" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} +// TODO: orn_32 with limmIn + +// Register Or Long +instruct orL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (OrL src1 src2)); + + ins_cost(DEFAULT_COST); + size(8); + format %{ "OR $dst.lo,$src1.lo,$src2.lo\t! long\n\t" + "OR $dst.hi,$src1.hi,$src2.hi" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $src2$$Register); + __ orr($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); + %} + ins_pipe(ialu_reg_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct orL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{ + match(Set dst (OrL src1 con)); + ins_cost(DEFAULT_COST); + size(8); + format %{ "OR $dst.lo,$src1.lo,$con\t! long\n\t" + "OR $dst.hi,$src1.hi,$con" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $con$$constant); + __ orr($dst$$Register->successor(), $src1$$Register->successor(), 0u); + %} + ins_pipe(ialu_reg_imm); +%} + +#ifdef TODO +// Use SPRegP to match Rthread (TLS register) without spilling. +// Use store_ptr_RegP to match Rthread (TLS register) without spilling. 
+// Use sp_ptr_RegP to match Rthread (TLS register) without spilling. +instruct orI_reg_castP2X(iRegI dst, iRegI src1, sp_ptr_RegP src2) %{ + match(Set dst (OrI src1 (CastP2X src2))); + size(4); + format %{ "OR $dst,$src1,$src2" %} + ins_encode %{ + __ orr($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} +#endif + +// Xor Instructions +// Register Xor +instruct xorI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{ + match(Set dst (XorI src1 src2)); + + size(4); + format %{ "eor_32 $dst,$src1,$src2" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct xorshlI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (XorI src1 (LShiftI src2 src3))); + + size(4); + format %{ "XOR $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct xorshlI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (XorI src1 (LShiftI src2 src3))); + + size(4); + format %{ "eor_32 $dst,$src1,$src2<<$src3" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, $src2$$Register, lsl($src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct xorsarI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (XorI src1 (RShiftI src2 src3))); + + size(4); + format %{ "XOR $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct xorsarI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (XorI src1 (RShiftI src2 src3))); + + size(4); + format %{ "eor_32 $dst,$src1,$src2>>$src3" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, $src2$$Register, asr($src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct xorshrI_reg_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3) %{ + match(Set dst (XorI src1 (URShiftI src2 src3))); + + size(4); + format %{ "XOR $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$Register)); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct xorshrI_reg_reg_imm(iRegI dst, iRegI src1, iRegI src2, immU5 src3) %{ + match(Set dst (XorI src1 (URShiftI src2 src3))); + + size(4); + format %{ "eor_32 $dst,$src1,$src2>>>$src3" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, $src2$$Register, lsr($src3$$constant)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Immediate Xor +instruct xorI_reg_imm(iRegI dst, iRegI src1, limmI src2) %{ + match(Set dst (XorI src1 src2)); + + size(4); + format %{ "eor_32 $dst,$src1,$src2" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, $src2$$constant); + %} + ins_pipe(ialu_reg_imm); +%} + +// Register Xor Long +instruct xorL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{ + match(Set dst (XorL src1 src2)); + ins_cost(DEFAULT_COST); + size(8); + format %{ "XOR $dst.hi,$src1.hi,$src2.hi\t! long\n\t" + "XOR $dst.lo,$src1.lo,$src2.lo\t! 
long" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, $src2$$Register); + __ eor($dst$$Register->successor(), $src1$$Register->successor(), $src2$$Register->successor()); + %} + ins_pipe(ialu_reg_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct xorL_reg_immRot(iRegL dst, iRegL src1, immLlowRot con) %{ + match(Set dst (XorL src1 con)); + ins_cost(DEFAULT_COST); + size(8); + format %{ "XOR $dst.hi,$src1.hi,$con\t! long\n\t" + "XOR $dst.lo,$src1.lo,0\t! long" %} + ins_encode %{ + __ eor($dst$$Register, $src1$$Register, $con$$constant); + __ eor($dst$$Register->successor(), $src1$$Register->successor(), 0u); + %} + ins_pipe(ialu_reg_imm); +%} + +//----------Convert to Boolean------------------------------------------------- +instruct convI2B( iRegI dst, iRegI src, flagsReg ccr ) %{ + match(Set dst (Conv2B src)); + effect(KILL ccr); + size(12); + ins_cost(DEFAULT_COST*2); + format %{ "TST $src,$src \n\t" + "MOV $dst, 0 \n\t" + "MOV.ne $dst, 1" %} + ins_encode %{ // FIXME: can do better? + __ tst($src$$Register, $src$$Register); + __ mov($dst$$Register, 0); + __ mov($dst$$Register, 1, Assembler::NE); + %} + ins_pipe(ialu_reg_ialu); +%} + +instruct convP2B( iRegI dst, iRegP src, flagsReg ccr ) %{ + match(Set dst (Conv2B src)); + effect(KILL ccr); + size(12); + ins_cost(DEFAULT_COST*2); + format %{ "TST $src,$src \n\t" + "MOV $dst, 0 \n\t" + "MOV.ne $dst, 1" %} + ins_encode %{ + __ tst($src$$Register, $src$$Register); + __ mov($dst$$Register, 0); + __ mov($dst$$Register, 1, Assembler::NE); + %} + ins_pipe(ialu_reg_ialu); +%} + +instruct cmpLTMask_reg_reg( iRegI dst, iRegI p, iRegI q, flagsReg ccr ) %{ + match(Set dst (CmpLTMask p q)); + effect( KILL ccr ); + ins_cost(DEFAULT_COST*3); + format %{ "CMP $p,$q\n\t" + "MOV $dst, #0\n\t" + "MOV.lt $dst, #-1" %} + ins_encode %{ + __ cmp($p$$Register, $q$$Register); + __ mov_i($dst$$Register, 0); + __ mvn_i($dst$$Register, 0, Assembler::LT); + %} + ins_pipe(ialu_reg_reg_ialu); +%} + +instruct cmpLTMask_reg_imm( iRegI dst, iRegI p, aimmI q, flagsReg ccr ) %{ + match(Set dst (CmpLTMask p q)); + effect( KILL ccr ); + ins_cost(DEFAULT_COST*3); + format %{ "CMP $p,$q\n\t" + "MOV $dst, #0\n\t" + "MOV.lt $dst, #-1" %} + ins_encode %{ + __ cmp($p$$Register, $q$$constant); + __ mov_i($dst$$Register, 0); + __ mvn_i($dst$$Register, 0, Assembler::LT); + %} + ins_pipe(ialu_reg_reg_ialu); +%} + +instruct cadd_cmpLTMask3( iRegI p, iRegI q, iRegI y, iRegI z, flagsReg ccr ) %{ + match(Set z (AddI (AndI (CmpLTMask p q) y) z)); + effect( KILL ccr ); + ins_cost(DEFAULT_COST*2); + format %{ "CMP $p,$q\n\t" + "ADD.lt $z,$y,$z" %} + ins_encode %{ + __ cmp($p$$Register, $q$$Register); + __ add($z$$Register, $y$$Register, $z$$Register, Assembler::LT); + %} + ins_pipe( cadd_cmpltmask ); +%} + +// FIXME: remove unused "dst" +instruct cadd_cmpLTMask4( iRegI dst, iRegI p, aimmI q, iRegI y, iRegI z, flagsReg ccr ) %{ + match(Set z (AddI (AndI (CmpLTMask p q) y) z)); + effect( KILL ccr ); + ins_cost(DEFAULT_COST*2); + format %{ "CMP $p,$q\n\t" + "ADD.lt $z,$y,$z" %} + ins_encode %{ + __ cmp($p$$Register, $q$$constant); + __ add($z$$Register, $y$$Register, $z$$Register, Assembler::LT); + %} + ins_pipe( cadd_cmpltmask ); +%} + +instruct cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, flagsReg ccr ) %{ + match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); + effect( KILL ccr ); + ins_cost(DEFAULT_COST*2); + format %{ "SUBS $p,$p,$q\n\t" + "ADD.lt $p,$y,$p" %} + ins_encode %{ + __ 
subs($p$$Register, $p$$Register, $q$$Register); + __ add($p$$Register, $y$$Register, $p$$Register, Assembler::LT); + %} + ins_pipe( cadd_cmpltmask ); +%} + +//----------Arithmetic Conversion Instructions--------------------------------- +// The conversions operations are all Alpha sorted. Please keep it that way! + +instruct convD2F_reg(regF dst, regD src) %{ + match(Set dst (ConvD2F src)); + size(4); + format %{ "FCVTSD $dst,$src" %} + ins_encode %{ + __ vcvt_f32_f64($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(fcvtD2F); +%} + +// Convert a double to an int in a float register. +// If the double is a NAN, stuff a zero in instead. + +instruct convD2I_reg_reg(iRegI dst, regD src, regF tmp) %{ + match(Set dst (ConvD2I src)); + effect( TEMP tmp ); + ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME + format %{ "FTOSIZD $tmp,$src\n\t" + "FMRS $dst, $tmp" %} + ins_encode %{ + __ vcvt_s32_f64($tmp$$FloatRegister, $src$$FloatRegister); + __ vmov_f32($dst$$Register, $tmp$$FloatRegister); + %} + ins_pipe(fcvtD2I); +%} + +// Convert a double to a long in a double register. +// If the double is a NAN, stuff a zero in instead. + +// Double to Long conversion +instruct convD2L_reg(R0R1RegL dst, regD src) %{ + match(Set dst (ConvD2L src)); + effect(CALL); + ins_cost(MEMORY_REF_COST); // FIXME + format %{ "convD2L $dst,$src\t ! call to SharedRuntime::d2l" %} + ins_encode %{ +#ifndef HARD_FLOAT_CC + __ vmov_f64($dst$$Register, $dst$$Register->successor(), $src$$FloatRegister); +#else + if ($src$$FloatRegister != d0) { + __ vmov_f64(d0, $src$$FloatRegister); + } +#endif + address target = CAST_FROM_FN_PTR(address, SharedRuntime::d2l); + __ call(target, relocInfo::runtime_call_type); + %} + ins_pipe(fcvtD2L); +%} + +instruct convF2D_reg(regD dst, regF src) %{ + match(Set dst (ConvF2D src)); + size(4); + format %{ "FCVTDS $dst,$src" %} + ins_encode %{ + __ vcvt_f64_f32($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(fcvtF2D); +%} + +instruct convF2I_reg_reg(iRegI dst, regF src, regF tmp) %{ + match(Set dst (ConvF2I src)); + effect( TEMP tmp ); + ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME + size(8); + format %{ "FTOSIZS $tmp,$src\n\t" + "FMRS $dst, $tmp" %} + ins_encode %{ + __ vcvt_s32_f32($tmp$$FloatRegister, $src$$FloatRegister); + __ vmov_f32($dst$$Register, $tmp$$FloatRegister); + %} + ins_pipe(fcvtF2I); +%} + +// Float to Long conversion +instruct convF2L_reg(R0R1RegL dst, regF src, R0RegI arg1) %{ + match(Set dst (ConvF2L src)); + ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST); // FIXME + effect(CALL); + format %{ "convF2L $dst,$src\t! 
call to SharedRuntime::f2l" %} + ins_encode %{ +#ifndef HARD_FLOAT_CC + __ vmov_f32($arg1$$Register, $src$$FloatRegister); +#else + if($src$$FloatRegister != f0) { + __ vmov_f32(f0, $src$$FloatRegister); + } +#endif + address target = CAST_FROM_FN_PTR(address, SharedRuntime::f2l); + __ call(target, relocInfo::runtime_call_type); + %} + ins_pipe(fcvtF2L); +%} + +instruct convI2D_reg_reg(iRegI src, regD_low dst) %{ + match(Set dst (ConvI2D src)); + ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME + size(8); + format %{ "FMSR $dst,$src \n\t" + "FSITOD $dst $dst"%} + ins_encode %{ + __ vmov_f32($dst$$FloatRegister, $src$$Register); + __ vcvt_f64_s32($dst$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe(fcvtI2D); +%} + +instruct convI2F_reg_reg( regF dst, iRegI src ) %{ + match(Set dst (ConvI2F src)); + ins_cost(DEFAULT_COST + MEMORY_REF_COST); // FIXME + size(8); + format %{ "FMSR $dst,$src \n\t" + "FSITOS $dst, $dst"%} + ins_encode %{ + __ vmov_f32($dst$$FloatRegister, $src$$Register); + __ vcvt_f32_s32($dst$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe(fcvtI2F); +%} + +instruct convI2L_reg(iRegL dst, iRegI src) %{ + match(Set dst (ConvI2L src)); + size(8); + format %{ "MOV $dst.lo, $src \n\t" + "ASR $dst.hi,$src,31\t! int->long" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register); + __ mov($dst$$Register->successor(), $src$$Register, asr(31)); + %} + ins_pipe(ialu_reg_reg); +%} + +// Zero-extend convert int to long +instruct convI2L_reg_zex(iRegL dst, iRegI src, immL_32bits mask ) %{ + match(Set dst (AndL (ConvI2L src) mask) ); + size(8); + format %{ "MOV $dst.lo,$src.lo\t! zero-extend int to long\n\t" + "MOV $dst.hi, 0"%} + ins_encode %{ + __ mov($dst$$Register, $src$$Register); + __ mov($dst$$Register->successor(), 0); + %} + ins_pipe(ialu_reg_reg); +%} + +// Zero-extend long +instruct zerox_long(iRegL dst, iRegL src, immL_32bits mask ) %{ + match(Set dst (AndL src mask) ); + size(8); + format %{ "MOV $dst.lo,$src.lo\t! zero-extend long\n\t" + "MOV $dst.hi, 0"%} + ins_encode %{ + __ mov($dst$$Register, $src$$Register); + __ mov($dst$$Register->successor(), 0); + %} + ins_pipe(ialu_reg_reg); +%} + +instruct MoveF2I_reg_reg(iRegI dst, regF src) %{ + match(Set dst (MoveF2I src)); + effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST); // FIXME + + size(4); + format %{ "FMRS $dst,$src\t! MoveF2I" %} + ins_encode %{ + __ vmov_f32($dst$$Register, $src$$FloatRegister); + %} + ins_pipe(iload_mem); // FIXME +%} + +instruct MoveI2F_reg_reg(regF dst, iRegI src) %{ + match(Set dst (MoveI2F src)); + ins_cost(MEMORY_REF_COST); // FIXME + + size(4); + format %{ "FMSR $dst,$src\t! MoveI2F" %} + ins_encode %{ + __ vmov_f32($dst$$FloatRegister, $src$$Register); + %} + ins_pipe(iload_mem); // FIXME +%} + +instruct MoveD2L_reg_reg(iRegL dst, regD src) %{ + match(Set dst (MoveD2L src)); + effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST); // FIXME + + size(4); + format %{ "FMRRD $dst,$src\t! MoveD2L" %} + ins_encode %{ + __ vmov_f64($dst$$Register, $dst$$Register->successor(), $src$$FloatRegister); + %} + ins_pipe(iload_mem); // FIXME +%} + +instruct MoveL2D_reg_reg(regD dst, iRegL src) %{ + match(Set dst (MoveL2D src)); + effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST); // FIXME + + size(4); + format %{ "FMDRR $dst,$src\t! 
MoveL2D" %} + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register->successor()); + %} + ins_pipe(ialu_reg_reg); // FIXME +%} + +//----------- +// Long to Double conversion + +// Magic constant, 0x43300000 +instruct loadConI_x43300000(iRegI dst) %{ + effect(DEF dst); + size(8); + format %{ "MOV_SLOW $dst,0x43300000\t! 2^52" %} + ins_encode %{ + __ mov($dst$$Register, 0x43300000); + %} + ins_pipe(ialu_none); +%} + +// Magic constant, 0x41f00000 +instruct loadConI_x41f00000(iRegI dst) %{ + effect(DEF dst); + size(8); + format %{ "MOV_SLOW $dst, 0x41f00000\t! 2^32" %} + ins_encode %{ + __ mov($dst$$Register, 0x41f00000); + %} + ins_pipe(ialu_none); +%} + +instruct loadConI_x0(iRegI dst) %{ + effect(DEF dst); + size(4); + format %{ "MOV $dst, 0x0\t! 0" %} + ins_encode %{ + __ mov($dst$$Register, 0); + %} + ins_pipe(ialu_none); +%} + +// Construct a double from two float halves +instruct regDHi_regDLo_to_regD(regD_low dst, regD_low src1, regD_low src2) %{ + effect(DEF dst, USE src1, USE src2); + size(8); + format %{ "FCPYS $dst.hi,$src1.hi\n\t" + "FCPYS $dst.lo,$src2.lo" %} + ins_encode %{ + __ vmov_f32($dst$$FloatRegister->successor(FloatRegisterImpl::SINGLE), $src1$$FloatRegister->successor(FloatRegisterImpl::SINGLE)); + __ vmov_f32($dst$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(faddD_reg_reg); +%} + +// Convert integer in high half of a double register (in the lower half of +// the double register file) to double +instruct convI2D_regDHi_regD(regD dst, regD_low src) %{ + effect(DEF dst, USE src); + size(4); + format %{ "FSITOD $dst,$src" %} + ins_encode %{ + __ vcvt_f64_s32($dst$$FloatRegister, $src$$FloatRegister->successor(FloatRegisterImpl::SINGLE));// TODO verify the samentics is the same as was before + %} + ins_pipe(fcvtLHi2D); +%} + +// Add float double precision +instruct addD_regD_regD(regD dst, regD src1, regD src2) %{ + effect(DEF dst, USE src1, USE src2); + size(4); + format %{ "FADDD $dst,$src1,$src2" %} + ins_encode %{ + __ vadd_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(faddD_reg_reg); +%} + +// Sub float double precision +instruct subD_regD_regD(regD dst, regD src1, regD src2) %{ + effect(DEF dst, USE src1, USE src2); + size(4); + format %{ "FSUBD $dst,$src1,$src2" %} + ins_encode %{ + __ vsub_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(faddD_reg_reg); +%} + +// Mul float double precision +instruct mulD_regD_regD(regD dst, regD src1, regD src2) %{ + effect(DEF dst, USE src1, USE src2); + size(4); + format %{ "FMULD $dst,$src1,$src2" %} + ins_encode %{ + __ vmul_f64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe(fmulD_reg_reg); +%} + +instruct regL_to_regD(regD dst, iRegL src) %{ + // No match rule to avoid chain rule match. + effect(DEF dst, USE src); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "FMDRR $dst,$src\t! regL to regD" %} + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register->successor()); + %} + ins_pipe(ialu_reg_reg); // FIXME +%} + +instruct regI_regI_to_regD(regD dst, iRegI src1, iRegI src2) %{ + // No match rule to avoid chain rule match. + effect(DEF dst, USE src1, USE src2); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "FMDRR $dst,$src1,$src2\t! 
regI,regI to regD" %} + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src1$$Register, $src2$$Register); + %} + ins_pipe(ialu_reg_reg); // FIXME +%} + +instruct convL2D_reg_slow_fxtof(regD dst, iRegL src) %{ + match(Set dst (ConvL2D src)); + ins_cost(DEFAULT_COST*8 + MEMORY_REF_COST*6); // FIXME + + expand %{ + regD_low tmpsrc; + iRegI ix43300000; + iRegI ix41f00000; + iRegI ix0; + regD_low dx43300000; + regD dx41f00000; + regD tmp1; + regD_low tmp2; + regD tmp3; + regD tmp4; + + regL_to_regD(tmpsrc, src); + + loadConI_x43300000(ix43300000); + loadConI_x41f00000(ix41f00000); + loadConI_x0(ix0); + + regI_regI_to_regD(dx43300000, ix0, ix43300000); + regI_regI_to_regD(dx41f00000, ix0, ix41f00000); + + convI2D_regDHi_regD(tmp1, tmpsrc); + regDHi_regDLo_to_regD(tmp2, dx43300000, tmpsrc); + subD_regD_regD(tmp3, tmp2, dx43300000); + mulD_regD_regD(tmp4, tmp1, dx41f00000); + addD_regD_regD(dst, tmp3, tmp4); + %} +%} + +instruct convL2I_reg(iRegI dst, iRegL src) %{ + match(Set dst (ConvL2I src)); + size(4); + format %{ "MOV $dst,$src.lo\t! long->int" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_move_reg_I_to_L); +%} + +// Register Shift Right Immediate +instruct shrL_reg_imm6_L2I(iRegI dst, iRegL src, immI_32_63 cnt) %{ + match(Set dst (ConvL2I (RShiftL src cnt))); + size(4); + format %{ "ASR $dst,$src.hi,($cnt - 32)\t! long->int or mov if $cnt==32" %} + ins_encode %{ + if ($cnt$$constant == 32) { + __ mov($dst$$Register, $src$$Register->successor()); + } else { + __ mov($dst$$Register, $src$$Register->successor(), asr($cnt$$constant - 32)); + } + %} + ins_pipe(ialu_reg_imm); +%} + + +//----------Control Flow Instructions------------------------------------------ +// Compare Instructions +// Compare Integers +instruct compI_iReg(flagsReg icc, iRegI op1, iRegI op2) %{ + match(Set icc (CmpI op1 op2)); + effect( DEF icc, USE op1, USE op2 ); + + size(4); + format %{ "cmp_32 $op1,$op2\t! int" %} + ins_encode %{ + __ cmp($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_cconly_reg_reg); +%} + +instruct compU_iReg(flagsRegU icc, iRegI op1, iRegI op2) %{ + match(Set icc (CmpU op1 op2)); + + size(4); + format %{ "cmp_32 $op1,$op2\t! unsigned int" %} + ins_encode %{ + __ cmp($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_cconly_reg_reg); +%} + +instruct compI_iReg_immneg(flagsReg icc, iRegI op1, aimmIneg op2) %{ + match(Set icc (CmpI op1 op2)); + effect( DEF icc, USE op1 ); + + size(4); + format %{ "cmn_32 $op1,-$op2\t! int" %} + ins_encode %{ + __ cmn($op1$$Register, -$op2$$constant); + %} + ins_pipe(ialu_cconly_reg_imm); +%} + +instruct compI_iReg_imm(flagsReg icc, iRegI op1, aimmI op2) %{ + match(Set icc (CmpI op1 op2)); + effect( DEF icc, USE op1 ); + + size(4); + format %{ "cmp_32 $op1,$op2\t! 
int" %} + ins_encode %{ + __ cmp($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_cconly_reg_imm); +%} + +instruct testI_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 op2) zero)); + size(4); + format %{ "tst $op2,$op1" %} + + ins_encode %{ + __ tst($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} + +instruct testshlI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero)); + size(4); + format %{ "TST $op2,$op1<<$op3" %} + + ins_encode %{ + __ tst($op1$$Register, $op2$$Register, lsl($op3$$Register)); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} + +instruct testshlI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 (LShiftI op2 op3)) zero)); + size(4); + format %{ "tst $op2,$op1<<$op3" %} + + ins_encode %{ + __ tst($op1$$Register, $op2$$Register, lsl($op3$$constant)); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} + +instruct testsarI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero)); + size(4); + format %{ "TST $op2,$op1<<$op3" %} + + ins_encode %{ + __ tst($op1$$Register, $op2$$Register, asr($op3$$Register)); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} + +instruct testsarI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 (RShiftI op2 op3)) zero)); + size(4); + format %{ "tst $op2,$op1<<$op3" %} + + ins_encode %{ + __ tst($op1$$Register, $op2$$Register, asr($op3$$constant)); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} + +instruct testshrI_reg_reg_reg( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, iRegI op3, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero)); + size(4); + format %{ "TST $op2,$op1<<$op3" %} + + ins_encode %{ + __ tst($op1$$Register, $op2$$Register, lsr($op3$$Register)); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} + +instruct testshrI_reg_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, iRegI op2, immU5 op3, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 (URShiftI op2 op3)) zero)); + size(4); + format %{ "tst $op2,$op1<<$op3" %} + + ins_encode %{ + __ tst($op1$$Register, $op2$$Register, lsr($op3$$constant)); + %} + ins_pipe(ialu_cconly_reg_reg_zero); +%} + +instruct testI_reg_imm( flagsReg_EQNELTGE icc, iRegI op1, limmI op2, immI0 zero ) %{ + match(Set icc (CmpI (AndI op1 op2) zero)); + size(4); + format %{ "tst $op2,$op1" %} + + ins_encode %{ + __ tst($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_cconly_reg_imm_zero); +%} + +instruct compL_reg_reg_LTGE(flagsRegL_LTGE xcc, iRegL op1, iRegL op2, iRegI tmp) %{ + match(Set xcc (CmpL op1 op2)); + effect( DEF xcc, USE op1, USE op2, TEMP tmp ); + + size(8); + format %{ "CMP $op1.low,$op2.low\t\t! long\n\t" + "SBCS $tmp,$op1.hi,$op2.hi" %} + ins_encode %{ + __ cmp($op1$$Register, $op2$$Register); + __ sbcs($tmp$$Register, $op1$$Register->successor(), $op2$$Register->successor()); + %} + ins_pipe(ialu_cconly_reg_reg); +%} + +instruct compL_reg_reg_EQNE(flagsRegL_EQNE xcc, iRegL op1, iRegL op2) %{ + match(Set xcc (CmpL op1 op2)); + effect( DEF xcc, USE op1, USE op2 ); + + size(8); + format %{ "TEQ $op1.hi,$op2.hi\t\t! 
long\n\t" + "TEQ.eq $op1.lo,$op2.lo" %} + ins_encode %{ + __ teq($op1$$Register->successor(), $op2$$Register->successor()); + __ teq($op1$$Register, $op2$$Register, Assembler::EQ); + %} + ins_pipe(ialu_cconly_reg_reg); +%} + +instruct compL_reg_reg_LEGT(flagsRegL_LEGT xcc, iRegL op1, iRegL op2, iRegI tmp) %{ + match(Set xcc (CmpL op1 op2)); + effect( DEF xcc, USE op1, USE op2, TEMP tmp ); + + size(8); + format %{ "CMP $op2.low,$op1.low\t\t! long\n\t" + "SBCS $tmp,$op2.hi,$op1.hi" %} + ins_encode %{ + __ cmp($op2$$Register, $op1$$Register); + __ sbcs($tmp$$Register, $op2$$Register->successor(), $op1$$Register->successor()); + %} + ins_pipe(ialu_cconly_reg_reg); +%} + +instruct compUL_reg_reg(flagsRegUL xcc, iRegL op1, iRegL op2) %{ + match(Set xcc (CmpUL op1 op2)); + effect( DEF xcc, USE op1, USE op2 ); + + size(8); + format %{ "CMP $op1.hi,$op2.hi\t\t! long\n\t" + "CMP.eq $op1.low,$op2.low" %} + ins_encode %{ + __ cmp($op1$$Register->successor(), $op2$$Register->successor()); + __ cmp($op1$$Register, $op2$$Register, Assembler::EQ); + %} + ins_pipe(ialu_cconly_reg_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct compL_reg_con_LTGE(flagsRegL_LTGE xcc, iRegL op1, immLlowRot con, iRegI tmp) %{ + match(Set xcc (CmpL op1 con)); + effect( DEF xcc, USE op1, USE con, TEMP tmp ); + + size(8); + format %{ "CMP $op1.low,$con\t\t! long\n\t" + "SBCS $tmp,$op1.hi,0" %} + ins_encode %{ + __ cmp($op1$$Register, (int)$con$$constant); + __ sbcs($tmp$$Register, $op1$$Register->successor(), 0); + %} + + ins_pipe(ialu_cconly_reg_reg); +%} + +instruct compUL_reg_con(flagsRegUL xcc, iRegL op1, immLlowRot con ) %{ + match(Set xcc (CmpUL op1 con)); + effect( DEF xcc, USE op1, USE con ); + + size(8); + format %{ "CMP $op1.hi,0\t\t! long\n\t" + "CMP.eq $op1.low,$con" %} + ins_encode %{ + __ cmp($op1$$Register->successor(), 0); + __ cmp($op1$$Register, (int)$con$$constant, Assembler::EQ); + %} + + ins_pipe(ialu_cconly_reg_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct compL_reg_con_EQNE(flagsRegL_EQNE xcc, iRegL op1, immLlowRot con) %{ + match(Set xcc (CmpL op1 con)); + effect( DEF xcc, USE op1, USE con ); + + size(8); + format %{ "TEQ $op1.hi,0\t\t! long\n\t" + "TEQ.eq $op1.lo,$con" %} + ins_encode %{ + __ teq($op1$$Register->successor(), 0); + __ teq($op1$$Register, (int)$con$$constant, Assembler::EQ); + %} + + ins_pipe(ialu_cconly_reg_reg); +%} + +// TODO: try immLRot2 instead, (0, $con$$constant) becomes +// (hi($con$$constant), lo($con$$constant)) becomes +instruct compL_reg_con_LEGT(flagsRegL_LEGT xcc, iRegL op1, immLlowRot con, iRegL tmp) %{ + match(Set xcc (CmpL op1 con)); + effect( DEF xcc, USE op1, USE con, TEMP tmp ); + + size(8); + format %{ "RSBS $tmp,$op1.low,$con\t\t! 
long\n\t" + "RSCS $tmp,$op1.hi,0" %} + ins_encode %{ + __ rsbs($tmp$$Register, $op1$$Register, (long)$con$$constant); + __ rscs($tmp$$Register->successor(), $op1$$Register->successor(), 0); + %} + + ins_pipe(ialu_cconly_reg_reg); +%} + +/* instruct testL_reg_reg(flagsRegL xcc, iRegL op1, iRegL op2, immL0 zero) %{ */ +/* match(Set xcc (CmpL (AndL op1 op2) zero)); */ +/* ins_encode %{ */ +/* __ stop("testL_reg_reg unimplemented"); */ +/* %} */ +/* ins_pipe(ialu_cconly_reg_reg); */ +/* %} */ + +/* // useful for checking the alignment of a pointer: */ +/* instruct testL_reg_con(flagsRegL xcc, iRegL op1, immLlowRot con, immL0 zero) %{ */ +/* match(Set xcc (CmpL (AndL op1 con) zero)); */ +/* ins_encode %{ */ +/* __ stop("testL_reg_con unimplemented"); */ +/* %} */ +/* ins_pipe(ialu_cconly_reg_reg); */ +/* %} */ + +instruct compU_iReg_imm(flagsRegU icc, iRegI op1, aimmU31 op2 ) %{ + match(Set icc (CmpU op1 op2)); + + size(4); + format %{ "cmp_32 $op1,$op2\t! unsigned" %} + ins_encode %{ + __ cmp($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_cconly_reg_imm); +%} + +// Compare Pointers +instruct compP_iRegP(flagsRegP pcc, iRegP op1, iRegP op2 ) %{ + match(Set pcc (CmpP op1 op2)); + + size(4); + format %{ "CMP $op1,$op2\t! ptr" %} + ins_encode %{ + __ cmp($op1$$Register, $op2$$Register); + %} + ins_pipe(ialu_cconly_reg_reg); +%} + +instruct compP_iRegP_imm(flagsRegP pcc, iRegP op1, aimmP op2 ) %{ + match(Set pcc (CmpP op1 op2)); + + size(4); + format %{ "CMP $op1,$op2\t! ptr" %} + ins_encode %{ + assert($op2$$constant == 0 || _opnds[2]->constant_reloc() == relocInfo::none, "reloc in cmp?"); + __ cmp($op1$$Register, $op2$$constant); + %} + ins_pipe(ialu_cconly_reg_imm); +%} + +//----------Max and Min-------------------------------------------------------- +// Min Instructions +// Conditional move for min +instruct cmovI_reg_lt( iRegI op2, iRegI op1, flagsReg icc ) %{ + effect( USE_DEF op2, USE op1, USE icc ); + + size(4); + format %{ "MOV.lt $op2,$op1\t! min" %} + ins_encode %{ + __ mov($op2$$Register, $op1$$Register, Assembler::LT); + %} + ins_pipe(ialu_reg_flags); +%} + +// Min Register with Register. +instruct minI_eReg(iRegI op1, iRegI op2) %{ + match(Set op2 (MinI op1 op2)); + ins_cost(DEFAULT_COST*2); + expand %{ + flagsReg icc; + compI_iReg(icc,op1,op2); + cmovI_reg_lt(op2,op1,icc); + %} +%} + +// Max Instructions +// Conditional move for max +instruct cmovI_reg_gt( iRegI op2, iRegI op1, flagsReg icc ) %{ + effect( USE_DEF op2, USE op1, USE icc ); + format %{ "MOV.gt $op2,$op1\t! 
max" %} + ins_encode %{ + __ mov($op2$$Register, $op1$$Register, Assembler::GT); + %} + ins_pipe(ialu_reg_flags); +%} + +// Max Register with Register +instruct maxI_eReg(iRegI op1, iRegI op2) %{ + match(Set op2 (MaxI op1 op2)); + ins_cost(DEFAULT_COST*2); + expand %{ + flagsReg icc; + compI_iReg(icc,op1,op2); + cmovI_reg_gt(op2,op1,icc); + %} +%} + + +//----------Float Compares---------------------------------------------------- +// Compare floating, generate condition code +instruct cmpF_cc(flagsRegF fcc, flagsReg icc, regF src1, regF src2) %{ + match(Set icc (CmpF src1 src2)); + effect(KILL fcc); + + size(8); + format %{ "FCMPs $src1,$src2\n\t" + "FMSTAT" %} + ins_encode %{ + __ vcmp_f32($src1$$FloatRegister, $src2$$FloatRegister); + __ get_fpsr(); + %} + ins_pipe(faddF_fcc_reg_reg_zero); +%} + +instruct cmpF0_cc(flagsRegF fcc, flagsReg icc, regF src1, immF0 src2) %{ + match(Set icc (CmpF src1 src2)); + effect(KILL fcc); + + size(8); + format %{ "FCMPs $src1,$src2\n\t" + "FMSTAT" %} + ins_encode %{ + __ vcmp_f32($src1$$FloatRegister, 0); + __ get_fpsr(); + %} + ins_pipe(faddF_fcc_reg_reg_zero); +%} + +instruct cmpD_cc(flagsRegF fcc, flagsReg icc, regD src1, regD src2) %{ + match(Set icc (CmpD src1 src2)); + effect(KILL fcc); + + size(8); + format %{ "FCMPd $src1,$src2 \n\t" + "FMSTAT" %} + ins_encode %{ + __ vcmp_f64($src1$$FloatRegister, $src2$$FloatRegister); + __ get_fpsr(); + %} + ins_pipe(faddD_fcc_reg_reg_zero); +%} + +instruct cmpD0_cc(flagsRegF fcc, flagsReg icc, regD src1, immD0 src2) %{ + match(Set icc (CmpD src1 src2)); + effect(KILL fcc); + + size(8); + format %{ "FCMPZd $src1,$src2 \n\t" + "FMSTAT" %} + ins_encode %{ + __ vcmp_f64($src1$$FloatRegister, 0); + __ get_fpsr(); + %} + ins_pipe(faddD_fcc_reg_reg_zero); +%} + +// Compare floating, generate -1,0,1 +instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsRegF fcc) %{ + match(Set dst (CmpF3 src1 src2)); + effect(KILL fcc); + ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME + size(20); + // same number of instructions as code using conditional moves but + // doesn't kill integer condition register + format %{ "FCMPs $dst,$src1,$src2 \n\t" + "VMRS $dst, FPSCR \n\t" + "OR $dst, $dst, 0x08000000 \n\t" + "EOR $dst, $dst, $dst << 3 \n\t" + "MOV $dst, $dst >> 30" %} + ins_encode %{ + __ vcmp_f32($src1$$FloatRegister, $src2$$FloatRegister); + __ floating_cmp($dst$$Register); + %} + ins_pipe( floating_cmp ); +%} + +instruct cmpF0_reg(iRegI dst, regF src1, immF0 src2, flagsRegF fcc) %{ + match(Set dst (CmpF3 src1 src2)); + effect(KILL fcc); + ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME + size(20); + // same number of instructions as code using conditional moves but + // doesn't kill integer condition register + format %{ "FCMPZs $dst,$src1,$src2 \n\t" + "VMRS $dst, FPSCR \n\t" + "OR $dst, $dst, 0x08000000 \n\t" + "EOR $dst, $dst, $dst << 3 \n\t" + "MOV $dst, $dst >> 30" %} + ins_encode %{ + __ vcmp_f32($src1$$FloatRegister, 0); + __ floating_cmp($dst$$Register); + %} + ins_pipe( floating_cmp ); +%} + +instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsRegF fcc) %{ + match(Set dst (CmpD3 src1 src2)); + effect(KILL fcc); + ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME + size(20); + // same number of instructions as code using conditional moves but + // doesn't kill integer condition register + format %{ "FCMPd $dst,$src1,$src2 \n\t" + "VMRS $dst, FPSCR \n\t" + "OR $dst, $dst, 0x08000000 \n\t" + "EOR $dst, $dst, $dst << 3 \n\t" + "MOV $dst, $dst >> 30" %} + ins_encode %{ + __ vcmp_f64($src1$$FloatRegister, 
$src2$$FloatRegister);
+    __ floating_cmp($dst$$Register);
+  %}
+  ins_pipe( floating_cmp );
+%}
+
+instruct cmpD0_reg(iRegI dst, regD src1, immD0 src2, flagsRegF fcc) %{
+  match(Set dst (CmpD3 src1 src2));
+  effect(KILL fcc);
+  ins_cost(DEFAULT_COST*3+BRANCH_COST*3); // FIXME
+  size(20);
+  // same number of instructions as code using conditional moves but
+  // doesn't kill integer condition register
+  format %{ "FCMPZd $dst,$src1,$src2 \n\t"
+            "VMRS $dst, FPSCR \n\t"
+            "OR $dst, $dst, 0x08000000 \n\t"
+            "EOR $dst, $dst, $dst << 3 \n\t"
+            "MOV $dst, $dst >> 30" %}
+  ins_encode %{
+    __ vcmp_f64($src1$$FloatRegister, 0);
+    __ floating_cmp($dst$$Register);
+  %}
+  ins_pipe( floating_cmp );
+%}
+
+//----------Branches---------------------------------------------------------
+// Jump
+// (compare 'operand indIndex' and 'instruct addP_reg_reg' above)
+// FIXME
+instruct jumpXtnd(iRegX switch_val, iRegP tmp) %{
+  match(Jump switch_val);
+  effect(TEMP tmp);
+  ins_cost(350);
+  format %{ "ADD $tmp, $constanttablebase, $switch_val\n\t"
+            "LDR $tmp,[$tmp + $constantoffset]\n\t"
+            "BX $tmp" %}
+  size(20);
+  ins_encode %{
+    Register table_reg;
+    Register label_reg = $tmp$$Register;
+    if (constant_offset() == 0) {
+      table_reg = $constanttablebase;
+      __ ldr(label_reg, Address(table_reg, $switch_val$$Register));
+    } else {
+      table_reg = $tmp$$Register;
+      int offset = $constantoffset;
+      if (is_memoryP(offset)) {
+        __ add(table_reg, $constanttablebase, $switch_val$$Register);
+        __ ldr(label_reg, Address(table_reg, offset));
+      } else {
+        __ mov(table_reg, $constantoffset);
+        __ add(table_reg, $constanttablebase, table_reg);
+        __ ldr(label_reg, Address(table_reg, $switch_val$$Register));
+      }
+    }
+    __ b(label_reg); // ldr + b better than ldr to PC for branch predictor?
+    // __ ldr(PC, Address($table$$Register, $switch_val$$Register));
+  %}
+  ins_pipe(ialu_reg_reg);
+%}
+
+// // Direct Branch.
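As an aside on the CmpF3/CmpD3 rules above: the VMRS/OR/EOR/shift sequence in their format strings turns the VFP compare flags directly into the -1/0/+1 result Java expects. A minimal C++ model of that bit trick is sketched below (illustrative only, not VM code; it assumes the final ">> 30" is an arithmetic shift and that FPSCR carries N,Z,C,V in bits 31..28):

```cpp
#include <cstdint>

// Model of the floating_cmp flag trick (assumption: ">> 30" above is arithmetic).
// fpscr holds the VFP compare flags N,Z,C,V in bits 31..28.
int floating_cmp_model(uint32_t fpscr) {
  uint32_t v = fpscr | 0x08000000u;  // plant a 1 in bit 27
  v ^= v << 3;                       // now bit 31 = N ^ V, bit 30 = !Z
  return (int32_t)v >> 30;           // -1 (less or unordered), 0 (equal), +1 (greater)
}
```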
+instruct branch(label labl) %{ + match(Goto); + effect(USE labl); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B $labl" %} + ins_encode %{ + __ b(*($labl$$label)); + %} + ins_pipe(br); +%} + +// Conditional Direct Branch +instruct branchCon(cmpOp cmp, flagsReg icc, label labl) %{ + match(If cmp icc); + effect(USE labl); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $icc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +instruct branchCon_EQNELTGE(cmpOp0 cmp, flagsReg_EQNELTGE icc, label labl) %{ + match(If cmp icc); + effect(USE labl); + predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $icc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{ + match(If cmp icc); + effect(USE labl); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $icc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +instruct branchConP(cmpOpP cmp, flagsRegP pcc, label labl) %{ + match(If cmp pcc); + effect(USE labl); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $pcc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +instruct branchConL_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, label labl) %{ + match(If cmp xcc); + effect(USE labl); + predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $xcc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +instruct branchConL_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, label labl) %{ + match(If cmp xcc); + effect(USE labl); + predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $xcc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +instruct branchConL_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, label labl) %{ + match(If cmp xcc); + effect(USE labl); + predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $xcc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +instruct branchConUL(cmpOpU cmp, flagsRegUL xcc, label labl) %{ + match(If cmp xcc); + effect(USE labl); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $xcc,$labl" %} + ins_encode %{ + __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{ + match(CountedLoopEnd cmp icc); + effect(USE labl); + + size(4); + ins_cost(BRANCH_COST); + format %{ "B$cmp $icc,$labl\t! 
Loop end" %} + ins_encode %{ + __ b(*($labl$$label), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(br_cc); +%} + +// instruct branchLoopEndU(cmpOpU cmp, flagsRegU icc, label labl) %{ +// match(CountedLoopEnd cmp icc); +// ins_pipe(br_cc); +// %} + +// ============================================================================ +// Long Compare +// +// Currently we hold longs in 2 registers. Comparing such values efficiently +// is tricky. The flavor of compare used depends on whether we are testing +// for LT, LE, or EQ. For a simple LT test we can check just the sign bit. +// The GE test is the negated LT test. The LE test can be had by commuting +// the operands (yielding a GE test) and then negating; negate again for the +// GT test. The EQ test is done by ORcc'ing the high and low halves, and the +// NE test is negated from that. + +// Due to a shortcoming in the ADLC, it mixes up expressions like: +// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the +// difference between 'Y' and '0L'. The tree-matches for the CmpI sections +// are collapsed internally in the ADLC's dfa-gen code. The match for +// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the +// foo match ends up with the wrong leaf. One fix is to not match both +// reg-reg and reg-zero forms of long-compare. This is unfortunate because +// both forms beat the trinary form of long-compare and both are very useful +// on Intel which has so few registers. + +// instruct branchCon_long(cmpOp cmp, flagsRegL xcc, label labl) %{ +// match(If cmp xcc); +// ins_pipe(br_cc); +// %} + +// Manifest a CmpL3 result in an integer register. Very painful. +// This is the test to avoid. +instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr ) %{ + match(Set dst (CmpL3 src1 src2) ); + effect( KILL ccr ); + ins_cost(6*DEFAULT_COST); // FIXME + size(32); + format %{ + "CMP $src1.hi, $src2.hi\t\t! long\n" + "\tMOV.gt $dst, 1\n" + "\tmvn.lt $dst, 0\n" + "\tB.ne done\n" + "\tSUBS $dst, $src1.lo, $src2.lo\n" + "\tMOV.hi $dst, 1\n" + "\tmvn.lo $dst, 0\n" + "done:" %} + ins_encode %{ + Label done; + __ cmp($src1$$Register->successor(), $src2$$Register->successor()); + __ mov_i($dst$$Register, 1, Assembler::GT); + __ mvn_i($dst$$Register, 0, Assembler::LT); + __ b(done, Assembler::NE); + __ subs($dst$$Register, $src1$$Register, $src2$$Register); + __ mov_i($dst$$Register, 1, Assembler::HI); + __ mvn_i($dst$$Register, 0, Assembler::LO); + __ bind(done); + %} + ins_pipe(cmpL_reg); +%} + +// Conditional move +instruct cmovLL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + ins_cost(150); + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! 
long\n\t" + "MOV$cmp $dst,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovLL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + ins_cost(150); + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t" + "MOV$cmp $dst,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovLL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegL dst, iRegL src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + + ins_cost(150); + size(8); + format %{ "MOV$cmp $dst.lo,$src.lo\t! long\n\t" + "MOV$cmp $dst,$src.hi" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), $src$$Register->successor(), (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovLL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegL dst, immL0 src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + ins_cost(140); + size(8); + format %{ "MOV$cmp $dst.lo,0\t! long\n\t" + "MOV$cmp $dst,0" %} + ins_encode %{ + __ mov($dst$$Register, 0, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegL dst, immL0 src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + ins_cost(140); + size(8); + format %{ "MOV$cmp $dst.lo,0\t! long\n\t" + "MOV$cmp $dst,0" %} + ins_encode %{ + __ mov($dst$$Register, 0, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovLL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegL dst, immL0 src) %{ + match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + ins_cost(140); + size(8); + format %{ "MOV$cmp $dst.lo,0\t! 
long\n\t" + "MOV$cmp $dst,0" %} + ins_encode %{ + __ mov($dst$$Register, 0, (Assembler::Condition)($cmp$$cmpcode)); + __ mov($dst$$Register->successor(), 0, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovIL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovIL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovIL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegI dst, iRegI src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovIL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + ins_cost(140); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovIL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + ins_cost(140); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovIL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegI dst, immI16 src) %{ + match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + + ins_cost(140); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovPL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ 
mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegP dst, iRegP src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + + ins_cost(150); + size(4); + format %{ "MOV$cmp $dst,$src" %} + ins_encode %{ + __ mov($dst$$Register, $src$$Register, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_reg); +%} + +instruct cmovPL_imm_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + ins_cost(140); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovPL_imm_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + ins_cost(140); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovPL_imm_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, iRegP dst, immP0 src) %{ + match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + + ins_cost(140); + format %{ "MOVW$cmp $dst,$src" %} + ins_encode %{ + __ movw_i($dst$$Register, $src$$constant, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(ialu_imm); +%} + +instruct cmovFL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + ins_cost(150); + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovFL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + ins_cost(150); + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode)); + %} + 
ins_pipe(int_conditional_float_move); +%} + +instruct cmovFL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, regF dst, regF src) %{ + match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + ins_cost(150); + size(4); + format %{ "FCPYS$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f32($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovDL_reg_LTGE(cmpOpL cmp, flagsRegL_LTGE xcc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); + + ins_cost(150); + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovDL_reg_EQNE(cmpOpL cmp, flagsRegL_EQNE xcc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); + + ins_cost(150); + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +instruct cmovDL_reg_LEGT(cmpOpL_commute cmp, flagsRegL_LEGT xcc, regD dst, regD src) %{ + match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src))); + predicate(_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ); + + ins_cost(150); + size(4); + format %{ "FCPYD$cmp $dst,$src" %} + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$FloatRegister, (Assembler::Condition)($cmp$$cmpcode)); + %} + ins_pipe(int_conditional_float_move); +%} + +// ============================================================================ +// Safepoint Instruction +// rather than KILL R12, it would be better to use any reg as +// TEMP. Can't do that at this point because it crashes the compiler +instruct safePoint_poll(iRegP poll, R12RegI tmp, flagsReg icc) %{ + match(SafePoint poll); + effect(USE poll, KILL tmp, KILL icc); + + size(4); + format %{ "LDR $tmp,[$poll]\t! Safepoint: poll for GC" %} + ins_encode %{ + __ relocate(relocInfo::poll_type); + __ ldr($tmp$$Register, Address($poll$$Register)); + %} + ins_pipe(loadPollP); +%} + + +// ============================================================================ +// Call Instructions +// Call Java Static Instruction +instruct CallStaticJavaDirect( method meth ) %{ + match(CallStaticJava); + predicate(! 
((CallStaticJavaNode*)n)->is_method_handle_invoke());
+  effect(USE meth);
+  size(call_static_enc_size(this, _method, _method_handle_invoke));
+
+  ins_cost(CALL_COST);
+  format %{ "CALL,static ==> " %}
+  ins_encode( Java_Static_Call( meth ), call_epilog );
+  ins_pipe(simple_call);
+%}
+
+// Call Java Static Instruction (method handle version)
+instruct CallStaticJavaHandle( method meth ) %{
+  match(CallStaticJava);
+  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
+  effect(USE meth);
+  size(call_static_enc_size(this, _method, _method_handle_invoke));
+
+  // FP is saved by all callees (for interpreter stack correction).
+  // We use it here for a similar purpose, in {preserve,restore}_FP.
+
+  ins_cost(CALL_COST);
+  format %{ "CALL,static/MethodHandle ==> " %}
+  ins_encode( preserve_SP, Java_Static_Call( meth ), restore_SP, call_epilog );
+  ins_pipe(simple_call);
+%}
+
+// Call Java Dynamic Instruction
+instruct CallDynamicJavaDirect( method meth ) %{
+  match(CallDynamicJava);
+  effect(USE meth);
+  size(call_dynamic_enc_size());
+
+  ins_cost(CALL_COST);
+  format %{ "MOV_OOP (empty),R_R8\n\t"
+            "CALL,dynamic ; NOP ==> " %}
+  ins_encode( Java_Dynamic_Call( meth ), call_epilog );
+  ins_pipe(call);
+%}
+
+// Call Runtime Instruction
+instruct CallRuntimeDirect(method meth) %{
+  match(CallRuntime);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  size(call_runtime_enc_size(this));
+
+  format %{ "CALL,runtime" %}
+  ins_encode( Java_To_Runtime( meth ),
+              call_epilog );
+  ins_pipe(simple_call);
+%}
+
+// Call runtime without safepoint - same as CallRuntime
+instruct CallLeafDirect(method meth) %{
+  match(CallLeaf);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  size(call_runtime_enc_size(this));
+
+  format %{ "CALL,runtime leaf" %}
+  // TODO: need save_last_PC here?
+  ins_encode( Java_To_Runtime( meth ),
+              call_epilog );
+  ins_pipe(simple_call);
+%}
+
+// Call runtime without safepoint - same as CallLeaf
+instruct CallLeafNoFPDirect(method meth) %{
+  match(CallLeafNoFP);
+  effect(USE meth);
+  ins_cost(CALL_COST);
+  size(call_runtime_enc_size(this));
+
+  format %{ "CALL,runtime leaf nofp" %}
+  // TODO: need save_last_PC here?
+  ins_encode( Java_To_Runtime( meth ),
+              call_epilog );
+  ins_pipe(simple_call);
+%}
+
+// Tail Call; Jump from runtime stub to Java code.
+// Also known as an 'interprocedural jump'.
+// Target of jump will eventually return to caller.
+// TailJump below removes the return address.
+instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
+  match(TailCall jump_target method_oop );
+
+  ins_cost(CALL_COST);
+  format %{ "MOV Rexception_pc, LR\n\t"
+            "jump $jump_target \t! $method_oop holds method oop" %}
+  ins_encode %{
+    __ mov(r3, lr); // this is used only to call
+                    // StubRoutines::forward_exception_entry()
+                    // which expects PC of exception in
+                    // R3. FIXME?
+    __ b($jump_target$$Register);
+  %}
+  ins_pipe(tail_call);
+%}
+
+
+// Return Instruction
+instruct Ret() %{
+  match(Return);
+
+  format %{ "ret LR" %}
+
+  ins_encode %{
+    __ ret(lr);
+  %}
+
+  ins_pipe(br);
+%}
+
+
+// Tail Jump; remove the return address; jump to target.
+// TailCall above leaves the return address around.
+// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
+// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
+// "restore" before this instruction (in Epilogue), we need to materialize it
+// in %i0.
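The TailCall/TailJump comments above, and the TailJump rule just below, hinge on one point: the jump target eventually returns to the *original* caller, so no new return address is created. A plain C++ analogy of the distinction (illustrative only):

```cpp
long target(long x);       // some callee

long regular_call(long x) {
  return target(x) + 1;    // control must come back here, so a return address is needed
}

long tail_position(long x) {
  return target(x);        // nothing left to do here: this can compile to a plain branch,
                           // and target() then returns directly to tail_position()'s caller
}
```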
+instruct tailjmpInd(IPRegP jump_target, RExceptionRegP ex_oop) %{
+  match( TailJump jump_target ex_oop );
+  ins_cost(CALL_COST);
+  format %{ "MOV Rexception_pc, LR\n\t"
+            "jump $jump_target \t! $ex_oop holds exc. oop" %}
+  ins_encode %{
+    __ mov(r3, lr);
+    __ b($jump_target$$Register);
+  %}
+  ins_pipe(tail_call);
+%}
+
+// Create exception oop: created by stack-crawling runtime code.
+// Created exception is now available to this handler, and is setup
+// just prior to jumping to this handler. No code emitted.
+instruct CreateException( RExceptionRegP ex_oop )
+%{
+  match(Set ex_oop (CreateEx));
+  ins_cost(0);
+
+  size(0);
+  // use the following format syntax
+  format %{ "! exception oop is in Rexception_obj; no code emitted" %}
+  ins_encode();
+  ins_pipe(empty);
+%}
+
+
+// Rethrow exception:
+// The exception oop will come in the first argument position.
+// Then JUMP (not call) to the rethrow stub code.
+instruct RethrowException()
+%{
+  match(Rethrow);
+  ins_cost(CALL_COST);
+
+  // use the following format syntax
+  format %{ "b rethrow_stub" %}
+  ins_encode %{
+    Register scratch = r1;
+    assert_different_registers(scratch, c_rarg0, lr);
+    __ jump(OptoRuntime::rethrow_stub(), relocInfo::runtime_call_type, scratch);
+  %}
+  ins_pipe(tail_call);
+%}
+
+
+// Die now
+instruct ShouldNotReachHere( )
+%{
+  match(Halt);
+  ins_cost(CALL_COST);
+
+  size(4);
+  // Use the following format syntax
+  format %{ "ShouldNotReachHere" %}
+  ins_encode %{
+    __ udf(0xdead);
+  %}
+  ins_pipe(tail_call);
+%}
+
+// ============================================================================
+// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
+// array for an instance of the superklass. Set a hidden internal cache on a
+// hit (cache is checked with exposed code in gen_subtype_check()). Return
+// not zero for a miss or zero for a hit. The encoding ALSO sets flags.
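The comment above compresses a lot; before the instruction itself, a hypothetical C++ sketch of the slow path it describes may help (struct layout and field names are illustrative only, not HotSpot's real Klass layout):

```cpp
#include <cstddef>

// Illustrative sketch only: HotSpot's real secondary-supers structure differs.
struct KlassSketch {
  KlassSketch** secondary_supers;      // assumed NULL-terminated list of secondary supers
  KlassSketch*  secondary_super_cache; // the "hidden internal cache" mentioned above
};

// Returns 0 for a hit (and remembers it in the cache), non-zero for a miss,
// mirroring the contract stated in the comment above.
int partial_subtype_check_sketch(KlassSketch* sub, KlassSketch* super) {
  for (KlassSketch** p = sub->secondary_supers; *p != nullptr; ++p) {
    if (*p == super) {
      sub->secondary_super_cache = super;  // the next fast-path check hits the cache
      return 0;
    }
  }
  return 1;
}
```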
+instruct partialSubtypeCheck( R0RegP index, R1RegP sub, R2RegP super, flagsRegP pcc, LRRegP lr, R9RegI r9, R12RegI r12 ) %{ + match(Set index (PartialSubtypeCheck sub super)); + effect( KILL pcc, KILL r9, KILL r12, KILL lr ); + ins_cost(DEFAULT_COST*10); + format %{ "CALL PartialSubtypeCheck" %} + ins_encode %{ + __ call(StubRoutines::aarch32::partial_subtype_check(), relocInfo::runtime_call_type); + %} + ins_pipe(partial_subtype_check_pipe); +%} + +/* instruct partialSubtypeCheck_vs_zero( flagsRegP pcc, o1RegP sub, o2RegP super, immP0 zero, o0RegP idx, o7RegP o7 ) %{ */ +/* match(Set pcc (CmpP (PartialSubtypeCheck sub super) zero)); */ +/* ins_pipe(partial_subtype_check_pipe); */ +/* %} */ + + +// ============================================================================ +// inlined locking and unlocking + +instruct cmpFastLock(flagsRegP pcc, iRegP object, iRegP box, iRegP mark, iRegP scratch2, iRegP scratch ) +%{ + match(Set pcc (FastLock object box)); + + effect(TEMP mark, TEMP scratch, TEMP scratch2); + ins_cost(100); + + format %{ "FASTLOCK $object, $box; KILL $mark, $scratch, $scratch2" %} + ins_encode %{ + __ fast_lock($object$$Register, $box$$Register, $mark$$Register, $scratch$$Register, $scratch2$$Register); + %} + ins_pipe(long_memory_op); +%} + + +instruct cmpFastUnlock(flagsRegP pcc, iRegP object, iRegP box, iRegP scratch2, iRegP scratch ) %{ + match(Set pcc (FastUnlock object box)); + effect(TEMP scratch, TEMP scratch2); + ins_cost(100); + + format %{ "FASTUNLOCK $object, $box; KILL $scratch, $scratch2" %} + ins_encode %{ + __ fast_unlock($object$$Register, $box$$Register, $scratch$$Register, $scratch2$$Register); + %} + ins_pipe(long_memory_op); +%} + +// Count and Base registers are fixed because the allocator cannot +// kill unknown registers. The encodings are generic. +instruct clear_array(iRegX cnt, iRegP base, iRegI temp, iRegX zero, Universe dummy, flagsReg cpsr) %{ + match(Set dummy (ClearArray cnt base)); + effect(TEMP temp, TEMP zero, KILL cpsr); + ins_cost(300); + format %{ "MOV $zero,0\n" + " MOV $temp,$cnt\n" + "loop: SUBS $temp,$temp,4\t! Count down a dword of bytes\n" + " STR.ge $zero,[$base+$temp]\t! delay slot" + " B.gt loop\t\t! 
Clearing loop\n" %} + ins_encode %{ + __ mov($zero$$Register, 0); + __ mov($temp$$Register, $cnt$$Register); + Label(loop); + __ bind(loop); + __ subs($temp$$Register, $temp$$Register, 4); + __ str($zero$$Register, Address($base$$Register, $temp$$Register), Assembler::GE); + __ b(loop, Assembler::GT); + %} + ins_pipe(long_memory_op); +%} + +instruct string_compareUU(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result, + iRegI tmp1, iRegI tmp2, Q0_regD tmp3, Q1_regD tmp4, flagsReg ccr) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP result, KILL ccr); + + ins_cost(300); + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1, $tmp2, $tmp3, $tmp4" %} + ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2, tmp3, tmp4, (2), (2)) ); + ins_pipe(long_memory_op); +%} + +instruct string_compareLL(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result, + iRegI tmp1, iRegI tmp2, Q0_regD tmp3, Q1_regD tmp4, flagsReg ccr) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP result, KILL ccr); + + ins_cost(300); + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1, $tmp2, $tmp3, $tmp4" %} + ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2, tmp3, tmp4, (1), (1)) ); + ins_pipe(long_memory_op); +%} + +instruct string_compareUL(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result, + iRegI tmp1, iRegI tmp2, Q0_regD tmp3, Q1_regD tmp4, flagsReg ccr) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP result, KILL ccr); + + ins_cost(300); + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1, $tmp2, $tmp3, $tmp4" %} + ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2, tmp3, tmp4, (2), (1)) ); + ins_pipe(long_memory_op); +%} + +instruct string_compareLU(R0RegP str1, R1RegP str2, R2RegI cnt1, R3RegI cnt2, iRegI result, + iRegI tmp1, iRegI tmp2, Q0_regD tmp3, Q1_regD tmp4, flagsReg ccr) %{ + predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); + match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP result, KILL ccr); + + ins_cost(300); + format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1, $tmp2, $tmp3, $tmp4" %} + ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result, tmp1, tmp2, tmp3, tmp4, (1), (2)) ); + ins_pipe(long_memory_op); +%} + +instruct string_equalsUU(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, flagsReg ccr) %{ + predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp1, TEMP result, KILL ccr); + + ins_cost(300); + format %{ "String Equals $str1,$str2,$cnt -> $result # KILL $tmp1" %} + ins_encode( 
enc_Array_Equals(str1, str2, cnt, tmp1, result, (2), (false)) ); + ins_pipe(long_memory_op); +%} + +instruct string_equalsLL(R0RegP str1, R1RegP str2, R2RegI cnt, iRegI result, iRegI tmp1, flagsReg ccr) %{ + predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (StrEquals (Binary str1 str2) cnt)); + effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP tmp1, TEMP result, KILL ccr); + + ins_cost(300); + format %{ "String Equals $str1,$str2,$cnt -> $result # KILL $tmp1" %} + ins_encode( enc_Array_Equals(str1, str2, cnt, tmp1, result, (1), (false)) ); + ins_pipe(long_memory_op); +%} + +instruct array_equalsUU(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI result, flagsReg ccr) %{ + predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP result, KILL ccr); + + ins_cost(300); + format %{ "Array Equals $ary1,$ary2 -> $result # KILL $tmp1,$tmp2" %} + ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result, (2), (true))); + ins_pipe(long_memory_op); +%} + +instruct array_equalsLL(R0RegP ary1, R1RegP ary2, iRegI tmp1, iRegI tmp2, iRegI result, flagsReg ccr) %{ + predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP result, KILL ccr); + + ins_cost(300); + format %{ "Array Equals $ary1,$ary2 -> $result # KILL $tmp1,$tmp2" %} + ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result, (1), (true))); + ins_pipe(long_memory_op); +%} + +instruct string_compress(R2RegP src, R1RegP dst, R3RegI len, + R9RegI tmp1, Q0_regD tmp2, Q1_regD tmp3, R12RegI tmp4, LRRegP lr, R0RegI result, flagsReg ccr) +%{ + match(Set result (StrCompressedCopy src (Binary dst len))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP lr, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr); + + format %{ "String Compress $src,$dst -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4, $lr" %} + ins_encode( enc_Char_Array_Compress(src, dst, len, tmp1, tmp2, tmp3, tmp4, result, ccr) ); + ins_pipe(long_memory_op); +%} + +instruct string_inflate(Universe dummy, R0RegP src, R1RegP dst, R2RegI len, + iRegI tmp1, Q0_regD tmp2, LRRegP lr, flagsReg ccr) +%{ + match(Set dummy (StrInflatedCopy src (Binary dst len))); + effect(TEMP tmp1, TEMP tmp2, TEMP lr, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr); + + format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2, $lr" %} + ins_encode( enc_Byte_Array_Inflate(src, dst, len, tmp1, tmp2, ccr) ); + ins_pipe(long_memory_op); +%} + +//---------- Zeros Count Instructions ------------------------------------------ + +instruct countLeadingZerosI(iRegI dst, iRegI src) %{ + match(Set dst (CountLeadingZerosI src)); + size(4); + format %{ "CLZ_32 $dst,$src" %} + ins_encode %{ + __ clz($dst$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countLeadingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{ + match(Set dst (CountLeadingZerosL src)); + effect(TEMP tmp, TEMP dst, KILL ccr); + size(16); + format %{ "CLZ $dst,$src.hi\n\t" + "TEQ $dst,32\n\t" + "CLZ.eq $tmp,$src.lo\n\t" + "ADD.eq $dst, $dst, $tmp\n\t" %} + ins_encode %{ + __ clz($dst$$Register, $src$$Register->successor()); + __ teq($dst$$Register, 32); + __ clz($tmp$$Register, $src$$Register, Assembler::EQ); + __ add($dst$$Register, $dst$$Register, $tmp$$Register, Assembler::EQ); + %} + ins_pipe(ialu_reg); +%} + +instruct 
countTrailingZerosI(iRegI dst, iRegI src, iRegI tmp) %{ + match(Set dst (CountTrailingZerosI src)); + effect(TEMP tmp); + size(8); + format %{ "RBIT_32 $tmp, $src\n\t" + "CLZ_32 $dst,$tmp" %} + ins_encode %{ + __ rbit($tmp$$Register, $src$$Register); + __ clz($dst$$Register, $tmp$$Register); + %} + ins_pipe(ialu_reg); +%} + +instruct countTrailingZerosL(iRegI dst, iRegL src, iRegI tmp, flagsReg ccr) %{ + match(Set dst (CountTrailingZerosL src)); + effect(TEMP tmp, TEMP dst, KILL ccr); + size(24); + format %{ "RBIT $tmp,$src.lo\n\t" + "CLZ $dst,$tmp\n\t" + "TEQ $dst,32\n\t" + "RBIT $tmp,$src.hi\n\t" + "CLZ.eq $tmp,$tmp\n\t" + "ADD.eq $dst,$dst,$tmp\n\t" %} + ins_encode %{ + __ rbit($tmp$$Register, $src$$Register); + __ clz($dst$$Register, $tmp$$Register); + __ teq($dst$$Register, 32); + __ rbit($tmp$$Register, $src$$Register->successor()); + __ clz($tmp$$Register, $tmp$$Register, Assembler::EQ); + __ add($dst$$Register, $dst$$Register, $tmp$$Register, Assembler::EQ); + %} + ins_pipe(ialu_reg); +%} + + +//---------- Population Count Instructions ------------------------------------- + +instruct popCountI(iRegI dst, iRegI src, regD_low tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + effect(TEMP tmp); + + format %{ "FMSR $tmp,$src\n\t" + "VCNT.8 $tmp,$tmp\n\t" + "VPADDL.U8 $tmp,$tmp\n\t" + "VPADDL.U16 $tmp,$tmp\n\t" + "FMRS $dst,$tmp" %} + size(20); + + ins_encode %{ + __ vmov_f32($tmp$$FloatRegister, $src$$Register); + __ vcnt_64($tmp$$FloatRegister, $tmp$$FloatRegister); + __ vpaddl_64_u8($tmp$$FloatRegister, $tmp$$FloatRegister); + __ vpaddl_64_u16($tmp$$FloatRegister, $tmp$$FloatRegister); + __ vmov_f32($dst$$Register, $tmp$$FloatRegister); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Note: Long.bitCount(long) returns an int. 
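The CLZ/RBIT rules above lean on two identities that are easy to check in plain C++; a self-contained sketch (not VM code, portable stand-ins for the CLZ and RBIT instructions):

```cpp
#include <cstdint>

int clz32(uint32_t x) {
  int n = 0;
  while (n < 32 && (x & 0x80000000u) == 0) { x <<= 1; ++n; }
  return n;                                  // CLZ(0) == 32, as on ARM
}

uint32_t rbit32(uint32_t x) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i) { r = (r << 1) | (x & 1u); x >>= 1; }
  return r;
}

// countLeadingZerosL above: CLZ the high word, and only if it was all zero
// (CLZ == 32) add the CLZ of the low word -- that is what the TEQ/ADD.eq pair does.
int clz64(uint32_t lo, uint32_t hi) {
  int n = clz32(hi);
  if (n == 32) n += clz32(lo);
  return n;
}

// countTrailingZerosI above: reverse the bits, then count leading zeros.
int ctz32(uint32_t x) {
  return clz32(rbit32(x));
}
```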
+instruct popCountL(iRegI dst, iRegL src, regD_low tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + effect(TEMP tmp); + + format %{ "FMDRR $tmp,$src.lo,$src.hi\n\t" + "VCNT.8 $tmp,$tmp\n\t" + "VPADDL.U8 $tmp,$tmp\n\t" + "VPADDL.U16 $tmp,$tmp\n\t" + "VPADDL.U32 $tmp,$tmp\n\t" + "FMRS $dst,$tmp" %} + + size(32); + + ins_encode %{ + __ vmov_f64($tmp$$FloatRegister, $src$$Register, $src$$Register->successor()); + __ vcnt_64($tmp$$FloatRegister, $tmp$$FloatRegister); + __ vpaddl_64_u8($tmp$$FloatRegister, $tmp$$FloatRegister); + __ vpaddl_64_u16($tmp$$FloatRegister, $tmp$$FloatRegister); + __ vpaddl_64_u32($tmp$$FloatRegister, $tmp$$FloatRegister); + __ vmov_f32($dst$$Register, $tmp$$FloatRegister); + %} + ins_pipe(ialu_reg); +%} + + +// ============================================================================ +//------------Bytes reverse-------------------------------------------------- + +instruct bytes_reverse_int(iRegI dst, iRegI src) %{ + match(Set dst (ReverseBytesI src)); + + size(4); + format %{ "REV32 $dst,$src" %} + ins_encode %{ + __ rev($dst$$Register, $src$$Register); + %} + ins_pipe( iload_mem ); // FIXME +%} + +instruct bytes_reverse_long(iRegL dst, iRegL src) %{ + match(Set dst (ReverseBytesL src)); + effect(TEMP dst); + size(8); + format %{ "REV $dst.lo,$src.lo\n\t" + "REV $dst.hi,$src.hi" %} + ins_encode %{ + __ rev($dst$$Register, $src$$Register->successor()); + __ rev($dst$$Register->successor(), $src$$Register); + %} + ins_pipe( iload_mem ); // FIXME +%} + +instruct bytes_reverse_unsigned_short(iRegI dst, iRegI src) %{ + match(Set dst (ReverseBytesUS src)); + size(4); + format %{ "REV16 $dst,$src" %} + ins_encode %{ + __ rev16($dst$$Register, $src$$Register); + %} + ins_pipe( iload_mem ); // FIXME +%} + +instruct bytes_reverse_short(iRegI dst, iRegI src) %{ + match(Set dst (ReverseBytesS src)); + size(4); + format %{ "REVSH $dst,$src" %} + ins_encode %{ + __ revsh($dst$$Register, $src$$Register); + %} + ins_pipe( iload_mem ); // FIXME +%} + + +// ====================VECTOR INSTRUCTIONS===================================== + +// Load Aligned Packed values into a Double Register +instruct loadV8(vecD dst, memoryD mem) %{ + predicate(n->as_LoadVector()->memory_size() == 8); + match(Set dst (LoadVector mem)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "FLDD $mem,$dst\t! load vector (8 bytes)" %} + ins_encode %{ + __ vldr_f64($dst$$FloatRegister, $mem$$Address); + %} + ins_pipe(floadD_mem); +%} + +// Load Aligned Packed values into a Double Register Pair +instruct loadV16(vecX dst, memoryvld mem) %{ + predicate(n->as_LoadVector()->memory_size() == 16); + match(Set dst (LoadVector mem)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "VLD1 $mem,$dst.Q\t! load vector (16 bytes)" %} + ins_encode %{ + __ vld1_16($dst$$FloatRegister, $dst$$FloatRegister->successor(FloatRegisterImpl::DOUBLE), $mem$$Address, Assembler::ALIGN_STD); + %} + ins_pipe(floadD_mem); // FIXME +%} + +// Store Vector in Double register to memory +instruct storeV8(memoryD mem, vecD src) %{ + predicate(n->as_StoreVector()->memory_size() == 8); + match(Set mem (StoreVector mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "FSTD $src,$mem\t! 
store vector (8 bytes)" %} + ins_encode %{ + __ vstr_f64($src$$FloatRegister, $mem$$Address); + %} + ins_pipe(fstoreD_mem_reg); +%} + +// Store Vector in Double Register Pair to memory +instruct storeV16(memoryvld mem, vecX src) %{ + predicate(n->as_StoreVector()->memory_size() == 16); + match(Set mem (StoreVector mem src)); + ins_cost(MEMORY_REF_COST); + size(4); + format %{ "VST1 $src,$mem\t! store vector (16 bytes)" %} + ins_encode %{ + __ vst1_16($src$$FloatRegister, $src$$FloatRegister->successor(FloatRegisterImpl::DOUBLE), $mem$$Address, Assembler::ALIGN_STD); + %} + ins_pipe(fstoreD_mem_reg); // FIXME +%} + +// Replicate scalar to packed byte values in Double register +instruct Repl8B_reg(vecD dst, iRegI src, iRegI tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + ins_cost(DEFAULT_COST*4); + effect(TEMP tmp); + size(16); + + // FIXME: could use PKH instruction instead? + format %{ "LSL $tmp, $src, 24 \n\t" + "OR $tmp, $tmp, ($tmp >> 8) \n\t" + "OR $tmp, $tmp, ($tmp >> 16) \n\t" + "FMDRR $dst,$tmp,$tmp\t" %} + ins_encode %{ + __ mov($tmp$$Register, $src$$Register, lsl(24)); + __ orr($tmp$$Register, $tmp$$Register, $tmp$$Register, lsr(8)); + __ orr($tmp$$Register, $tmp$$Register, $tmp$$Register, lsr(16)); + __ vmov_f64($dst$$FloatRegister, $tmp$$Register, $tmp$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed byte values in Double register +instruct Repl8B_reg_simd(vecD dst, iRegI src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (ReplicateB src)); + size(4); + + format %{ "VDUP.8 $dst,$src\t" %} + ins_encode %{ + __ vdup_64_8($dst$$FloatRegister, $src$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed byte values in Double register pair +instruct Repl16B_reg(vecX dst, iRegI src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (ReplicateB src)); + size(4); + + format %{ "VDUP.8 $dst.Q,$src\t" %} + ins_encode %{ + __ vdup_128_8($dst$$FloatRegister, $src$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar constant to packed byte values in Double register +instruct Repl8B_immI(vecD dst, immI src, iRegI tmp) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + ins_cost(DEFAULT_COST*2); + effect(TEMP tmp); + size(12); + + format %{ "MOV $tmp, Repl4($src))\n\t" + "FMDRR $dst,$tmp,$tmp\t" %} + ins_encode( LdReplImmI(src, dst, tmp, (4), (1)) ); + ins_pipe(loadConFD); // FIXME +%} + +// Replicate scalar constant to packed byte values in Double register +// TODO: support negative constants with MVNI? 
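The non-SIMD ReplicateB rule above (Repl8B_reg) builds its splat pattern with plain shifts and ORs before FMDRR copies the word into both halves of the D register. The same trick in C++ (sketch only; the short variant further below uses a single 16-bit shift in the same way):

```cpp
#include <cstdint>

// Splat the low byte of src into all four bytes of a word, mirroring the
// LSL/ORR sequence shown in Repl8B_reg's format string above.
uint32_t splat_byte(uint32_t src) {
  uint32_t t = src << 24;  // low byte moved to the top byte
  t |= t >> 8;             // top two bytes now hold it
  t |= t >> 16;            // all four bytes hold it: 0x??????AB -> 0xABABABAB
  return t;                // FMDRR then writes this word into both halves of the D register
}
```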
+instruct Repl8B_immU8(vecD dst, immU8 src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (ReplicateB src)); + size(4); + + format %{ "VMOV.U8 $dst,$src" %} + ins_encode %{ + __ vmov_64_8($dst$$FloatRegister, $src$$constant); + %} + ins_pipe(loadConFD); // FIXME +%} + +// Replicate scalar constant to packed byte values in Double register pair +instruct Repl16B_immU8(vecX dst, immU8 src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (ReplicateB src)); + size(4); + + format %{ "VMOV.U8 $dst.Q,$src" %} + ins_encode %{ + __ vmov_128_8($dst$$FloatRegister, $src$$constant); + %} + ins_pipe(loadConFD); // FIXME +%} + +// Replicate scalar to packed short/char values into Double register +instruct Repl4S_reg(vecD dst, iRegI src, iRegI tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + ins_cost(DEFAULT_COST*3); + effect(TEMP tmp); + size(12); + + // FIXME: could use PKH instruction instead? + format %{ "LSL $tmp, $src, 16 \n\t" + "OR $tmp, $tmp, ($tmp >> 16) \n\t" + "FMDRR $dst,$tmp,$tmp\t" %} + ins_encode %{ + __ mov($tmp$$Register, $src$$Register, lsl(16)); + __ orr($tmp$$Register, $tmp$$Register, $tmp$$Register, lsr(16)); + __ vmov_f64($dst$$FloatRegister, $tmp$$Register, $tmp$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed byte values in Double register +instruct Repl4S_reg_simd(vecD dst, iRegI src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (ReplicateS src)); + size(4); + + format %{ "VDUP.16 $dst,$src\t" %} + ins_encode %{ + __ vdup_64_16($dst$$FloatRegister, $src$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed byte values in Double register pair +instruct Repl8S_reg(vecX dst, iRegI src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (ReplicateS src)); + size(4); + + format %{ "VDUP.16 $dst.Q,$src\t" %} + ins_encode %{ + __ vdup_128_16($dst$$FloatRegister, $src$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + + +// Replicate scalar constant to packed short/char values in Double register +instruct Repl4S_immI(vecD dst, immI src, iRegP tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + effect(TEMP tmp); + size(12); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "MOV $tmp, Repl2($src))\n\t" + "FMDRR $dst,$tmp,$tmp\t" %} + ins_encode( LdReplImmI(src, dst, tmp, (2), (2)) ); + ins_pipe(loadConFD); // FIXME +%} + +// Replicate scalar constant to packed byte values in Double register +instruct Repl4S_immU8(vecD dst, immU8 src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (ReplicateS src)); + size(4); + + format %{ "VMOV.U16 $dst,$src" %} + ins_encode %{ + __ vmov_64_16($dst$$FloatRegister, $src$$constant); + %} + ins_pipe(loadConFD); // FIXME +%} + +// Replicate scalar constant to packed byte values in Double register pair +instruct Repl8S_immU8(vecX dst, immU8 src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (ReplicateS src)); + size(4); + + format %{ "VMOV.U16 $dst.Q,$src" %} + ins_encode %{ + __ vmov_128_16($dst$$FloatRegister, $src$$constant); + %} + ins_pipe(loadConFD); // FIXME +%} + +// Replicate scalar to packed int values in Double 
register +instruct Repl2I_reg(vecD dst, iRegI src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + size(4); + + format %{ "FMDRR $dst,$src,$src\t" %} + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed int values in Double register pair +instruct Repl4I_reg(vecX dst, iRegI src) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateI src)); + ins_cost(DEFAULT_COST*2); + size(8); + + format %{ "FMDRR $dst.lo,$src,$src\n\t" + "FMDRR $dst.hi,$src,$src" %} + + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register); + __ vmov_f64($dst$$FloatRegister->successor(FloatRegisterImpl::DOUBLE), + $src$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed int values in Double register +instruct Repl2I_reg_simd(vecD dst, iRegI src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (ReplicateI src)); + size(4); + + format %{ "VDUP.32 $dst.D,$src\t" %} + ins_encode %{ + __ vdup_64_32($dst$$FloatRegister, $src$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed int values in Double register pair +instruct Repl4I_reg_simd(vecX dst, iRegI src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (ReplicateI src)); + size(4); + + format %{ "VDUP.32 $dst.Q,$src\t" %} + ins_encode %{ + __ vdup_128_32($dst$$FloatRegister, $src$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + + +// Replicate scalar zero constant to packed int values in Double register +instruct Repl2I_immI(vecD dst, immI src, iRegI tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + effect(TEMP tmp); + size(12); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "MOV $tmp, Repl1($src))\n\t" + "FMDRR $dst,$tmp,$tmp\t" %} + ins_encode( LdReplImmI(src, dst, tmp, (1), (4)) ); + ins_pipe(loadConFD); // FIXME +%} + +// Replicate scalar constant to packed byte values in Double register +instruct Repl2I_immU8(vecD dst, immU8 src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (ReplicateI src)); + size(4); + + format %{ "VMOV.I32 $dst.D,$src" %} + ins_encode %{ + __ vmov_64_32($dst$$FloatRegister, $src$$constant); + %} + ins_pipe(loadConFD); // FIXME +%} + +// Replicate scalar constant to packed byte values in Double register pair +instruct Repl4I_immU8(vecX dst, immU8 src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (ReplicateI src)); + size(4); + + format %{ "VMOV.I32 $dst.Q,$src" %} + ins_encode %{ + __ vmov_128_32($dst$$FloatRegister, $src$$constant); + %} + ins_pipe(loadConFD); // FIXME +%} + +// Replicate scalar to packed byte values in Double register pair +instruct Repl2L_reg(vecX dst, iRegL src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateL src)); + size(8); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FMDRR $dst.D,$src.lo,$src.hi\t\n" + "FMDRR $dst.D.next,$src.lo,$src.hi" %} + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register->successor()); + __ vmov_f64($dst$$FloatRegister->successor(FloatRegisterImpl::DOUBLE), + $src$$Register, $src$$Register->successor()); + %} + ins_pipe(ialu_reg); // FIXME +%} + + +// Replicate scalar 
to packed float values in Double register +instruct Repl2F_regI(vecD dst, iRegI src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + size(4); + + format %{ "FMDRR $dst.D,$src,$src\t" %} + ins_encode %{ + __ vmov_f64($dst$$FloatRegister, $src$$Register, $src$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed float values in Double register +instruct Repl2F_reg_vfp(vecD dst, regF src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + expand %{ + iRegI tmp; + MoveF2I_reg_reg(tmp, src); + Repl2F_regI(dst,tmp); + %} +%} + +// Replicate scalar to packed float values in Double register +instruct Repl2F_reg_simd(vecD dst, regF src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (ReplicateF src)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + + format %{ "VDUP.32 $dst.D,$src.D\t" %} + ins_encode %{ + __ vdups_64($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed float values in Double register pair +instruct Repl4F_reg(vecX dst, regF src, iRegI tmp) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (ReplicateF src)); + effect(TEMP tmp); + size(4*3); + ins_cost(DEFAULT_COST*3); // FIXME + + format %{ "FMRS $tmp,$src\n\t" + "FMDRR $dst.D,$tmp,$tmp\n\t" + "FMDRR $dst.D.next,$tmp,$tmp\t" %} + ins_encode %{ + __ vmov_f32($tmp$$Register, $src$$FloatRegister); + __ vmov_f64($dst$$FloatRegister, $tmp$$Register, $tmp$$Register); + __ vmov_f64($dst$$FloatRegister->successor(FloatRegisterImpl::DOUBLE), + $tmp$$Register, $tmp$$Register); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar to packed float values in Double register pair +instruct Repl4F_reg_simd(vecX dst, regF src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (ReplicateF src)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + + format %{ "VDUP.32 $dst.Q,$src.D\t" %} + ins_encode %{ + __ vdups_128($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// Replicate scalar zero constant to packed float values in Double register +instruct Repl2F_immI(vecD dst, immF src, iRegI tmp) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + effect(TEMP tmp); + size(12); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "MOV $tmp, Repl1($src))\n\t" + "FMDRR $dst,$tmp,$tmp\t" %} + ins_encode( LdReplImmF(src, dst, tmp) ); + ins_pipe(loadConFD); // FIXME +%} + +// Replicate scalar to packed double float values in Double register pair +instruct Repl2D_reg(vecX dst, regD src) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateD src)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FCPYD $dst.D.a,$src\n\t" + "FCPYD $dst.D.b,$src\t" %} + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src = $src$$FloatRegister; + __ vmov_f64(dsta, src); + FloatRegister dstb = dsta->successor(FloatRegisterImpl::DOUBLE); + __ vmov_f64(dstb, src); + %} + ins_pipe(ialu_reg); // FIXME +%} + +// ====================VECTOR ARITHMETIC======================================= + +// --------------------------------- ADD -------------------------------------- + +// Bytes vector add +instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set 
dst (AddVB src1 src2)); + format %{ "VADD.I8 $dst,$src1,$src2\t! add packed8B" %} + size(4); + ins_encode %{ + __ vadd_64_8($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB src1 src2)); + size(4); + format %{ "VADD.I8 $dst.Q,$src1.Q,$src2.Q\t! add packed16B" %} + ins_encode %{ + __ vadd_128_8($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Shorts/Chars vector add +instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVS src1 src2)); + size(4); + format %{ "VADD.I16 $dst,$src1,$src2\t! add packed4S" %} + ins_encode %{ + __ vadd_64_16($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS src1 src2)); + size(4); + format %{ "VADD.I16 $dst.Q,$src1.Q,$src2.Q\t! add packed8S" %} + ins_encode %{ + __ vadd_128_16($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector add +instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVI src1 src2)); + size(4); + format %{ "VADD.I32 $dst.D,$src1.D,$src2.D\t! add packed2I" %} + ins_encode %{ + __ vadd_64_32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI src1 src2)); + size(4); + format %{ "VADD.I32 $dst.Q,$src1.Q,$src2.Q\t! add packed4I" %} + ins_encode %{ + __ vadd_128_32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector add +instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL src1 src2)); + size(4); + format %{ "VADD.I64 $dst.Q,$src1.Q,$src2.Q\t! 
add packed2L" %} + ins_encode %{ + bool quad = true; + __ vadd_128_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Floats vector add +instruct vadd2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF src1 src2)); + ins_cost(DEFAULT_COST*2); // FIXME + + size(4*2); + format %{ "FADDS $dst.a,$src1.a,$src2.a\n\t" + "FADDS $dst.b,$src1.b,$src2.b" %} + ins_encode %{ + __ vadd_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ vadd_f32($dst$$FloatRegister->successor(FloatRegisterImpl::SINGLE), + $src1$$FloatRegister->successor(FloatRegisterImpl::SINGLE), + $src2$$FloatRegister->successor(FloatRegisterImpl::SINGLE)); + %} + + ins_pipe(faddF_reg_reg); // FIXME +%} + +instruct vadd4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVF src1 src2)); + size(4*4); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "FADDS $dst.a,$src1.a,$src2.a\n\t" + "FADDS $dst.b,$src1.b,$src2.b\n\t" + "FADDS $dst.c,$src1.c,$src2.c\n\t" + "FADDS $dst.d,$src1.d,$src2.d" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ vadd_f32(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(FloatRegisterImpl::SINGLE); + FloatRegister src1b = src1a->successor(FloatRegisterImpl::SINGLE); + FloatRegister src2b = src2a->successor(FloatRegisterImpl::SINGLE); + __ vadd_f32(dstb, src1b, src2b); + FloatRegister dstc = dstb->successor(FloatRegisterImpl::SINGLE); + FloatRegister src1c = src1b->successor(FloatRegisterImpl::SINGLE); + FloatRegister src2c = src2b->successor(FloatRegisterImpl::SINGLE); + __ vadd_f32(dstc, src1c, src2c); + FloatRegister dstd = dstc->successor(FloatRegisterImpl::SINGLE); + FloatRegister src1d = src1c->successor(FloatRegisterImpl::SINGLE); + FloatRegister src2d = src2c->successor(FloatRegisterImpl::SINGLE); + __ vadd_f32(dstd, src1d, src2d); + %} + + ins_pipe(faddF_reg_reg); // FIXME +%} + +instruct vadd2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVD src1 src2)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FADDD $dst.a,$src1.a,$src2.a\n\t" + "FADDD $dst.b,$src1.b,$src2.b" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ vadd_f64(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(FloatRegisterImpl::DOUBLE); + FloatRegister src1b = src1a->successor(FloatRegisterImpl::DOUBLE); + FloatRegister src2b = src2a->successor(FloatRegisterImpl::DOUBLE); + __ vadd_f64(dstb, src1b, src2b); + %} + + ins_pipe(faddF_reg_reg); // FIXME +%} + + +// Bytes vector sub +instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVB src1 src2)); + size(4); + format %{ "VSUB.I8 $dst,$src1,$src2\t! sub packed8B" %} + ins_encode %{ + __ vsub_64_8($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (SubVB src1 src2)); + size(4); + format %{ "VSUB.I8 $dst.Q,$src1.Q,$src2.Q\t! 
sub packed16B" %} + ins_encode %{ + __ vsub_128_8($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Shorts/Chars vector sub +instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVS src1 src2)); + size(4); + format %{ "VSUB.I16 $dst,$src1,$src2\t! sub packed4S" %} + ins_encode %{ + __ vsub_64_16($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsub16S_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (SubVS src1 src2)); + size(4); + format %{ "VSUB.I16 $dst.Q,$src1.Q,$src2.Q\t! sub packed8S" %} + ins_encode %{ + __ vsub_128_16($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector sub +instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVI src1 src2)); + size(4); + format %{ "VSUB.I32 $dst,$src1,$src2\t! sub packed2I" %} + ins_encode %{ + __ vsub_64_32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVI src1 src2)); + size(4); + format %{ "VSUB.I32 $dst.Q,$src1.Q,$src2.Q\t! sub packed4I" %} + ins_encode %{ + bool quad = true; + __ vsub_128_32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector sub +instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVL src1 src2)); + size(4); + format %{ "VSUB.I64 $dst.Q,$src1.Q,$src2.Q\t! 
sub packed2L" %} + ins_encode %{ + __ vsub_128_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Floats vector sub +instruct vsub2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVF src1 src2)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FSUBS $dst.a,$src1.a,$src2.a\n\t" + "FSUBS $dst.b,$src1.b,$src2.b" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ vsub_f32(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(FloatRegisterImpl::SINGLE); + FloatRegister src1b = src1a->successor(FloatRegisterImpl::SINGLE); + FloatRegister src2b = src2a->successor(FloatRegisterImpl::SINGLE); + __ vsub_f32(dstb, src1b, src2b); + %} + + ins_pipe(faddF_reg_reg); // FIXME +%} + + +instruct vsub4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (SubVF src1 src2)); + size(4*4); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "FSUBS $dst.a,$src1.a,$src2.a\n\t" + "FSUBS $dst.b,$src1.b,$src2.b\n\t" + "FSUBS $dst.c,$src1.c,$src2.c\n\t" + "FSUBS $dst.d,$src1.d,$src2.d" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ vsub_f32(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(FloatRegisterImpl::SINGLE); + FloatRegister src1b = src1a->successor(FloatRegisterImpl::SINGLE); + FloatRegister src2b = src2a->successor(FloatRegisterImpl::SINGLE); + __ vsub_f32(dstb, src1b, src2b); + FloatRegister dstc = dstb->successor(FloatRegisterImpl::SINGLE); + FloatRegister src1c = src1b->successor(FloatRegisterImpl::SINGLE); + FloatRegister src2c = src2b->successor(FloatRegisterImpl::SINGLE); + __ vsub_f32(dstc, src1c, src2c); + FloatRegister dstd = dstc->successor(FloatRegisterImpl::SINGLE); + FloatRegister src1d = src1c->successor(FloatRegisterImpl::SINGLE); + FloatRegister src2d = src2c->successor(FloatRegisterImpl::SINGLE); + __ vsub_f32(dstd, src1d, src2d); + %} + + ins_pipe(faddF_reg_reg); // FIXME +%} + +instruct vsub2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVD src1 src2)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FSUBD $dst.a,$src1.a,$src2.a\n\t" + "FSUBD $dst.b,$src1.b,$src2.b" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ vsub_f64(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(FloatRegisterImpl::DOUBLE); + FloatRegister src1b = src1a->successor(FloatRegisterImpl::DOUBLE); + FloatRegister src2b = src2a->successor(FloatRegisterImpl::DOUBLE); + __ vsub_f64(dstb, src1b, src2b); + %} + + ins_pipe(faddF_reg_reg); // FIXME +%} + +// Shorts/Chars vector mul +instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVS src1 src2)); + size(4); + format %{ "VMUL.I16 $dst,$src1,$src2\t! 
mul packed4S" %} + ins_encode %{ + __ vmul_64_16($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (MulVS src1 src2)); + size(4); + format %{ "VMUL.I16 $dst.Q,$src1.Q,$src2.Q\t! mul packed8S" %} + ins_encode %{ + __ vmul_128_16($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector mul +instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVI src1 src2)); + size(4); + format %{ "VMUL.I32 $dst,$src1,$src2\t! mul packed2I" %} + ins_encode %{ + __ vmul_64_32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVI src1 src2)); + size(4); + format %{ "VMUL.I32 $dst.Q,$src1.Q,$src2.Q\t! mul packed4I" %} + ins_encode %{ + __ vmul_128_32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Floats vector mul +instruct vmul2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVF src1 src2)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FMULS $dst.a,$src1.a,$src2.a\n\t" + "FMULS $dst.b,$src1.b,$src2.b" %} + ins_encode %{ + __ vmul_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ vmul_f32($dst$$FloatRegister->successor(FloatRegisterImpl::SINGLE), + $src1$$FloatRegister->successor(FloatRegisterImpl::SINGLE), + $src2$$FloatRegister->successor(FloatRegisterImpl::SINGLE)); + %} + + ins_pipe(fmulF_reg_reg); // FIXME +%} + +instruct vmul4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (MulVF src1 src2)); + size(4*4); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "FMULS $dst.a,$src1.a,$src2.a\n\t" + "FMULS $dst.b,$src1.b,$src2.b\n\t" + "FMULS $dst.c,$src1.c,$src2.c\n\t" + "FMULS $dst.d,$src1.d,$src2.d" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ vmul_f32(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(FloatRegisterImpl::SINGLE); + FloatRegister src1b = src1a->successor(FloatRegisterImpl::SINGLE); + FloatRegister src2b = src2a->successor(FloatRegisterImpl::SINGLE); + __ vmul_f32(dstb, src1b, src2b); + FloatRegister dstc = dstb->successor(FloatRegisterImpl::SINGLE); + FloatRegister src1c = src1b->successor(FloatRegisterImpl::SINGLE); + FloatRegister src2c = src2b->successor(FloatRegisterImpl::SINGLE); + __ vmul_f32(dstc, src1c, src2c); + FloatRegister dstd = dstc->successor(FloatRegisterImpl::SINGLE); + FloatRegister src1d = src1c->successor(FloatRegisterImpl::SINGLE); + FloatRegister src2d = src2c->successor(FloatRegisterImpl::SINGLE); + __ vmul_f32(dstd, src1d, src2d); + %} + + ins_pipe(fmulF_reg_reg); // FIXME +%} + +instruct vmul2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVD src1 src2)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FMULD $dst.D.a,$src1.D.a,$src2.D.a\n\t" + "FMULD $dst.D.b,$src1.D.b,$src2.D.b" %} + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + 
FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ vmul_f64(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(FloatRegisterImpl::DOUBLE); + FloatRegister src1b = src1a->successor(FloatRegisterImpl::DOUBLE); + FloatRegister src2b = src2a->successor(FloatRegisterImpl::DOUBLE); + __ vmul_f64(dstb, src1b, src2b); + %} + + ins_pipe(fmulD_reg_reg); // FIXME +%} + + +// Floats vector div +instruct vdiv2F_reg_vfp(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVF src1 src2)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FDIVS $dst.a,$src1.a,$src2.a\n\t" + "FDIVS $dst.b,$src1.b,$src2.b" %} + ins_encode %{ + __ vdiv_f32($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + __ vdiv_f32($dst$$FloatRegister->successor(FloatRegisterImpl::SINGLE), + $src1$$FloatRegister->successor(FloatRegisterImpl::SINGLE), + $src2$$FloatRegister->successor(FloatRegisterImpl::SINGLE)); + %} + + ins_pipe(fdivF_reg_reg); // FIXME +%} + +instruct vdiv4F_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (DivVF src1 src2)); + size(4*4); + ins_cost(DEFAULT_COST*4); // FIXME + + format %{ "FDIVS $dst.a,$src1.a,$src2.a\n\t" + "FDIVS $dst.b,$src1.b,$src2.b\n\t" + "FDIVS $dst.c,$src1.c,$src2.c\n\t" + "FDIVS $dst.d,$src1.d,$src2.d" %} + + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ vdiv_f32(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(FloatRegisterImpl::SINGLE); + FloatRegister src1b = src1a->successor(FloatRegisterImpl::SINGLE); + FloatRegister src2b = src2a->successor(FloatRegisterImpl::SINGLE); + __ vdiv_f32(dstb, src1b, src2b); + FloatRegister dstc = dstb->successor(FloatRegisterImpl::SINGLE); + FloatRegister src1c = src1b->successor(FloatRegisterImpl::SINGLE); + FloatRegister src2c = src2b->successor(FloatRegisterImpl::SINGLE); + __ vdiv_f32(dstc, src1c, src2c); + FloatRegister dstd = dstc->successor(FloatRegisterImpl::SINGLE); + FloatRegister src1d = src1c->successor(FloatRegisterImpl::SINGLE); + FloatRegister src2d = src2c->successor(FloatRegisterImpl::SINGLE); + __ vdiv_f32(dstd, src1d, src2d); + %} + + ins_pipe(fdivF_reg_reg); // FIXME +%} + +instruct vdiv2D_reg_vfp(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVD src1 src2)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "FDIVD $dst.D.a,$src1.D.a,$src2.D.a\n\t" + "FDIVD $dst.D.b,$src1.D.b,$src2.D.b" %} + ins_encode %{ + FloatRegister dsta = $dst$$FloatRegister; + FloatRegister src1a = $src1$$FloatRegister; + FloatRegister src2a = $src2$$FloatRegister; + __ vdiv_f64(dsta, src1a, src2a); + FloatRegister dstb = dsta->successor(FloatRegisterImpl::DOUBLE); + FloatRegister src1b = src1a->successor(FloatRegisterImpl::DOUBLE); + FloatRegister src2b = src2a->successor(FloatRegisterImpl::DOUBLE); + __ vdiv_f64(dstb, src1b, src2b); + %} + + ins_pipe(fdivD_reg_reg); // FIXME +%} + +// --------------------------------- NEG -------------------------------------- + +instruct vneg8B_reg(vecD dst, vecD src) %{ + predicate(n->as_Vector()->length_in_bytes() == 8); + effect(DEF dst, USE src); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ "VNEG.S8 $dst.D,$src.D\t! 
neg packed8B" %} + ins_encode %{ + __ vneg_64_s8($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vneg16B_reg(vecX dst, vecX src) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + effect(DEF dst, USE src); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ "VNEG.S8 $dst.Q,$src.Q\t! neg0 packed16B" %} + ins_encode %{ + __ vneg_128_s8($dst$$FloatRegister, $src$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// ------------------------------ Shift --------------------------------------- + +instruct vslcntD(vecD dst, iRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (LShiftCntV cnt)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + Repl8B_reg_simd(dst, cnt); + %} +%} + +instruct vslcntX(vecX dst, iRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (LShiftCntV cnt)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + Repl16B_reg(dst, cnt); + %} +%} + +// Low bits of vector "shift" elements are used, so it +// doesn't matter if we treat it as ints or bytes here. +instruct vsrcntD(vecD dst, iRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 8 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (RShiftCntV cnt)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + + format %{ "VDUP.8 $dst.D,$cnt\n\t" + "VNEG.S8 $dst.D,$dst.D\t! neg packed8B" %} + ins_encode %{ + __ vdup_64_8($dst$$FloatRegister, $cnt$$Register); + __ vneg_64_s8($dst$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsrcntX(vecX dst, iRegI cnt) %{ + predicate(n->as_Vector()->length_in_bytes() == 16 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (RShiftCntV cnt)); + size(4*2); + ins_cost(DEFAULT_COST*2); // FIXME + format %{ "VDUP.8 $dst.Q,$cnt\n\t" + "VNEG.S8 $dst.Q,$dst.Q\t! neg packed16B" %} + ins_encode %{ + __ vdup_128_8($dst$$FloatRegister, $cnt$$Register); + __ vneg_128_s8($dst$$FloatRegister, $dst$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Byte vector logical left/right shift based on sign +instruct vsh8B_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 8); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U8 $dst.D,$src.D,$shift.D\t! logical left/right shift packed8B" + %} + ins_encode %{ + __ vshl_64_u8($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsh16B_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U8 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed16B" + %} + ins_encode %{ + bool quad = true; + __ vshl_128_u8($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Shorts/Char vector logical left/right shift based on sign +instruct vsh4S_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 4); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U16 $dst.D,$src.D,$shift.D\t! 
logical left/right shift packed4S" + %} + ins_encode %{ + __ vshl_64_u16($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsh8S_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U16 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed8S" + %} + ins_encode %{ + __ vshl_128_u16($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector logical left/right shift based on sign +instruct vsh2I_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 2); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U32 $dst.D,$src.D,$shift.D\t! logical left/right shift packed2I" + %} + ins_encode %{ + __ vshl_64_u32($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsh4I_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U32 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed4I" + %} + ins_encode %{ + __ vshl_128_u32($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector logical left/right shift based on sign +instruct vsh2L_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.U64 $dst.Q,$src.Q,$shift.Q\t! logical left/right shift packed2L" + %} + ins_encode %{ + __ vshl_128_u64($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// ------------------------------ LeftShift ----------------------------------- + +// Byte vector left shift +instruct vsl8B_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVB src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh8B_reg(dst, src, shift); + %} +%} + +instruct vsl16B_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVB src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh16B_reg(dst, src, shift); + %} +%} + +instruct vsl8B_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVB src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I8 $dst.D,$src.D,$shift\t! logical left shift packed8B" + %} + ins_encode %{ + __ vshl_64_8($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsl16B_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (LShiftVB src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I8 $dst.Q,$src.Q,$shift\t! 
logical left shift packed16B" + %} + ins_encode %{ + bool quad = true; + __ vshl_128_8($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Shorts/Chars vector logical left/right shift +instruct vsl4S_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + match(Set dst (URShiftVS src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh4S_reg(dst, src, shift); + %} +%} + +instruct vsl8S_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + match(Set dst (URShiftVS src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh8S_reg(dst, src, shift); + %} +%} + +instruct vsl4S_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I16 $dst.D,$src.D,$shift\t! logical left shift packed4S" + %} + ins_encode %{ + bool quad = false; + __ vshl_64_16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsl8S_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (LShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I16 $dst.Q,$src.Q,$shift\t! logical left shift packed8S" + %} + ins_encode %{ + bool quad = true; + __ vshl_128_16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector logical left/right shift +instruct vsl2I_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 2 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (LShiftVI src shift)); + match(Set dst (URShiftVI src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh2I_reg(dst, src, shift); + %} +%} + +instruct vsl4I_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (LShiftVI src shift)); + match(Set dst (URShiftVI src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh4I_reg(dst, src, shift); + %} +%} + +instruct vsl2I_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (LShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I32 $dst.D,$src.D,$shift\t! logical left shift packed2I" + %} + ins_encode %{ + __ vshl_64_32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsl4I_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (LShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I32 $dst.Q,$src.Q,$shift\t! 
logical left shift packed4I" + %} + ins_encode %{ + __ vshl_128_32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector logical left/right shift +instruct vsl2L_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + match(Set dst (URShiftVL src shift)); + size(4*1); + ins_cost(DEFAULT_COST*1); // FIXME + expand %{ + vsh2L_reg(dst, src, shift); + %} +%} + +instruct vsl2L_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVL src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.I64 $dst.Q,$src.Q,$shift\t! logical left shift packed2L" + %} + ins_encode %{ + __ vshl_128_64($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// ----------------------- LogicalRightShift ----------------------------------- + +// Bytes/Shorts vector logical right shift produces incorrect Java result +// for negative data because java code convert short value into int with +// sign extension before a shift. + +// Chars vector logical right shift +instruct vsrl4S_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.U16 $dst.D,$src.D,$shift\t! logical right shift packed4S" + %} + ins_encode %{ + __ vshr_64_u16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsrl8S_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (URShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.U16 $dst.Q,$src.Q,$shift\t! logical right shift packed8S" + %} + ins_encode %{ + __ vshr_128_u16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector logical right shift +instruct vsrl2I_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (URShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.U32 $dst.D,$src.D,$shift\t! logical right shift packed2I" + %} + ins_encode %{ + __ vshr_64_u32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsrl4I_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 && (VM_Version::features() & FT_AdvSIMD)); + match(Set dst (URShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.U32 $dst.Q,$src.Q,$shift\t! logical right shift packed4I" + %} + ins_encode %{ + __ vshr_128_u32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector logical right shift +instruct vsrl2L_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVL src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.U64 $dst.Q,$src.Q,$shift\t! 
logical right shift packed2L" + %} + ins_encode %{ + __ vshr_128_u64($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// ------------------- ArithmeticRightShift ----------------------------------- + +// Bytes vector arithmetic left/right shift based on sign +instruct vsha8B_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 8); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S8 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed8B" + %} + ins_encode %{ + __ vshl_64_s8($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsha16B_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S8 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed16B" + %} + ins_encode %{ + __ vshl_128_s8($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Shorts vector arithmetic left/right shift based on sign +instruct vsha4S_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 4); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S16 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed4S" + %} + ins_encode %{ + __ vshl_64_s16($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsha8S_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S16 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed8S" + %} + ins_encode %{ + __ vshl_128_s16($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector arithmetic left/right shift based on sign +instruct vsha2I_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 2); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S32 $dst.D,$src.D,$shift.D\t! arithmetic right shift packed2I" + %} + ins_encode %{ + __ vshl_64_s32($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsha4I_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S32 $dst.Q,$src.Q,$shift.Q\t! arithmetic right shift packed4I" + %} + ins_encode %{ + __ vshl_128_s32($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector arithmetic left/right shift based on sign +instruct vsha2L_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + effect(DEF dst, USE src, USE shift); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHL.S64 $dst.Q,$src.Q,$shift.Q\t! 
arithmetic right shift packed2L" + %} + ins_encode %{ + __ vshl_128_s64($dst$$FloatRegister, $src$$FloatRegister, $shift$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Byte vector arithmetic right shift + +instruct vsra8B_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVB src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha8B_reg(dst, src, shift); + %} +%} + +instruct vsrl16B_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVB src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha16B_reg(dst, src, shift); + %} +%} + +instruct vsrl8B_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVB src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S8 $dst.D,$src.D,$shift\t! logical right shift packed8B" + %} + ins_encode %{ + __ vshr_64_s8($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsrl16B_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (RShiftVB src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S8 $dst.Q,$src.Q,$shift\t! logical right shift packed16B" + %} + ins_encode %{ + __ vshr_128_s8($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Shorts vector arithmetic right shift +instruct vsra4S_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha4S_reg(dst, src, shift); + %} +%} + +instruct vsra8S_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha8S_reg(dst, src, shift); + %} +%} + +instruct vsra4S_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S16 $dst.D,$src.D,$shift\t! logical right shift packed4S" + %} + ins_encode %{ + __ vshr_64_s16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsra8S_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (RShiftVS src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S16 $dst.Q,$src.Q,$shift\t! 
logical right shift packed8S" + %} + ins_encode %{ + __ vshr_128_s16($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Integers vector arithmetic right shift +instruct vsra2I_reg(vecD dst, vecD src, vecD shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha2I_reg(dst, src, shift); + %} +%} + +instruct vsra4I_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha4I_reg(dst, src, shift); + %} +%} + +instruct vsra2I_immI(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S32 $dst.D,$src.D,$shift\t! logical right shift packed2I" + %} + ins_encode %{ + __ vshr_64_s32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vsra4I_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (RShiftVI src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S32 $dst.Q,$src.Q,$shift\t! logical right shift packed4I" + %} + ins_encode %{ + __ vshr_128_s32($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// Longs vector arithmetic right shift +instruct vsra2L_reg(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + expand %{ + vsha2L_reg(dst, src, shift); + %} +%} + +instruct vsra2L_immI(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVL src shift)); + size(4); + ins_cost(DEFAULT_COST); // FIXME + format %{ + "VSHR.S64 $dst.Q,$src.Q,$shift\t! logical right shift packed2L" + %} + ins_encode %{ + __ vshr_128_s64($dst$$FloatRegister, $src$$FloatRegister, $shift$$constant); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// --------------------------------- AND -------------------------------------- + +instruct vandD(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 8); + match(Set dst (AndV src1 src2)); + format %{ "VAND $dst.D,$src1.D,$src2.D\t! and vectors (8 bytes)" %} + ins_encode %{ + __ vand_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vandX(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (AndV src1 src2)); + format %{ "VAND $dst.Q,$src1.Q,$src2.Q\t! and vectors (16 bytes)" %} + ins_encode %{ + bool quad = true; + __ vand_128($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// --------------------------------- OR --------------------------------------- + +instruct vorD(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 8); + match(Set dst (OrV src1 src2)); + format %{ "VOR $dst.D,$src1.D,$src2.D\t! 
or vectors (8 bytes)" %} + ins_encode %{ + __ vorr_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vorX(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (OrV src1 src2)); + format %{ "VOR $dst.Q,$src1.Q,$src2.Q\t! or vectors (16 bytes)" %} + ins_encode %{ + __ vorr_128($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +// --------------------------------- XOR -------------------------------------- + +instruct vxorD(vecD dst, vecD src1, vecD src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 8); + match(Set dst (XorV src1 src2)); + format %{ "VXOR $dst.D,$src1.D,$src2.D\t! xor vectors (8 bytes)" %} + ins_encode %{ + __ veor_64($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + +instruct vxorX(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV src1 src2)); + format %{ "VXOR $dst.Q,$src1.Q,$src2.Q\t! xor vectors (16 bytes)" %} + ins_encode %{ + __ veor_128($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); + %} + ins_pipe( ialu_reg_reg ); // FIXME +%} + + +//----------PEEPHOLE RULES----------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instruction definitions. +// +// peepmatch ( root_instr_name [preceding_instruction]* ); +// +// peepconstraint %{ +// (instruction_number.operand_name relational_op instruction_number.operand_name +// [, ...] ); +// // instruction numbers are zero-based using left to right order in peepmatch +// +// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); +// // provide an instruction_number.operand_name for each operand that appears +// // in the replacement instruction's match rule +// +// ---------VM FLAGS--------------------------------------------------------- +// +// All peephole optimizations can be turned off using -XX:-OptoPeephole +// +// Each peephole rule is given an identifying number starting with zero and +// increasing by one in the order seen by the parser. An individual peephole +// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# +// on the command-line.
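+// For example (hypothetical command lines, assuming a debug/fastdebug build
+// where these develop flags can be set):
+//   -XX:-OptoPeephole        disable all peephole rules
+//   -XX:OptoPeepholeAt=0     apply only the peephole rule numbered 0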
+// +// ---------CURRENT LIMITATIONS---------------------------------------------- +// +// Only match adjacent instructions in same basic block +// Only equality constraints +// Only constraints between operands, not (0.dest_reg == EAX_enc) +// Only one replacement instruction +// +// ---------EXAMPLE---------------------------------------------------------- +// +// // pertinent parts of existing instructions in architecture description +// instruct movI(eRegI dst, eRegI src) %{ +// match(Set dst (CopyI src)); +// %} +// +// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{ +// match(Set dst (AddI dst src)); +// effect(KILL cr); +// %} +// +// // Change (inc mov) to lea +// peephole %{ +// // increment preceded by register-register move +// peepmatch ( incI_eReg movI ); +// // require that the destination register of the increment +// // match the destination register of the move +// peepconstraint ( 0.dst == 1.dst ); +// // construct a replacement instruction that sets +// // the destination to ( move's source register + one ) +// peepreplace ( incI_eReg_immI1( 0.dst 1.src 0.src ) ); +// %} +// + +// // Change load of spilled value to only a spill +// instruct storeI(memory mem, eRegI src) %{ +// match(Set mem (StoreI mem src)); +// %} +// +// instruct loadI(eRegI dst, memory mem) %{ +// match(Set dst (LoadI mem)); +// %} +// +// peephole %{ +// peepmatch ( loadI storeI ); +// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); +// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); +// %} + +//----------SMARTSPILL RULES--------------------------------------------------- +// These must follow all instruction definitions as they use the names +// defined in the instruction definitions. +// +// ARM will probably not have any of these rules due to RISC instruction set. + +//----------PIPELINE----------------------------------------------------------- +// Rules which define the behavior of the target architecture's pipeline. --- /dev/null 2018-09-25 19:24:07.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/aarch32Test.cpp 2018-09-25 19:24:07.000000000 +0300 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions.
+ * + */ + +#include + +#include "precompiled.hpp" +#include "code/codeBlob.hpp" +#include "asm/macroAssembler.hpp" + +// hook routine called during JVM bootstrap to test AArch32 assembler + +extern "C" void entry(CodeBuffer*); + +void aarch32TestHook() +{ + BufferBlob* b = BufferBlob::create("aarch32Test", 500000); + CodeBuffer code(b); + MacroAssembler _masm(&code); + entry(&code); +} --- /dev/null 2018-09-25 19:24:09.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/abstractInterpreter_aarch32.cpp 2018-09-25 19:24:08.000000000 +0300 @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "oops/method.hpp" +#include "runtime/frame.inline.hpp" +#include "utilities/debug.hpp" + +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : i = 4; break; + case T_LONG : i = 5; break; + case T_VOID : i = 6; break; + case T_FLOAT : i = 7; break; + case T_DOUBLE : i = 8; break; + case T_OBJECT : i = 9; break; + case T_ARRAY : i = 9; break; + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, + "index out of bounds"); + return i; +} + +// How much stack a method activation needs in words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + const int entry_size = frame::interpreter_frame_monitor_size(); + + // total overhead size: entry_size + (saved rfp thru expr stack + // bottom). 
be sure to change this if you add/subtract anything + // to/from the overhead area + const int overhead_size = + -(frame::get_interpreter_frame_initial_sp_offset()) + entry_size; + + const int stub_code = frame::get_entry_frame_after_call_words(); + const int method_stack = (method->max_locals() + method->max_stack()) * + Interpreter::stackElementWords; + return (overhead_size + method_stack + stub_code); +} + +// asm based interpreter deoptimization helpers +int AbstractInterpreter::size_activation(int max_stack, + int temps, + int extra_args, + int monitors, + int callee_params, + int callee_locals, + bool is_top_frame) { + // Note: This calculation must exactly parallel the frame setup + // in TemplateInterpreterGenerator::generate_method_entry. + + // fixed size of an interpreter frame: + int overhead = frame::sender_sp_offset - + frame::get_interpreter_frame_initial_sp_offset(); + // Our locals were accounted for by the caller (or last_frame_adjust + // on the transition). Since the callee parameters already account + // for the callee's params we only need to account for the extra + // locals. + int size = overhead + + (callee_locals - callee_params)*Interpreter::stackElementWords + + monitors * frame::interpreter_frame_monitor_size() + + temps* Interpreter::stackElementWords + extra_args; + + // On AArch32 we keep the stack pointer aligned to an even number of + // stack words (8 bytes), so we must round up here. + size = align_up(size, 2); + + return size; +} + +void AbstractInterpreter::layout_activation(Method* method, + int tempcount, + int popframe_extra_args, + int moncount, + int caller_actual_parameters, + int callee_param_count, + int callee_locals, + frame* caller, + frame* interpreter_frame, + bool is_top_frame, + bool is_bottom_frame) { + // The frame interpreter_frame is guaranteed to be the right size, + // as determined by a previous call to the size_activation() method. + // It is also guaranteed to be walkable even though it is in a + // skeletal state. + + int max_locals = method->max_locals() * Interpreter::stackElementWords; + int extra_locals = (method->max_locals() - method->size_of_parameters()) * + Interpreter::stackElementWords; + +#ifdef ASSERT + assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable"); +#endif + + interpreter_frame->interpreter_frame_set_method(method); + // NOTE the difference between sender_sp and + // interpreter_frame_sender_sp: interpreter_frame_sender_sp is + // the original sp of the caller (the unextended_sp) and + // sender_sp is fp+8/16 (32bit/64bit) XXX + intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; + +#ifdef ASSERT + if (caller->is_interpreted_frame()) { + assert(locals < caller->fp() + frame::get_interpreter_frame_initial_sp_offset(), "bad placement"); + } +#endif + + interpreter_frame->interpreter_frame_set_locals(locals); + BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); + BasicObjectLock* monbot = montop - moncount; + interpreter_frame->interpreter_frame_set_monitor_end(monbot); + + // Set last_sp + intptr_t* last_sp = (intptr_t*) monbot - + tempcount*Interpreter::stackElementWords - + popframe_extra_args; + interpreter_frame->interpreter_frame_set_last_sp(last_sp); + + // All frames but the initial (oldest) interpreter frame we fill in have + // a value for sender_sp that allows walking the stack but isn't + // truly correct. Correct the value here.
+ if (extra_locals != 0 && + interpreter_frame->sender_sp() == + interpreter_frame->interpreter_frame_sender_sp()) { + interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + + extra_locals); + } + *interpreter_frame->interpreter_frame_cache_addr() = + method->constants()->cache(); + *interpreter_frame->interpreter_frame_mirror_addr() = + method->method_holder()->java_mirror(); +} --- /dev/null 2018-09-25 19:24:10.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/assembler_aarch32.cpp 2018-09-25 19:24:09.000000000 +0300 @@ -0,0 +1,2149 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * reserved. DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE + * HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include +#include + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "register_aarch32.hpp" +#include "vm_version_aarch32.hpp" + +extern "C" void entry(CodeBuffer *cb); + +#define __ _masm. 
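+// Note: the '__' shorthand defined above expands to '_masm.', so the
+// hand-written instruction sequences below read like an assembly listing;
+// e.g. '__ add(r8, r2, r11, ::lsr(10))' is simply '_masm.add(...)' on the
+// Assembler instance created in entry().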
+#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) block_comment(str) +#endif + +#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") + +void entry(CodeBuffer *cb) { + + // { + // for (int i = 0; i < 256; i+=16) + // { + // printf("\"%20.20g\", ", unpack(i)); + // printf("\"%20.20g\", ", unpack(i+1)); + // } + // printf("\n"); + // } + +#if defined(ASSERT) && !defined (__SOFTFP__) + Assembler _masm(cb); + address entry = __ pc(); + + // Smoke test for assembler + + // we're checking the code generation, not applicability of the code to the actual target + // so temporarily override the detected cpu to allow emission of all instructions + const ProcessorFeatures detected_features = VM_Version::features(); + VM_Version::features(FT_ALL); + +// BEGIN Generated code -- do not edit +// Generated by aarch32-asmtest.py + Label back, forth, near, near_post, near_flt, near_post_flt; + __ bind(back); + +// ThreeRegSft + __ add(r8, r2, r11, ::lsr(10)); // add r8, r2, r11, lsr #10 + __ adds(r1, r3, r7, ::asr(1), Assembler::EQ); // addEQs r1, r3, r7, asr #1 + __ eor(r0, r9, r4, ::lsl(5)); // eor r0, r9, r4, lsl #5 + __ eors(r9, r2, r6, ::rrx(), Assembler::GT); // eorGTs r9, r2, r6, rrx + __ sub(r0, r12, lr, ::lsr(0), Assembler::GT); // subGT r0, r12, lr, lsr #0 + __ subs(r8, r2, r4, ::ror(6), Assembler::EQ); // subEQs r8, r2, r4, ror #6 + __ rsb(r8, r9, sp, ::lsl(3)); // rsb r8, r9, sp, lsl #3 + __ rsbs(r8, r0, r4, ::ror(16), Assembler::VS); // rsbVSs r8, r0, r4, ror #16 + __ add(r9, r5, r1, ::lsr(15), Assembler::LE); // addLE r9, r5, r1, lsr #15 + __ adds(r1, sp, r6, ::asr(5)); // adds r1, sp, r6, asr #5 + __ adc(r11, sp, r7, ::asr(1), Assembler::GT); // adcGT r11, sp, r7, asr #1 + __ adcs(r0, r8, r9, ::lsr(6)); // adcs r0, r8, r9, lsr #6 + __ sbc(r9, r3, r6, ::ror(5)); // sbc r9, r3, r6, ror #5 + __ sbcs(r1, sp, r5, ::asr(16), Assembler::HI); // sbcHIs r1, sp, r5, asr #16 + __ rsc(r8, r2, r6, ::lsl(9), Assembler::CC); // rscCC r8, r2, r6, lsl #9 + __ rscs(r10, r4, sp, ::ror(14)); // rscs r10, r4, sp, ror #14 + __ orr(r11, sp, r5, ::lsl(15), Assembler::NE); // orrNE r11, sp, r5, lsl #15 + __ orrs(r9, r10, r4, ::ror(14)); // orrs r9, r10, r4, ror #14 + __ bic(r9, sp, r5, ::ror(1)); // bic r9, sp, r5, ror #1 + __ bics(r0, r2, r7, ::asr(10)); // bics r0, r2, r7, asr #10 + +// ThreeRegRSR + __ add(sp, r6, r7, ::ror(r7)); // add sp, r6, r7, ror r7 + __ adds(r4, r12, r6, ::ror(r7), Assembler::HI); // addHIs r4, r12, r6, ror r7 + __ eor(r5, r6, r7, ::asr(r12), Assembler::LS); // eorLS r5, r6, r7, asr r12 + __ eors(r8, r5, sp, ::lsl(r4), Assembler::AL); // eorALs r8, r5, sp, lsl r4 + __ sub(r2, r12, r5, ::asr(r0)); // sub r2, r12, r5, asr r0 + __ subs(r9, r3, r7, ::lsl(r12), Assembler::HS); // subHSs r9, r3, r7, lsl r12 + __ rsb(r9, r12, r4, ::lsl(r6), Assembler::GT); // rsbGT r9, r12, r4, lsl r6 + __ rsbs(r8, r2, r12, ::lsl(r1)); // rsbs r8, r2, r12, lsl r1 + __ add(r4, r12, sp, ::lsl(sp)); // add r4, r12, sp, lsl sp + __ adds(r8, r11, r6, ::ror(sp)); // adds r8, r11, r6, ror sp + __ adc(r0, r2, r5, ::lsl(r4), Assembler::NE); // adcNE r0, r2, r5, lsl r4 + __ adcs(r11, lr, r6, ::asr(r2)); // adcs r11, lr, r6, asr r2 + __ sbc(r8, r10, lr, ::asr(r3), Assembler::HI); // sbcHI r8, r10, lr, asr r3 + __ sbcs(r1, r12, r5, ::lsl(r6)); // sbcs r1, r12, r5, lsl r6 + __ rsc(r4, r5, lr, ::ror(r10), Assembler::VS); // rscVS r4, r5, lr, ror r10 + __ rscs(r1, r12, sp, ::lsl(r8)); // rscs r1, r12, sp, lsl r8 + __ orr(r8, r1, r6, ::ror(r0), Assembler::VS); // orrVS r8, r1, 
r6, ror r0 + __ orrs(r11, sp, r7, ::ror(r5)); // orrs r11, sp, r7, ror r5 + __ bic(r4, lr, r6, ::lsl(r2), Assembler::AL); // bicAL r4, lr, r6, lsl r2 + __ bics(r10, r11, sp, ::lsl(r3)); // bics r10, r11, sp, lsl r3 + +// TwoRegImm + __ add(r8, sp, (unsigned)268435462U, Assembler::HI); // addHI r8, sp, #268435462 + __ adds(sp, lr, (unsigned)162529280U); // adds sp, lr, #162529280 + __ eor(lr, r6, (unsigned)8192000U); // eor lr, r6, #8192000 + __ eors(r2, r3, (unsigned)292U); // eors r2, r3, #292 + __ sub(r4, sp, (unsigned)227540992U); // sub r4, sp, #227540992 + __ subs(r1, lr, (unsigned)33554432U, Assembler::LT); // subLTs r1, lr, #33554432 + __ rsb(r0, r5, (unsigned)2483027968U); // rsb r0, r5, #2483027968 + __ rsbs(r8, r4, (unsigned)3080192U, Assembler::LO); // rsbLOs r8, r4, #3080192 + __ add(r9, r4, (unsigned)2147483648U, Assembler::LT); // addLT r9, r4, #2147483648 + __ adds(r8, r4, (unsigned)32768U, Assembler::AL); // addALs r8, r4, #32768 + __ adc(r10, lr, (unsigned)10752U, Assembler::CS); // adcCS r10, lr, #10752 + __ adcs(r10, r6, (unsigned)774144U); // adcs r10, r6, #774144 + __ sbc(r2, r12, (unsigned)637534208U); // sbc r2, r12, #637534208 + __ sbcs(r8, r10, (unsigned)692060160U); // sbcs r8, r10, #692060160 + __ rsc(sp, r6, (unsigned)7405568U); // rsc sp, r6, #7405568 + __ rscs(r10, r11, (unsigned)244318208U, Assembler::NE); // rscNEs r10, r11, #244318208 + __ orr(r3, r7, (unsigned)66846720U, Assembler::VS); // orrVS r3, r7, #66846720 + __ orrs(r2, r5, (unsigned)1327104U, Assembler::EQ); // orrEQs r2, r5, #1327104 + __ bic(r8, r1, (unsigned)3744U, Assembler::VS); // bicVS r8, r1, #3744 + __ bics(r0, r2, (unsigned)2684354560U, Assembler::LO); // bicLOs r0, r2, #2684354560 + +// TwoRegSft + __ tst(r8, sp, ::lsl(5)); // tst r8, sp, lsl #5 + __ teq(r6, r7, ::lsr(3)); // teq r6, r7, lsr #3 + __ cmp(r12, r4, ::ror(2)); // cmp r12, r4, ror #2 + __ cmn(r5, r7, ::lsl(16), Assembler::LT); // cmnLT r5, r7, lsl #16 + +// TwoRegRSR + __ tst(r2, lr, ::lsr(r7)); // tst r2, lr, lsr r7 + __ teq(r0, r2, ::ror(r5), Assembler::CC); // teqCC r0, r2, ror r5 + __ cmp(lr, r7, ::lsr(r11), Assembler::LS); // cmpLS lr, r7, lsr r11 + __ cmn(r10, r7, ::lsl(r11), Assembler::VS); // cmnVS r10, r7, lsl r11 + +// OneRegImm + __ tst(r2, (unsigned)557842432U); // tst r2, #557842432 + __ teq(lr, (unsigned)7077888U, Assembler::MI); // teqMI lr, #7077888 + __ cmp(r5, (unsigned)939524096U); // cmp r5, #939524096 + __ cmn(r7, (unsigned)2147483650U, Assembler::LO); // cmnLO r7, #2147483650 + +// Shift op + __ lsl(r0, r4, (unsigned)23U); // lsl r0, r4, #23 + __ lsls(r1, r4, (unsigned)9U); // lsls r1, r4, #9 + __ lsr(r0, r10, (unsigned)3U); // lsr r0, r10, #3 + __ lsrs(r0, r10, (unsigned)20U); // lsrs r0, r10, #20 + __ asr(r1, r9, (unsigned)11U); // asr r1, r9, #11 + __ asrs(r2, r11, (unsigned)10U, Assembler::VS); // asrVSs r2, r11, #10 + +// shift op + __ ror(r8, r2, (unsigned)31U, Assembler::CC); // rorCC r8, r2, #31 + __ rors(r9, r12, (unsigned)8U); // rors r9, r12, #8 + +// ThreeRegNon + __ ror(r8, lr, r7); // ror r8, lr, r7 + __ rors(r12, r3, r4); // rors r12, r3, r4 + __ lsl(r12, sp, lr, Assembler::GT); // lslGT r12, sp, lr + __ lsls(r12, sp, r6, Assembler::AL); // lslALs r12, sp, r6 + __ lsr(r0, r1, r9, Assembler::GT); // lsrGT r0, r1, r9 + __ lsrs(r11, r3, r12, Assembler::GT); // lsrGTs r11, r3, r12 + __ asr(r2, r12, r6, Assembler::LE); // asrLE r2, r12, r6 + __ asrs(r1, r10, r6, Assembler::LT); // asrLTs r1, r10, r6 + +// TwoRegNon + __ mov(r10, r3); // mov r10, r3 + __ movs(r0, r9); // movs r0, r9 + +// 
OneRegImm + __ mov_i(r3, (unsigned)656U, Assembler::VC); // movVC r3, #656 + __ movs_i(r4, (unsigned)2064384U); // movs r4, #2064384 + +// TwoRegSft + __ mov(r12, r6, ::lsr(3)); // mov r12, r6, lsr #3 + __ movs(r5, sp, ::asr(10), Assembler::VC); // movVCs r5, sp, asr #10 + +// TwoRegRSR + __ mov(r1, lr, ::ror(r3)); // mov r1, lr, ror r3 + __ movs(r8, r12, ::ror(r9), Assembler::EQ); // movEQs r8, r12, ror r9 + +// OneRegImm16 + __ movw_i(r11, (unsigned)53041U, Assembler::LO); // movwLO r11, #53041 + __ movt_i(r9, (unsigned)11255U, Assembler::LO); // movtLO r9, #11255 + +// ThreeRegNon + __ mul(r1, sp, r5, Assembler::LE); // mulLE r1, sp, r5 + __ muls(r0, r10, r11); // muls r0, r10, r11 + +// FourRegNon + __ mla(r0, r3, r12, r7); // mla r0, r3, r12, r7 + __ mlas(r8, r11, r3, r6, Assembler::EQ); // mlaEQs r8, r11, r3, r6 + __ umull(lr, r4, r5, r6); // umull lr, r4, r5, r6 + __ umulls(r0, r4, r6, r7); // umulls r0, r4, r6, r7 + __ umlal(r8, r0, r11, lr); // umlal r8, r0, r11, lr + __ umlals(r11, r4, lr, r7); // umlals r11, r4, lr, r7 + __ smull(r1, r5, r6, r7, Assembler::HS); // smullHS r1, r5, r6, r7 + __ smulls(r0, r11, r12, r5, Assembler::MI); // smullMIs r0, r11, r12, r5 + +// FourRegNon + __ umaal(r8, r9, r2, r5); // umaal r8, r9, r2, r5 + __ mls(r0, r4, sp, lr, Assembler::EQ); // mlsEQ r0, r4, sp, lr + +// ThreeRegNon + __ qadd(r9, r4, sp, Assembler::PL); // qaddPL r9, r4, sp + __ qsub(r0, r12, r5, Assembler::MI); // qsubMI r0, r12, r5 + __ qdadd(r3, r5, r7); // qdadd r3, r5, r7 + __ qdsub(r9, r2, r4); // qdsub r9, r2, r4 + +// FourRegNon + __ smlabb(r1, r12, r5, r6); // smlabb r1, r12, r5, r6 + __ smlabt(r0, r10, r12, r6); // smlabt r0, r10, r12, r6 + __ smlatb(r8, r1, r3, lr); // smlatb r8, r1, r3, lr + __ smlatt(r1, sp, r6, r7); // smlatt r1, sp, r6, r7 + __ smlawb(r0, r3, r4, r6); // smlawb r0, r3, r4, r6 + __ smlawt(r11, r4, lr, r7); // smlawt r11, r4, lr, r7 + __ smlalbb(r0, r10, r6, r7); // smlalbb r0, r10, r6, r7 + __ smlalbt(r3, r11, r4, lr, Assembler::LS); // smlalbtLS r3, r11, r4, lr + __ smlaltb(r8, r11, r3, r12); // smlaltb r8, r11, r3, r12 + __ smlaltt(r8, r1, r3, r5); // smlaltt r8, r1, r3, r5 + +// ThreeRegNon + __ smulwb(r2, r12, sp, Assembler::HS); // smulwbHS r2, r12, sp + __ smulwt(r8, r12, r6); // smulwt r8, r12, r6 + __ smulbb(r2, r6, lr, Assembler::GE); // smulbbGE r2, r6, lr + __ smulbt(r8, r12, r7); // smulbt r8, r12, r7 + __ smultb(r10, r3, lr, Assembler::EQ); // smultbEQ r10, r3, lr + __ smultt(r0, r3, sp); // smultt r0, r3, sp + +// MemoryOp + __ ldr(r10, Address(r7, r9, lsl(), Address::ADD, Address::post)); // ldr r10, [r7], r9 + __ ldrb(r0, Address(r9, 196)); // ldrb r0, [r9, #196] + __ ldrh(lr, Address(r4, r6, lsl(), Address::ADD, Address::pre)); // ldrh lr, [r4, r6]! + __ ldrsb(r6, Address(__ pre(r9, 232))); // ldrsb r6, [r9, #232]! + __ ldrsh(r2, Address(r1, r1, lsl(), Address::ADD, Address::post)); // ldrsh r2, [r1], r1 + __ str(r0, Address(r9, r4, lsl(), Address::ADD, Address::post)); // str r0, [r9], r4 + __ strb(r3, Address(__ pre(r5, 92))); // strb r3, [r5, #92]! + __ strh(r2, Address(r8, 160)); // strh r2, [r8, #160] + +// MemoryOp + __ ldr(r8, Address(r12, r8, lsl(), Address::ADD, Address::off)); // ldr r8, [r12, r8] + __ ldrb(r11, Address(__ post(r10, 16))); // ldrb r11, [r10], #16 + __ ldrh(r11, Address(r10, r6, lsl(), Address::ADD, Address::off)); // ldrh r11, [r10, r6] + __ ldrsb(r5, Address(r11, r10, lsl(), Address::ADD, Address::pre)); // ldrsb r5, [r11, r10]! 
+ __ ldrsh(r6, Address(r3, r7, lsl(), Address::ADD, Address::off)); // ldrsh r6, [r3, r7] + __ str(r7, Address(sp, r5, lsl(), Address::ADD, Address::pre)); // str r7, [sp, r5]! + __ strb(r2, Address(r10)); // strb r2, [r10] + __ strh(r6, Address(r4, r3, lsl(), Address::ADD, Address::post)); // strh r6, [r4], r3 + +// MemoryOp + __ ldr(r10, Address(r12)); // ldr r10, [r12] + __ ldrb(r4, Address(__ post(r11, 132))); // ldrb r4, [r11], #132 + __ ldrh(r9, Address(r9, r12, lsl(), Address::ADD, Address::post)); // ldrh r9, [r9], r12 + __ ldrsb(r9, Address(__ post(r3, 148))); // ldrsb r9, [r3], #148 + __ ldrsh(r11, Address(__ pre(r2, 148))); // ldrsh r11, [r2, #148]! + __ str(r11, Address(sp, r11, lsl(), Address::ADD, Address::off)); // str r11, [sp, r11] + __ strb(r1, Address(sp, r10, lsl(), Address::ADD, Address::off)); // strb r1, [sp, r10] + __ strh(r10, Address(lr, r9, lsl(), Address::ADD, Address::post)); // strh r10, [lr], r9 + +// MemoryOp + __ ldr(r6, Address(r3, r4, lsl(), Address::ADD, Address::pre)); // ldr r6, [r3, r4]! + __ ldrb(r4, Address(r6, sp, lsl(), Address::ADD, Address::pre)); // ldrb r4, [r6, sp]! + __ ldrh(r6, Address(r7, r10, lsl(), Address::ADD, Address::post)); // ldrh r6, [r7], r10 + __ ldrsb(r0, Address(r6, r11, lsl(), Address::ADD, Address::pre)); // ldrsb r0, [r6, r11]! + __ ldrsh(r10, Address(r6, sp, lsl(), Address::ADD, Address::post)); // ldrsh r10, [r6], sp + __ str(r7, Address(r3, r12, lsl(), Address::ADD, Address::off)); // str r7, [r3, r12] + __ strb(r3, Address(r8, r1, lsl(), Address::ADD, Address::pre)); // strb r3, [r8, r1]! + __ strh(r4, Address(r12, 64)); // strh r4, [r12, #64] + + __ bind(near); + +// LitMemoryOp + __ ldr(r1, near); // ldr r1, near + __ ldrb(r7, __ pc()); // ldrb r7, . + __ ldrh(r2, near); // ldrh r2, near + __ ldrsb(r10, __ pc()); // ldrsb r10, . + __ ldrsh(lr, near_post); // ldrsh lr, near_post + +// LitMemoryOp + __ ldr(r2, __ pc()); // ldr r2, . + __ ldrb(r3, __ pc()); // ldrb r3, . + __ ldrh(r7, near_post); // ldrh r7, near_post + __ ldrsb(sp, __ pc()); // ldrsb sp, . + __ ldrsh(r10, near); // ldrsh r10, near + +// LitMemoryOp + __ ldr(r5, __ pc()); // ldr r5, . + __ ldrb(lr, near_post); // ldrb lr, near_post + __ ldrh(r5, near_post); // ldrh r5, near_post + __ ldrsb(r6, near); // ldrsb r6, near + __ ldrsh(r11, near); // ldrsh r11, near + +// LitMemoryOp + __ ldr(r7, near_post); // ldr r7, near_post + __ ldrb(r5, near_post); // ldrb r5, near_post + __ ldrh(r10, near); // ldrh r10, near + __ ldrsb(r6, near_post); // ldrsb r6, near_post + __ ldrsh(r9, __ pc()); // ldrsh r9, . + + __ bind(near_post); + +// MemoryRegRegSftOp + __ ldr(r0, Address(r0, r10, ::ror(6), Address::ADD, Address::post)); // ldr r0, [r0], r10, ror #6 + __ ldrb(r3, Address(r8, lr, ::lsl(9), Address::ADD, Address::off)); // ldrb r3, [r8, lr, lsl #9] + __ str(r5, Address(sp, r3, ::lsl(15), Address::ADD, Address::off)); // str r5, [sp, r3, lsl #15] + __ strb(r9, Address(r9, r5, ::asr(2), Address::ADD, Address::post)); // strb r9, [r9], r5, asr #2 + +// MemoryRegRegSftOp + __ ldr(r5, Address(r4, r0, ::ror(6), Address::ADD, Address::off)); // ldr r5, [r4, r0, ror #6] + __ ldrb(lr, Address(r0, r4, ::lsr(9), Address::ADD, Address::off)); // ldrb lr, [r0, r4, lsr #9] + __ str(r5, Address(r12, r12, ::asr(5), Address::ADD, Address::post)); // str r5, [r12], r12, asr #5 + __ strb(r3, Address(r1, r7, ::ror(12), Address::ADD, Address::pre)); // strb r3, [r1, r7, ror #12]! 
+ +// MemoryRegRegSftOp + __ ldr(r6, Address(r2, r3, ::rrx(), Address::ADD, Address::pre)); // ldr r6, [r2, r3, rrx]! + __ ldrb(r8, Address(lr, r2, ::asr(16), Address::ADD, Address::pre)); // ldrb r8, [lr, r2, asr #16]! + __ str(r6, Address(r3, r6, ::ror(7), Address::ADD, Address::pre)); // str r6, [r3, r6, ror #7]! + __ strb(r3, Address(r8, r2, ::lsl(10), Address::ADD, Address::off)); // strb r3, [r8, r2, lsl #10] + +// MemoryRegRegSftOp + __ ldr(r11, Address(sp, lr, ::lsl(8), Address::ADD, Address::off)); // ldr r11, [sp, lr, lsl #8] + __ ldrb(r10, Address(sp, r12, ::lsl(4), Address::ADD, Address::pre)); // ldrb r10, [sp, r12, lsl #4]! + __ str(sp, Address(r9, r2, ::asr(2), Address::ADD, Address::off)); // str sp, [r9, r2, asr #2] + __ strb(r7, Address(r11, lr, ::asr(14), Address::ADD, Address::pre)); // strb r7, [r11, lr, asr #14]! + +// LdStOne + __ ldrex(r12, r11); // ldrex r12, [r11] + __ ldrexb(r4, r12); // ldrexb r4, [r12] + __ ldrexh(r11, r11); // ldrexh r11, [r11] + +// LdStTwo + __ strex(r1, r7, lr); // strex r1, r7, [lr] + __ strexb(r12, r6, r4); // strexb r12, r6, [r4] + __ strexh(r4, r6, r7, Assembler::HS); // strexhHS r4, r6, [r7] + +// ThreeRegNon + __ sadd16(r3, r4, r7); // sadd16 r3, r4, r7 + __ sasx(r9, r10, r3, Assembler::AL); // sasxAL r9, r10, r3 + __ ssax(r12, r5, r6); // ssax r12, r5, r6 + __ ssub16(r12, r5, lr); // ssub16 r12, r5, lr + __ sadd8(r0, r10, r7); // sadd8 r0, r10, r7 + __ ssub8(r0, r8, r2, Assembler::VS); // ssub8VS r0, r8, r2 + __ qadd16(r11, r4, r5, Assembler::PL); // qadd16PL r11, r4, r5 + __ qasx(r11, r3, r12, Assembler::VS); // qasxVS r11, r3, r12 + __ qsax(r0, r3, r5); // qsax r0, r3, r5 + __ ssub16(r10, r12, r5, Assembler::AL); // ssub16AL r10, r12, r5 + __ qadd8(r10, r6, lr, Assembler::CC); // qadd8CC r10, r6, lr + __ qsub8(r10, r11, r7); // qsub8 r10, r11, r7 + __ shadd16(r9, r4, lr, Assembler::PL); // shadd16PL r9, r4, lr + __ shasx(r1, lr, r7); // shasx r1, lr, r7 + __ shsax(r9, r11, r5, Assembler::LO); // shsaxLO r9, r11, r5 + __ shsub16(r3, r1, r11, Assembler::GE); // shsub16GE r3, r1, r11 + __ shadd8(sp, r5, r7, Assembler::GT); // shadd8GT sp, r5, r7 + __ shsub8(r1, r5, r7); // shsub8 r1, r5, r7 + +// ThreeRegNon + __ uadd16(r10, r4, r7); // uadd16 r10, r4, r7 + __ uasx(r1, r9, r7, Assembler::HS); // uasxHS r1, r9, r7 + __ usax(r11, sp, r7); // usax r11, sp, r7 + __ usub16(r11, r4, lr); // usub16 r11, r4, lr + __ uadd8(r2, sp, r7, Assembler::LO); // uadd8LO r2, sp, r7 + __ usub8(r8, r10, lr, Assembler::GT); // usub8GT r8, r10, lr + __ uqadd16(r3, r12, sp); // uqadd16 r3, r12, sp + __ uqasx(r4, sp, r6); // uqasx r4, sp, r6 + __ uqsax(r1, r10, lr); // uqsax r1, r10, lr + __ uqsub16(r2, sp, lr, Assembler::LE); // uqsub16LE r2, sp, lr + __ uqadd8(r1, r12, r5); // uqadd8 r1, r12, r5 + __ uqsub8(r0, r4, sp, Assembler::GT); // uqsub8GT r0, r4, sp + __ uhadd16(r0, r10, r5, Assembler::HI); // uhadd16HI r0, r10, r5 + __ uhasx(r11, r4, r7, Assembler::LE); // uhasxLE r11, r4, r7 + __ uhsax(r1, lr, r9, Assembler::GE); // uhsaxGE r1, lr, r9 + __ uhsub16(r2, r11, lr); // uhsub16 r2, r11, lr + __ uhadd8(r9, r4, r5, Assembler::GE); // uhadd8GE r9, r4, r5 + __ uhsub8(r2, sp, lr, Assembler::HI); // uhsub8HI r2, sp, lr + +// PKUPSATREV + __ sxtab16(r10, r3, r7, ::ror(16)); // sxtab16 r10, r3, r7, ROR #16 + __ sxtab(r9, r5, r7, ::ror(24), Assembler::CS); // sxtabCS r9, r5, r7, ROR #24 + __ sxtah(r3, r5, r7, ::ror(8)); // sxtah r3, r5, r7, ROR #8 + __ uxtab16(r8, r4, r6, ::ror(8), Assembler::AL); // uxtab16AL r8, r4, r6, ROR #8 + __ uxtab(r0, r11, sp, ::rrx(), 
Assembler::EQ); // uxtabEQ r0, r11, sp, ROR #0 + __ uxtah(r9, r12, r5, ::rrx()); // uxtah r9, r12, r5, ROR #0 + +// PKUPSATREV + __ sxtb16(r3, r11, ::ror(16), Assembler::GE); // sxtb16GE r3, r11, ROR #16 + __ sxtb(r2, r6, ::rrx(), Assembler::HI); // sxtbHI r2, r6, ROR #0 + __ sxth(r3, sp, ::ror(24), Assembler::GT); // sxthGT r3, sp, ROR #24 + __ uxtb16(r12, r5, ::ror(16)); // uxtb16 r12, r5, ROR #16 + __ uxtb(r12, r5, ::ror(16)); // uxtb r12, r5, ROR #16 + __ uxth(r8, r5, ::ror(16)); // uxth r8, r5, ROR #16 + +// TwoRegNon + __ rev(r10, r4, Assembler::EQ); // revEQ r10, r4 + __ rev16(r8, r12, Assembler::GE); // rev16GE r8, r12 + __ rbit(lr, r7); // rbit lr, r7 + __ revsh(sp, r7, Assembler::GT); // revshGT sp, r7 + +// ThreeRegNon + __ sdiv(r9, sp, lr); // sdiv r9, sp, lr + __ udiv(r2, r12, r6); // udiv r2, r12, r6 + +// TwoRegTwoImm + __ sbfx(r0, r1, (unsigned)20U, (unsigned)3U, Assembler::MI); // sbfxMI r0, r1, #20, #3 + __ ubfx(r9, r2, (unsigned)16U, (unsigned)15U); // ubfx r9, r2, #16, #15 + __ bfi(r1, r11, (unsigned)27U, (unsigned)3U, Assembler::HI); // bfiHI r1, r11, #27, #3 + +// TwoRegTwoImm + __ bfc(r3, (unsigned)7U, (unsigned)10U); // bfc r3, #7, #10 + +// MultipleMemOp + __ stmda(r6, 3435U, false); // stmda r6, {r0, r1, r3, r5, r6, r8, r10, r11} + __ stmed(r4, 14559U, false); // stmed r4, {r0, r1, r2, r3, r4, r6, r7, r11, r12, sp} + __ ldmda(r0, 57812U, false); // ldmda r0, {r2, r4, r6, r7, r8, sp, lr, pc} + __ ldmfa(r12, 39027U, true); // ldmfa r12!, {r0, r1, r4, r5, r6, r11, r12, pc} + __ stmia(r9, 12733U, true); // stmia r9!, {r0, r2, r3, r4, r5, r7, r8, r12, sp} + __ stmea(r11, 21955U, false); // stmea r11, {r0, r1, r6, r7, r8, r10, r12, lr} + __ ldmia(r12, 48418U, true); // ldmia r12!, {r1, r5, r8, r10, r11, r12, sp, pc} + __ ldmfd(sp, 41226U, true); // ldmfd sp!, {r1, r3, r8, sp, pc} + __ stmdb(r11, 8729U, true); // stmdb r11!, {r0, r3, r4, r9, sp} + __ stmfd(r9, 36309U, true); // stmfd r9!, {r0, r2, r4, r6, r7, r8, r10, r11, pc} + __ ldmdb(r5, 24667U, true); // ldmdb r5!, {r0, r1, r3, r4, r6, sp, lr} + __ ldmea(r1, 37287U, false); // ldmea r1, {r0, r1, r2, r5, r7, r8, r12, pc} + __ stmib(r11, 28266U, true); // stmib r11!, {r1, r3, r5, r6, r9, r10, r11, sp, lr} + __ stmfa(r11, 17671U, false); // stmfa r11, {r0, r1, r2, r8, r10, lr} + __ ldmib(r0, 21452U, true); // ldmib r0!, {r2, r3, r6, r7, r8, r9, r12, lr} + __ ldmed(r1, 11751U, false); // ldmed r1, {r0, r1, r2, r5, r6, r7, r8, r10, r11, sp} + +// BranchLabel + __ b(forth, Assembler::CS); // bCS forth + __ bl(__ pc(), Assembler::MI); // blMI . + +// OneRegNon + __ b(r0, Assembler::VS); // bxVS r0 + __ bl(r3); // blx r3 + +// BranchLabel + __ b(__ pc(), Assembler::AL); // bAL . + __ bl(__ pc()); // bl . + +// OneRegNon + __ b(r0, Assembler::VS); // bxVS r0 + __ bl(r5); // blx r5 + +// BranchLabel + __ b(forth, Assembler::LE); // bLE forth + __ bl(__ pc(), Assembler::MI); // blMI . + +// OneRegNon + __ b(r9, Assembler::NE); // bxNE r9 + __ bl(r12); // blx r12 + +// BranchLabel + __ b(back); // b back + __ bl(__ pc(), Assembler::HI); // blHI . + +// OneRegNon + __ b(r1, Assembler::VC); // bxVC r1 + __ bl(r7, Assembler::GT); // blxGT r7 + +// BranchLabel + __ b(back, Assembler::GE); // bGE back + __ bl(__ pc(), Assembler::HI); // blHI . + +// OneRegNon + __ b(r12); // bx r12 + __ bl(r7, Assembler::CC); // blxCC r7 + +// BranchLabel + __ b(__ pc()); // b . + __ bl(back, Assembler::GT); // blGT back + +// OneRegNon + __ b(r1, Assembler::GE); // bxGE r1 + __ bl(r0); // blx r0 + +// BranchLabel + __ b(__ pc()); // b . 
+ __ bl(forth); // bl forth + +// OneRegNon + __ b(lr, Assembler::GT); // bxGT lr + __ bl(r11, Assembler::NE); // blxNE r11 + +// BranchLabel + __ b(__ pc(), Assembler::CS); // bCS . + __ bl(__ pc()); // bl . + +// OneRegNon + __ b(r10, Assembler::HS); // bxHS r10 + __ bl(r4); // blx r4 + +// BranchLabel + __ b(back, Assembler::AL); // bAL back + __ bl(__ pc()); // bl . + +// OneRegNon + __ b(r12, Assembler::LO); // bxLO r12 + __ bl(r8); // blx r8 + +// BranchLabel + __ b(forth); // b forth + __ bl(__ pc()); // bl . + +// OneRegNon + __ b(r10); // bx r10 + __ bl(r1); // blx r1 + +// ThreeFltNon + __ vmla_f32(f4, f8, f12, Assembler::MI); // vmlaMI.f32 s4, s8, s12 + __ vmls_f32(f4, f10, f10); // vmls.f32 s4, s10, s10 + __ vnmla_f32(f2, f10, f12); // vnmla.f32 s2, s10, s12 + __ vnmls_f32(f8, f6, f8, Assembler::LT); // vnmlsLT.f32 s8, s6, s8 + __ vnmul_f32(f6, f12, f14, Assembler::MI); // vnmulMI.f32 s6, s12, s14 + __ vadd_f32(f0, f2, f0); // vadd.f32 s0, s2, s0 + __ vsub_f32(f2, f4, f10, Assembler::AL); // vsubAL.f32 s2, s4, s10 + __ vdiv_f32(f0, f2, f12, Assembler::CS); // vdivCS.f32 s0, s2, s12 + +// ThreeFltNon + __ vmla_f64(d0, d3, d6); // vmla.f64 d0, d3, d6 + __ vmls_f64(d0, d1, d5); // vmls.f64 d0, d1, d5 + __ vnmla_f64(d1, d4, d6); // vnmla.f64 d1, d4, d6 + __ vnmls_f64(d0, d1, d1, Assembler::NE); // vnmlsNE.f64 d0, d1, d1 + __ vnmul_f64(d3, d5, d5, Assembler::NE); // vnmulNE.f64 d3, d5, d5 + __ vadd_f64(d0, d2, d4, Assembler::LO); // vaddLO.f64 d0, d2, d4 + __ vsub_f64(d1, d2, d4); // vsub.f64 d1, d2, d4 + __ vdiv_f64(d0, d1, d5, Assembler::MI); // vdivMI.f64 d0, d1, d5 + +// TwoFltNon + __ vabs_f32(f6, f6); // vabs.f32 s6, s6 + __ vneg_f32(f6, f8, Assembler::PL); // vnegPL.f32 s6, s8 + __ vsqrt_f32(f0, f8); // vsqrt.f32 s0, s8 + +// TwoFltNon + __ vabs_f64(d0, d4); // vabs.f64 d0, d4 + __ vneg_f64(d1, d4); // vneg.f64 d1, d4 + __ vsqrt_f64(d0, d1); // vsqrt.f64 d0, d1 + +// vmov_f32 + __ vmov_f32(f0, lr, Assembler::PL); // vmovPL.f32 s0, lr + +// vmov_f32 + __ vmov_f32(r11, f8); // vmov.f32 r11, s8 + +// vmov_f64 + __ vmov_f64(d1, r11, lr, Assembler::LT); // vmovLT.f64 d1, r11, lr + +// vmov_f64 + __ vmov_f64(r7, r5, d5); // vmov.f64 r7, r5, d5 + +// vmov_f32 + __ vmov_f32(f8, f12); // vmov.f32 s8, s12 + +// vmov_f64 + __ vmov_f64(d1, d2, Assembler::HI); // vmovHI.f64 d1, d2 + +// vmov_f32 + __ vmov_f32(f4, 1.0f, Assembler::VS); // vmovVS.f32 s4, #1.0 + +// vmov_f64 + __ vmov_f64(d2, 1.0); // vmov.f64 d2, #1.0 + +// vmov_f32 + __ vmov_f32(f6, 2.0f); // vmov.f32 s6, #2.0 + +// vmov_f64 + __ vmov_f64(d1, 2.0); // vmov.f64 d1, #2.0 + +// vector memory + __ vldr_f32(f4, Address(r5, 116)); // vldr.f32 s4, [r5, #116] + __ vstr_f32(f2, Address(r1, 56), Assembler::CC); // vstrCC.f32 s2, [r1, #56] + +// vector memory + __ vldr_f64(d7, Address(r5, 16), Assembler::NE); // vldrNE.f64 d7, [r5, #16] + __ vstr_f64(d6, Address(r1, 228)); // vstr.f64 d6, [r1, #228] + + __ bind(near_flt); + +// vector memory + __ vldr_f32(f2, near_post_flt); // vldr.f32 s2, near_post_flt + __ vstr_f32(f6, near_post_flt); // vstr.f32 s6, near_post_flt + +// vector memory + __ vldr_f64(d2, near_flt, Assembler::LT); // vldrLT.f64 d2, near_flt + __ vstr_f64(d3, __ pc(), Assembler::GT); // vstrGT.f64 d3, . 
+ +// vector memory + __ vldr_f32(f4, near_post_flt, Assembler::CC); // vldrCC.f32 s4, near_post_flt + __ vstr_f32(f0, near_post_flt); // vstr.f32 s0, near_post_flt + +// vector memory + __ vldr_f64(d4, near_post_flt, Assembler::GT); // vldrGT.f64 d4, near_post_flt + __ vstr_f64(d0, near_flt); // vstr.f64 d0, near_flt + +// vector memory + __ vldr_f32(f8, near_post_flt); // vldr.f32 s8, near_post_flt + __ vstr_f32(f6, near_post_flt); // vstr.f32 s6, near_post_flt + +// vector memory + __ vldr_f64(d4, near_flt, Assembler::PL); // vldrPL.f64 d4, near_flt + __ vstr_f64(d5, near_flt); // vstr.f64 d5, near_flt + +// vector memory + __ vldr_f32(f8, near_post_flt, Assembler::LS); // vldrLS.f32 s8, near_post_flt + __ vstr_f32(f12, __ pc(), Assembler::CC); // vstrCC.f32 s12, . + +// vector memory + __ vldr_f64(d6, near_post_flt, Assembler::AL); // vldrAL.f64 d6, near_post_flt + __ vstr_f64(d1, near_post_flt, Assembler::LT); // vstrLT.f64 d1, near_post_flt + + __ bind(near_post_flt); + +// FltMultMemOp + __ vldmia_f32(r1, FloatRegSet::of(f4).bits(), false); // vldmia.f32 r1, {s4} + __ vstmia_f32(r6, FloatRegSet::of(f4).bits(), true, Assembler::CS); // vstmiaCS.f32 r6!, {s4} + +// DblMultMemOp + __ vldmia_f64(r9, DoubleFloatRegSet::of(d1, d2, d3, d4).bits(), true); // vldmia.f64 r9!, {d1, d2, d3, d4} + __ vstmia_f64(r3, DoubleFloatRegSet::of(d6, d7).bits(), true); // vstmia.f64 r3!, {d6, d7} + +// FltMultMemOp + __ vldmdb_f32(r2, FloatRegSet::of(f6).bits(), Assembler::VS); // vldmdbVS.f32 r2!, {s6} + __ vstmdb_f32(r6, FloatRegSet::of(f14).bits()); // vstmdb.f32 r6!, {s14} + +// DblMultMemOp + __ vldmdb_f64(sp, DoubleFloatRegSet::of(d4, d5, d6, d7).bits()); // vldmdb.f64 sp!, {d4, d5, d6, d7} + __ vstmdb_f64(r0, DoubleFloatRegSet::of(d5, d6, d7).bits()); // vstmdb.f64 r0!, {d5, d6, d7} + +// vcmp_f32 + __ vcmp_f32(f2, f2); // vcmp.f32 s2, s2 + +// vcmpe_f32 + __ vcmpe_f32(f8, f8, Assembler::VC); // vcmpeVC.f32 s8, s8 + +// vcmp_f64 + __ vcmp_f64(d0, d6); // vcmp.f64 d0, d6 + +// vcmpe_f64 + __ vcmpe_f64(d3, d7, Assembler::GE); // vcmpeGE.f64 d3, d7 + +// vcmp_f32 + __ vcmp_f32(f2, 0.0f, Assembler::LT); // vcmpLT.f32 s2, #0.0 + +// vcmpe_f32 + __ vcmpe_f32(f14, 0.0f, Assembler::GT); // vcmpeGT.f32 s14, #0.0 + +// vcmp_f64 + __ vcmp_f64(d4, 0.0); // vcmp.f64 d4, #0.0 + +// vcmpe_f64 + __ vcmpe_f64(d1, 0.0); // vcmpe.f64 d1, #0.0 + +// vcvt + __ vcvt_s32_f32(f2, f6, Assembler::VS); // vcvtVS.s32.f32 s2, s6 + __ vcvt_u32_f32(f6, f14, Assembler::GT); // vcvtGT.u32.f32 s6, s14 + __ vcvt_f32_s32(f0, f2, Assembler::CC); // vcvtCC.f32.s32 s0, s2 + __ vcvt_f32_u32(f2, f4, Assembler::CC); // vcvtCC.f32.u32 s2, s4 + +// vcvt + __ vcvt_s32_f64(f4, d4, Assembler::HI); // vcvtHI.s32.f64 s4, d4 + __ vcvt_u32_f64(f6, d6, Assembler::HI); // vcvtHI.u32.f64 s6, d6 + __ vcvt_f32_f64(f6, d7, Assembler::LS); // vcvtLS.f32.f64 s6, d7 + +// vcvt + __ vcvt_f64_s32(d3, f8); // vcvt.f64.s32 d3, s8 + __ vcvt_f64_u32(d5, f14, Assembler::EQ); // vcvtEQ.f64.u32 d5, s14 + __ vcvt_f64_f32(d4, f10, Assembler::AL); // vcvtAL.f64.f32 d4, s10 + +// BKPT + __ bkpt((unsigned)26U); // bkpt #26 + + __ bind(forth); + +/* +aarch32ops.o: file format elf32-littlearm + + +Disassembly of section .text: + +00000000 : + 0: e082852b add r8, r2, fp, lsr #10 + 4: 009310c7 addseq r1, r3, r7, asr #1 + 8: e0290284 eor r0, r9, r4, lsl #5 + c: c0329066 eorsgt r9, r2, r6, rrx + 10: c04c000e subgt r0, ip, lr + 14: 00528364 subseq r8, r2, r4, ror #6 + 18: e069818d rsb r8, r9, sp, lsl #3 + 1c: 60708864 rsbsvs r8, r0, r4, ror #16 + 20: d08597a1 addle r9, r5, r1, 
lsr #15 + 24: e09d12c6 adds r1, sp, r6, asr #5 + 28: c0adb0c7 adcgt fp, sp, r7, asr #1 + 2c: e0b80329 adcs r0, r8, r9, lsr #6 + 30: e0c392e6 sbc r9, r3, r6, ror #5 + 34: 80dd1845 sbcshi r1, sp, r5, asr #16 + 38: 30e28486 rsccc r8, r2, r6, lsl #9 + 3c: e0f4a76d rscs sl, r4, sp, ror #14 + 40: 118db785 orrne fp, sp, r5, lsl #15 + 44: e19a9764 orrs r9, sl, r4, ror #14 + 48: e1cd90e5 bic r9, sp, r5, ror #1 + 4c: e1d20547 bics r0, r2, r7, asr #10 + 50: e086d777 add sp, r6, r7, ror r7 + 54: 809c4776 addshi r4, ip, r6, ror r7 + 58: 90265c57 eorls r5, r6, r7, asr ip + 5c: e035841d eors r8, r5, sp, lsl r4 + 60: e04c2055 sub r2, ip, r5, asr r0 + 64: 20539c17 subscs r9, r3, r7, lsl ip + 68: c06c9614 rsbgt r9, ip, r4, lsl r6 + 6c: e072811c rsbs r8, r2, ip, lsl r1 + 70: e08c4d1d add r4, ip, sp, lsl sp + 74: e09b8d76 adds r8, fp, r6, ror sp + 78: 10a20415 adcne r0, r2, r5, lsl r4 + 7c: e0beb256 adcs fp, lr, r6, asr r2 + 80: 80ca835e sbchi r8, sl, lr, asr r3 + 84: e0dc1615 sbcs r1, ip, r5, lsl r6 + 88: 60e54a7e rscvs r4, r5, lr, ror sl + 8c: e0fc181d rscs r1, ip, sp, lsl r8 + 90: 61818076 orrvs r8, r1, r6, ror r0 + 94: e19db577 orrs fp, sp, r7, ror r5 + 98: e1ce4216 bic r4, lr, r6, lsl r2 + 9c: e1dba31d bics sl, fp, sp, lsl r3 + a0: 828d8261 addhi r8, sp, #268435462 ; 0x10000006 + a4: e29ed69b adds sp, lr, #162529280 ; 0x9b00000 + a8: e226e87d eor lr, r6, #8192000 ; 0x7d0000 + ac: e2332f49 eors r2, r3, #292 ; 0x124 + b0: e24d46d9 sub r4, sp, #227540992 ; 0xd900000 + b4: b25e1402 subslt r1, lr, #33554432 ; 0x2000000 + b8: e2650325 rsb r0, r5, #-1811939328 ; 0x94000000 + bc: 3274882f rsbscc r8, r4, #3080192 ; 0x2f0000 + c0: b2849102 addlt r9, r4, #-2147483648 ; 0x80000000 + c4: e2948902 adds r8, r4, #32768 ; 0x8000 + c8: 22aeac2a adccs sl, lr, #10752 ; 0x2a00 + cc: e2b6aabd adcs sl, r6, #774144 ; 0xbd000 + d0: e2cc2426 sbc r2, ip, #637534208 ; 0x26000000 + d4: e2da85a5 sbcs r8, sl, #692060160 ; 0x29400000 + d8: e2e6d871 rsc sp, r6, #7405568 ; 0x710000 + dc: 12fba6e9 rscsne sl, fp, #244318208 ; 0xe900000 + e0: 638737ff orrvs r3, r7, #66846720 ; 0x3fc0000 + e4: 03952951 orrseq r2, r5, #1327104 ; 0x144000 + e8: 63c18eea bicvs r8, r1, #3744 ; 0xea0 + ec: 33d2020a bicscc r0, r2, #-1610612736 ; 0xa0000000 + f0: e118028d tst r8, sp, lsl #5 + f4: e13601a7 teq r6, r7, lsr #3 + f8: e15c0164 cmp ip, r4, ror #2 + fc: b1750807 cmnlt r5, r7, lsl #16 + 100: e112073e tst r2, lr, lsr r7 + 104: 31300572 teqcc r0, r2, ror r5 + 108: 915e0b37 cmpls lr, r7, lsr fp + 10c: 617a0b17 cmnvs sl, r7, lsl fp + 110: e3120585 tst r2, #557842432 ; 0x21400000 + 114: 433e071b teqmi lr, #7077888 ; 0x6c0000 + 118: e355030e cmp r5, #939524096 ; 0x38000000 + 11c: 3377010a cmncc r7, #-2147483646 ; 0x80000002 + 120: e1a00b84 lsl r0, r4, #23 + 124: e1b01484 lsls r1, r4, #9 + 128: e1a001aa lsr r0, sl, #3 + 12c: e1b00a2a lsrs r0, sl, #20 + 130: e1a015c9 asr r1, r9, #11 + 134: 61b0254b asrsvs r2, fp, #10 + 138: 31a08fe2 rorcc r8, r2, #31 + 13c: e1b0946c rors r9, ip, #8 + 140: e1a0877e ror r8, lr, r7 + 144: e1b0c473 rors ip, r3, r4 + 148: c1a0ce1d lslgt ip, sp, lr + 14c: e1b0c61d lsls ip, sp, r6 + 150: c1a00931 lsrgt r0, r1, r9 + 154: c1b0bc33 lsrsgt fp, r3, ip + 158: d1a0265c asrle r2, ip, r6 + 15c: b1b0165a asrslt r1, sl, r6 + 160: e1a0a003 mov sl, r3 + 164: e1b00009 movs r0, r9 + 168: 73a03e29 movvc r3, #656 ; 0x290 + 16c: e3b0497e movs r4, #2064384 ; 0x1f8000 + 170: e1a0c1a6 lsr ip, r6, #3 + 174: 71b0554d asrsvc r5, sp, #10 + 178: e1a0137e ror r1, lr, r3 + 17c: 01b0897c rorseq r8, ip, r9 + 180: 330cbf31 movwcc fp, #53041 ; 0xcf31 + 184: 33429bf7 
movtcc r9, #11255 ; 0x2bf7 + 188: d001059d mulle r1, sp, r5 + 18c: e0100b9a muls r0, sl, fp + 190: e0207c93 mla r0, r3, ip, r7 + 194: 0038639b mlaseq r8, fp, r3, r6 + 198: e084e695 umull lr, r4, r5, r6 + 19c: e0940796 umulls r0, r4, r6, r7 + 1a0: e0a08e9b umlal r8, r0, fp, lr + 1a4: e0b4b79e umlals fp, r4, lr, r7 + 1a8: 20c51796 smullcs r1, r5, r6, r7 + 1ac: 40db059c smullsmi r0, fp, ip, r5 + 1b0: e0498592 umaal r8, r9, r2, r5 + 1b4: 0060ed94 mlseq r0, r4, sp, lr + 1b8: 510d9054 qaddpl r9, r4, sp + 1bc: 4125005c qsubmi r0, ip, r5 + 1c0: e1473055 qdadd r3, r5, r7 + 1c4: e1649052 qdsub r9, r2, r4 + 1c8: e101658c smlabb r1, ip, r5, r6 + 1cc: e1006cca smlabt r0, sl, ip, r6 + 1d0: e108e3a1 smlatb r8, r1, r3, lr + 1d4: e10176ed smlatt r1, sp, r6, r7 + 1d8: e1206483 smlawb r0, r3, r4, r6 + 1dc: e12b7ec4 smlawt fp, r4, lr, r7 + 1e0: e14a0786 smlalbb r0, sl, r6, r7 + 1e4: 914b3ec4 smlalbtls r3, fp, r4, lr + 1e8: e14b8ca3 smlaltb r8, fp, r3, ip + 1ec: e14185e3 smlaltt r8, r1, r3, r5 + 1f0: 21220dac smulwbcs r2, ip, sp + 1f4: e12806ec smulwt r8, ip, r6 + 1f8: a1620e86 smulbbge r2, r6, lr + 1fc: e16807cc smulbt r8, ip, r7 + 200: 016a0ea3 smultbeq sl, r3, lr + 204: e1600de3 smultt r0, r3, sp + 208: e697a009 ldr sl, [r7], r9 + 20c: e5d900c4 ldrb r0, [r9, #196] ; 0xc4 + 210: e1b4e0b6 ldrh lr, [r4, r6]! + 214: e1f96ed8 ldrsb r6, [r9, #232]! ; 0xe8 + 218: e09120f1 ldrsh r2, [r1], r1 + 21c: e6890004 str r0, [r9], r4 + 220: e5e5305c strb r3, [r5, #92]! ; 0x5c + 224: e1c82ab0 strh r2, [r8, #160] ; 0xa0 + 228: e79c8008 ldr r8, [ip, r8] + 22c: e4dab010 ldrb fp, [sl], #16 + 230: e19ab0b6 ldrh fp, [sl, r6] + 234: e1bb50da ldrsb r5, [fp, sl]! + 238: e19360f7 ldrsh r6, [r3, r7] + 23c: e7ad7005 str r7, [sp, r5]! + 240: e5ca2000 strb r2, [sl] + 244: e08460b3 strh r6, [r4], r3 + 248: e59ca000 ldr sl, [ip] + 24c: e4db4084 ldrb r4, [fp], #132 ; 0x84 + 250: e09990bc ldrh r9, [r9], ip + 254: e0d399d4 ldrsb r9, [r3], #148 ; 0x94 + 258: e1f2b9f4 ldrsh fp, [r2, #148]! ; 0x94 + 25c: e78db00b str fp, [sp, fp] + 260: e7cd100a strb r1, [sp, sl] + 264: e08ea0b9 strh sl, [lr], r9 + 268: e7b36004 ldr r6, [r3, r4]! + 26c: e7f6400d ldrb r4, [r6, sp]! + 270: e09760ba ldrh r6, [r7], sl + 274: e1b600db ldrsb r0, [r6, fp]! + 278: e096a0fd ldrsh sl, [r6], sp + 27c: e783700c str r7, [r3, ip] + 280: e7e83001 strb r3, [r8, r1]! + 284: e1cc44b0 strh r4, [ip, #64] ; 0x40 + +00000288 : + 288: e51f1008 ldr r1, [pc, #-8] ; 288 + 28c: e55f7008 ldrb r7, [pc, #-8] ; 28c + 290: e15f21b0 ldrh r2, [pc, #-16] ; 288 + 294: e15fa0d8 ldrsb sl, [pc, #-8] ; 294 + 298: e1dfe3f8 ldrsh lr, [pc, #56] ; 2d8 + 29c: e51f2008 ldr r2, [pc, #-8] ; 29c + 2a0: e55f3008 ldrb r3, [pc, #-8] ; 2a0 + 2a4: e1df72bc ldrh r7, [pc, #44] ; 2d8 + 2a8: e15fd0d8 ldrsb sp, [pc, #-8] ; 2a8 + 2ac: e15fa2fc ldrsh sl, [pc, #-44] ; 288 + 2b0: e51f5008 ldr r5, [pc, #-8] ; 2b0 + 2b4: e5dfe01c ldrb lr, [pc, #28] ; 2d8 + 2b8: e1df51b8 ldrh r5, [pc, #24] ; 2d8 + 2bc: e15f63dc ldrsb r6, [pc, #-60] ; 288 + 2c0: e15fb4f0 ldrsh fp, [pc, #-64] ; 288 + 2c4: e59f700c ldr r7, [pc, #12] ; 2d8 + 2c8: e5df5008 ldrb r5, [pc, #8] ; 2d8 + 2cc: e15fa4bc ldrh sl, [pc, #-76] ; 288 + 2d0: e1df60d0 ldrsb r6, [pc] ; 2d8 + 2d4: e15f90f8 ldrsh r9, [pc, #-8] ; 2d4 + +000002d8 : + 2d8: e690036a ldr r0, [r0], sl, ror #6 + 2dc: e7d8348e ldrb r3, [r8, lr, lsl #9] + 2e0: e78d5783 str r5, [sp, r3, lsl #15] + 2e4: e6c99145 strb r9, [r9], r5, asr #2 + 2e8: e7945360 ldr r5, [r4, r0, ror #6] + 2ec: e7d0e4a4 ldrb lr, [r0, r4, lsr #9] + 2f0: e68c52cc str r5, [ip], ip, asr #5 + 2f4: e7e13667 strb r3, [r1, r7, ror #12]! 
+ 2f8: e7b26063 ldr r6, [r2, r3, rrx]! + 2fc: e7fe8842 ldrb r8, [lr, r2, asr #16]! + 300: e7a363e6 str r6, [r3, r6, ror #7]! + 304: e7c83502 strb r3, [r8, r2, lsl #10] + 308: e79db40e ldr fp, [sp, lr, lsl #8] + 30c: e7fda20c ldrb sl, [sp, ip, lsl #4]! + 310: e789d142 str sp, [r9, r2, asr #2] + 314: e7eb774e strb r7, [fp, lr, asr #14]! + 318: e19bcf9f ldrex r12, [fp] + 31c: e1dc4f9f ldrexb r4, [ip] + 320: e1fbbf9f ldrexh fp, [fp] + 324: e18e1f97 strex r1, r7, [lr] + 328: e1c4cf96 strexb ip, r6, [r4] + 32c: 21e74f96 strexhcs r4, r6, [r7] + 330: e6143f17 sadd16 r3, r4, r7 + 334: e61a9f33 sasx r9, sl, r3 + 338: e615cf56 ssax ip, r5, r6 + 33c: e615cf7e ssub16 ip, r5, lr + 340: e61a0f97 sadd8 r0, sl, r7 + 344: 66180ff2 ssub8vs r0, r8, r2 + 348: 5624bf15 qadd16pl fp, r4, r5 + 34c: 6623bf3c qasxvs fp, r3, ip + 350: e6230f55 qsax r0, r3, r5 + 354: e61caf75 ssub16 sl, ip, r5 + 358: 3626af9e qadd8cc sl, r6, lr + 35c: e62baff7 qsub8 sl, fp, r7 + 360: 56349f1e shadd16pl r9, r4, lr + 364: e63e1f37 shasx r1, lr, r7 + 368: 363b9f55 shsaxcc r9, fp, r5 + 36c: a6313f7b shsub16ge r3, r1, fp + 370: c635df97 shadd8gt sp, r5, r7 + 374: e6351ff7 shsub8 r1, r5, r7 + 378: e654af17 uadd16 sl, r4, r7 + 37c: 26591f37 uasxcs r1, r9, r7 + 380: e65dbf57 usax fp, sp, r7 + 384: e654bf7e usub16 fp, r4, lr + 388: 365d2f97 uadd8cc r2, sp, r7 + 38c: c65a8ffe usub8gt r8, sl, lr + 390: e66c3f1d uqadd16 r3, ip, sp + 394: e66d4f36 uqasx r4, sp, r6 + 398: e66a1f5e uqsax r1, sl, lr + 39c: d66d2f7e uqsub16le r2, sp, lr + 3a0: e66c1f95 uqadd8 r1, ip, r5 + 3a4: c6640ffd uqsub8gt r0, r4, sp + 3a8: 867a0f15 uhadd16hi r0, sl, r5 + 3ac: d674bf37 uhasxle fp, r4, r7 + 3b0: a67e1f59 uhsaxge r1, lr, r9 + 3b4: e67b2f7e uhsub16 r2, fp, lr + 3b8: a6749f95 uhadd8ge r9, r4, r5 + 3bc: 867d2ffe uhsub8hi r2, sp, lr + 3c0: e683a877 sxtab16 sl, r3, r7, ror #16 + 3c4: 26a59c77 sxtabcs r9, r5, r7, ror #24 + 3c8: e6b53477 sxtah r3, r5, r7, ror #8 + 3cc: e6c48476 uxtab16 r8, r4, r6, ror #8 + 3d0: 06eb007d uxtabeq r0, fp, sp + 3d4: e6fc9075 uxtah r9, ip, r5 + 3d8: a68f387b sxtb16ge r3, fp, ror #16 + 3dc: 86af2076 sxtbhi r2, r6 + 3e0: c6bf3c7d sxthgt r3, sp, ror #24 + 3e4: e6cfc875 uxtb16 ip, r5, ror #16 + 3e8: e6efc875 uxtb ip, r5, ror #16 + 3ec: e6ff8875 uxth r8, r5, ror #16 + 3f0: 06bfaf34 reveq sl, r4 + 3f4: a6bf8fbc rev16ge r8, ip + 3f8: e6ffef37 rbit lr, r7 + 3fc: c6ffdfb7 revshgt sp, r7 + 400: e719fe1d sdiv r9, sp, lr + 404: e732f61c udiv r2, ip, r6 + 408: 47a20a51 sbfxmi r0, r1, #20, #3 + 40c: e7ee9852 ubfx r9, r2, #16, #15 + 410: 87dd1d9b bfihi r1, fp, #27, #3 + 414: e7d0339f bfc r3, #7, #10 + 418: e8060d6b stmda r6, {r0, r1, r3, r5, r6, r8, sl, fp} + 41c: e80438df stmda r4, {r0, r1, r2, r3, r4, r6, r7, fp, ip, sp} + 420: e810e1d4 ldmda r0, {r2, r4, r6, r7, r8, sp, lr, pc} + 424: e83c9873 ldmda ip!, {r0, r1, r4, r5, r6, fp, ip, pc} + 428: e8a931bd stmia r9!, {r0, r2, r3, r4, r5, r7, r8, ip, sp} + 42c: e88b55c3 stm fp, {r0, r1, r6, r7, r8, sl, ip, lr} + 430: e8bcbd22 ldm ip!, {r1, r5, r8, sl, fp, ip, sp, pc} + 434: e8bda10a pop {r1, r3, r8, sp, pc} + 438: e92b2219 stmdb fp!, {r0, r3, r4, r9, sp} + 43c: e9298dd5 stmdb r9!, {r0, r2, r4, r6, r7, r8, sl, fp, pc} + 440: e935605b ldmdb r5!, {r0, r1, r3, r4, r6, sp, lr} + 444: e91191a7 ldmdb r1, {r0, r1, r2, r5, r7, r8, ip, pc} + 448: e9ab6e6a stmib fp!, {r1, r3, r5, r6, r9, sl, fp, sp, lr} + 44c: e98b4507 stmib fp, {r0, r1, r2, r8, sl, lr} + 450: e9b053cc ldmib r0!, {r2, r3, r6, r7, r8, r9, ip, lr} + 454: e9912de7 ldmib r1, {r0, r1, r2, r5, r6, r7, r8, sl, fp, sp} + 458: 2a000075 bcs 634 + 45c: 4bfffffe 
blmi 45c + 460: 612fff10 bxvs r0 + 464: e12fff33 blx r3 + 468: eafffffe b 468 + 46c: ebfffffe bl 46c + 470: 612fff10 bxvs r0 + 474: e12fff35 blx r5 + 478: da00006d ble 634 + 47c: 4bfffffe blmi 47c + 480: 112fff19 bxne r9 + 484: e12fff3c blx ip + 488: eafffedc b 0 + 48c: 8bfffffe blhi 48c + 490: 712fff11 bxvc r1 + 494: c12fff37 blxgt r7 + 498: aafffed8 bge 0 + 49c: 8bfffffe blhi 49c + 4a0: e12fff1c bx ip + 4a4: 312fff37 blxcc r7 + 4a8: eafffffe b 4a8 + 4ac: cbfffed3 blgt 0 + 4b0: a12fff11 bxge r1 + 4b4: e12fff30 blx r0 + 4b8: eafffffe b 4b8 + 4bc: eb00005c bl 634 + 4c0: c12fff1e bxgt lr + 4c4: 112fff3b blxne fp + 4c8: 2afffffe bcs 4c8 + 4cc: ebfffffe bl 4cc + 4d0: 212fff1a bxcs sl + 4d4: e12fff34 blx r4 + 4d8: eafffec8 b 0 + 4dc: ebfffffe bl 4dc + 4e0: 312fff1c bxcc ip + 4e4: e12fff38 blx r8 + 4e8: ea000051 b 634 + 4ec: ebfffffe bl 4ec + 4f0: e12fff1a bx sl + 4f4: e12fff31 blx r1 + 4f8: 4e042a06 vmlami.f32 s4, s8, s12 + 4fc: ee052a45 vmls.f32 s4, s10, s10 + 500: ee151a46 vnmla.f32 s2, s10, s12 + 504: be134a04 vnmlslt.f32 s8, s6, s8 + 508: 4e263a47 vnmulmi.f32 s6, s12, s14 + 50c: ee310a00 vadd.f32 s0, s2, s0 + 510: ee321a45 vsub.f32 s2, s4, s10 + 514: 2e810a06 vdivcs.f32 s0, s2, s12 + 518: ee030b06 vmla.f64 d0, d3, d6 + 51c: ee010b45 vmls.f64 d0, d1, d5 + 520: ee141b46 vnmla.f64 d1, d4, d6 + 524: 1e110b01 vnmlsne.f64 d0, d1, d1 + 528: 1e253b45 vnmulne.f64 d3, d5, d5 + 52c: 3e320b04 vaddcc.f64 d0, d2, d4 + 530: ee321b44 vsub.f64 d1, d2, d4 + 534: 4e810b05 vdivmi.f64 d0, d1, d5 + 538: eeb03ac3 vabs.f32 s6, s6 + 53c: 5eb13a44 vnegpl.f32 s6, s8 + 540: eeb10ac4 vsqrt.f32 s0, s8 + 544: eeb00bc4 vabs.f64 d0, d4 + 548: eeb11b44 vneg.f64 d1, d4 + 54c: eeb10bc1 vsqrt.f64 d0, d1 + 550: 5e00ea10 vmovpl s0, lr + 554: ee14ba10 vmov fp, s8 + 558: bc4ebb11 vmovlt d1, fp, lr + 55c: ec557b15 vmov r7, r5, d5 + 560: eeb04a46 vmov.f32 s8, s12 + 564: 8eb01b42 vmovhi.f64 d1, d2 + 568: 6eb72a00 vmovvs.f32 s4, #112 ; 0x70 + 56c: eeb72b00 vmov.f64 d2, #112 ; 0x70 + 570: eeb03a00 vmov.f32 s6, #0 + 574: eeb01b00 vmov.f64 d1, #0 + 578: ed952a1d vldr s4, [r5, #116] ; 0x74 + 57c: 3d811a0e vstrcc s2, [r1, #56] ; 0x38 + 580: 1d957b04 vldrne d7, [r5, #16] + 584: ed816b39 vstr d6, [r1, #228] ; 0xe4 + +00000588 : + 588: ed9f1a0e vldr s2, [pc, #56] ; 5c8 + 58c: ed8f3a0d vstr s6, [pc, #52] ; 5c8 + 590: bd1f2b04 vldrlt d2, [pc, #-16] ; 588 + 594: cd0f3b02 vstrgt d3, [pc, #-8] ; 594 + 598: 3d9f2a0a vldrcc s4, [pc, #40] ; 5c8 + 59c: ed8f0a09 vstr s0, [pc, #36] ; 5c8 + 5a0: cd9f4b08 vldrgt d4, [pc, #32] ; 5c8 + 5a4: ed0f0b09 vstr d0, [pc, #-36] ; 588 + 5a8: ed9f4a06 vldr s8, [pc, #24] ; 5c8 + 5ac: ed8f3a05 vstr s6, [pc, #20] ; 5c8 + 5b0: 5d1f4b0c vldrpl d4, [pc, #-48] ; 588 + 5b4: ed0f5b0d vstr d5, [pc, #-52] ; 588 + 5b8: 9d9f4a02 vldrls s8, [pc, #8] ; 5c8 + 5bc: 3d0f6a02 vstrcc s12, [pc, #-8] ; 5bc + 5c0: ed9f6b00 vldr d6, [pc] ; 5c8 + 5c4: bd0f1b01 vstrlt d1, [pc, #-4] ; 5c8 + +000005c8 : + 5c8: ec912a01 vldmia r1, {s4} + 5cc: 2ca62a01 vstmiacs r6!, {s4} + 5d0: ecb91b08 vldmia r9!, {d1-d4} + 5d4: eca36b04 vstmia r3!, {d6-d7} + 5d8: 6d323a01 vldmdbvs r2!, {s6} + 5dc: ed267a01 vstmdb r6!, {s14} + 5e0: ed3d4b08 vldmdb sp!, {d4-d7} + 5e4: ed205b06 vstmdb r0!, {d5-d7} + 5e8: eeb41a41 vcmp.f32 s2, s2 + 5ec: 7eb44ac4 vcmpevc.f32 s8, s8 + 5f0: eeb40b46 vcmp.f64 d0, d6 + 5f4: aeb43bc7 vcmpege.f64 d3, d7 + 5f8: beb51a40 vcmplt.f32 s2, #0.0 + 5fc: ceb57ac0 vcmpegt.f32 s14, #0.0 + 600: eeb54b40 vcmp.f64 d4, #0.0 + 604: eeb51bc0 vcmpe.f64 d1, #0.0 + 608: 6ebd1ac3 vcvtvs.s32.f32 s2, s6 + 60c: cebc3ac7 vcvtgt.u32.f32 s6, s14 + 610: 3eb80ac1 
vcvtcc.f32.s32 s0, s2 + 614: 3eb81a42 vcvtcc.f32.u32 s2, s4 + 618: 8ebd2bc4 vcvthi.s32.f64 s4, d4 + 61c: 8ebc3bc6 vcvthi.u32.f64 s6, d6 + 620: 9eb73bc7 vcvtls.f32.f64 s6, d7 + 624: eeb83bc4 vcvt.f64.s32 d3, s8 + 628: 0eb85b47 vcvteq.f64.u32 d5, s14 + 62c: eeb74ac5 vcvt.f64.f32 d4, s10 + 630: e120017a bkpt 0x001a + */ + + static const unsigned int insns[] = + { + 0xe082852b, 0x009310c7, 0xe0290284, 0xc0329066, + 0xc04c000e, 0x00528364, 0xe069818d, 0x60708864, + 0xd08597a1, 0xe09d12c6, 0xc0adb0c7, 0xe0b80329, + 0xe0c392e6, 0x80dd1845, 0x30e28486, 0xe0f4a76d, + 0x118db785, 0xe19a9764, 0xe1cd90e5, 0xe1d20547, + 0xe086d777, 0x809c4776, 0x90265c57, 0xe035841d, + 0xe04c2055, 0x20539c17, 0xc06c9614, 0xe072811c, + 0xe08c4d1d, 0xe09b8d76, 0x10a20415, 0xe0beb256, + 0x80ca835e, 0xe0dc1615, 0x60e54a7e, 0xe0fc181d, + 0x61818076, 0xe19db577, 0xe1ce4216, 0xe1dba31d, + 0x828d8261, 0xe29ed69b, 0xe226e87d, 0xe2332f49, + 0xe24d46d9, 0xb25e1402, 0xe2650325, 0x3274882f, + 0xb2849102, 0xe2948902, 0x22aeac2a, 0xe2b6aabd, + 0xe2cc2426, 0xe2da85a5, 0xe2e6d871, 0x12fba6e9, + 0x638737ff, 0x03952951, 0x63c18eea, 0x33d2020a, + 0xe118028d, 0xe13601a7, 0xe15c0164, 0xb1750807, + 0xe112073e, 0x31300572, 0x915e0b37, 0x617a0b17, + 0xe3120585, 0x433e071b, 0xe355030e, 0x3377010a, + 0xe1a00b84, 0xe1b01484, 0xe1a001aa, 0xe1b00a2a, + 0xe1a015c9, 0x61b0254b, 0x31a08fe2, 0xe1b0946c, + 0xe1a0877e, 0xe1b0c473, 0xc1a0ce1d, 0xe1b0c61d, + 0xc1a00931, 0xc1b0bc33, 0xd1a0265c, 0xb1b0165a, + 0xe1a0a003, 0xe1b00009, 0x73a03e29, 0xe3b0497e, + 0xe1a0c1a6, 0x71b0554d, 0xe1a0137e, 0x01b0897c, + 0x330cbf31, 0x33429bf7, 0xd001059d, 0xe0100b9a, + 0xe0207c93, 0x0038639b, 0xe084e695, 0xe0940796, + 0xe0a08e9b, 0xe0b4b79e, 0x20c51796, 0x40db059c, + 0xe0498592, 0x0060ed94, 0x510d9054, 0x4125005c, + 0xe1473055, 0xe1649052, 0xe101658c, 0xe1006cca, + 0xe108e3a1, 0xe10176ed, 0xe1206483, 0xe12b7ec4, + 0xe14a0786, 0x914b3ec4, 0xe14b8ca3, 0xe14185e3, + 0x21220dac, 0xe12806ec, 0xa1620e86, 0xe16807cc, + 0x016a0ea3, 0xe1600de3, 0xe697a009, 0xe5d900c4, + 0xe1b4e0b6, 0xe1f96ed8, 0xe09120f1, 0xe6890004, + 0xe5e5305c, 0xe1c82ab0, 0xe79c8008, 0xe4dab010, + 0xe19ab0b6, 0xe1bb50da, 0xe19360f7, 0xe7ad7005, + 0xe5ca2000, 0xe08460b3, 0xe59ca000, 0xe4db4084, + 0xe09990bc, 0xe0d399d4, 0xe1f2b9f4, 0xe78db00b, + 0xe7cd100a, 0xe08ea0b9, 0xe7b36004, 0xe7f6400d, + 0xe09760ba, 0xe1b600db, 0xe096a0fd, 0xe783700c, + 0xe7e83001, 0xe1cc44b0, 0xe51f1008, 0xe55f7008, + 0xe15f21b0, 0xe15fa0d8, 0xe1dfe3f8, 0xe51f2008, + 0xe55f3008, 0xe1df72bc, 0xe15fd0d8, 0xe15fa2fc, + 0xe51f5008, 0xe5dfe01c, 0xe1df51b8, 0xe15f63dc, + 0xe15fb4f0, 0xe59f700c, 0xe5df5008, 0xe15fa4bc, + 0xe1df60d0, 0xe15f90f8, 0xe690036a, 0xe7d8348e, + 0xe78d5783, 0xe6c99145, 0xe7945360, 0xe7d0e4a4, + 0xe68c52cc, 0xe7e13667, 0xe7b26063, 0xe7fe8842, + 0xe7a363e6, 0xe7c83502, 0xe79db40e, 0xe7fda20c, + 0xe789d142, 0xe7eb774e, 0xe19bcf9f, 0xe1dc4f9f, + 0xe1fbbf9f, 0xe18e1f97, 0xe1c4cf96, 0x21e74f96, + 0xe6143f17, 0xe61a9f33, 0xe615cf56, 0xe615cf7e, + 0xe61a0f97, 0x66180ff2, 0x5624bf15, 0x6623bf3c, + 0xe6230f55, 0xe61caf75, 0x3626af9e, 0xe62baff7, + 0x56349f1e, 0xe63e1f37, 0x363b9f55, 0xa6313f7b, + 0xc635df97, 0xe6351ff7, 0xe654af17, 0x26591f37, + 0xe65dbf57, 0xe654bf7e, 0x365d2f97, 0xc65a8ffe, + 0xe66c3f1d, 0xe66d4f36, 0xe66a1f5e, 0xd66d2f7e, + 0xe66c1f95, 0xc6640ffd, 0x867a0f15, 0xd674bf37, + 0xa67e1f59, 0xe67b2f7e, 0xa6749f95, 0x867d2ffe, + 0xe683a877, 0x26a59c77, 0xe6b53477, 0xe6c48476, + 0x06eb007d, 0xe6fc9075, 0xa68f387b, 0x86af2076, + 0xc6bf3c7d, 0xe6cfc875, 0xe6efc875, 0xe6ff8875, + 0x06bfaf34, 0xa6bf8fbc, 0xe6ffef37, 
0xc6ffdfb7, + 0xe719fe1d, 0xe732f61c, 0x47a20a51, 0xe7ee9852, + 0x87dd1d9b, 0xe7d0339f, 0xe8060d6b, 0xe80438df, + 0xe810e1d4, 0xe83c9873, 0xe8a931bd, 0xe88b55c3, + 0xe8bcbd22, 0xe8bda10a, 0xe92b2219, 0xe9298dd5, + 0xe935605b, 0xe91191a7, 0xe9ab6e6a, 0xe98b4507, + 0xe9b053cc, 0xe9912de7, 0x2a000075, 0x4bfffffe, + 0x612fff10, 0xe12fff33, 0xeafffffe, 0xebfffffe, + 0x612fff10, 0xe12fff35, 0xda00006d, 0x4bfffffe, + 0x112fff19, 0xe12fff3c, 0xeafffedc, 0x8bfffffe, + 0x712fff11, 0xc12fff37, 0xaafffed8, 0x8bfffffe, + 0xe12fff1c, 0x312fff37, 0xeafffffe, 0xcbfffed3, + 0xa12fff11, 0xe12fff30, 0xeafffffe, 0xeb00005c, + 0xc12fff1e, 0x112fff3b, 0x2afffffe, 0xebfffffe, + 0x212fff1a, 0xe12fff34, 0xeafffec8, 0xebfffffe, + 0x312fff1c, 0xe12fff38, 0xea000051, 0xebfffffe, + 0xe12fff1a, 0xe12fff31, 0x4e042a06, 0xee052a45, + 0xee151a46, 0xbe134a04, 0x4e263a47, 0xee310a00, + 0xee321a45, 0x2e810a06, 0xee030b06, 0xee010b45, + 0xee141b46, 0x1e110b01, 0x1e253b45, 0x3e320b04, + 0xee321b44, 0x4e810b05, 0xeeb03ac3, 0x5eb13a44, + 0xeeb10ac4, 0xeeb00bc4, 0xeeb11b44, 0xeeb10bc1, + 0x5e00ea10, 0xee14ba10, 0xbc4ebb11, 0xec557b15, + 0xeeb04a46, 0x8eb01b42, 0x6eb72a00, 0xeeb72b00, + 0xeeb03a00, 0xeeb01b00, 0xed952a1d, 0x3d811a0e, + 0x1d957b04, 0xed816b39, 0xed9f1a0e, 0xed8f3a0d, + 0xbd1f2b04, 0xcd0f3b02, 0x3d9f2a0a, 0xed8f0a09, + 0xcd9f4b08, 0xed0f0b09, 0xed9f4a06, 0xed8f3a05, + 0x5d1f4b0c, 0xed0f5b0d, 0x9d9f4a02, 0x3d0f6a02, + 0xed9f6b00, 0xbd0f1b01, 0xec912a01, 0x2ca62a01, + 0xecb91b08, 0xeca36b04, 0x6d323a01, 0xed267a01, + 0xed3d4b08, 0xed205b06, 0xeeb41a41, 0x7eb44ac4, + 0xeeb40b46, 0xaeb43bc7, 0xbeb51a40, 0xceb57ac0, + 0xeeb54b40, 0xeeb51bc0, 0x6ebd1ac3, 0xcebc3ac7, + 0x3eb80ac1, 0x3eb81a42, 0x8ebd2bc4, 0x8ebc3bc6, + 0x9eb73bc7, 0xeeb83bc4, 0x0eb85b47, 0xeeb74ac5, + 0xe120017a, + }; +// END Generated code -- do not edit + + // reset the detected cpu feature set + VM_Version::features(detected_features); + + { + bool ok = true; + unsigned int *insns1 = (unsigned int *)entry; + for (unsigned int i = 0; i < sizeof insns / sizeof insns[0]; i++) { + if (insns[i] != insns1[i]) { + ok = false; + printf("Ours:\n"); + Disassembler::decode((address)&insns1[i], (address)&insns1[i+1]); + printf(" Raw: 0x%x\n", insns1[i]); + printf("Theirs:\n"); + Disassembler::decode((address)&insns[i], (address)&insns[i+1]); + printf(" Raw: 0x%x\n", insns[i]); + printf("\n"); + } + } + assert(ok, "Assembler smoke test failed"); + } +#endif // ASSERT +} + +#undef __ +void Address::AddressConstruct(Register base, RegisterOrConstant index, enum reg_op op, + shift_op shift, enum wb_mode mode) { + _base = base; + _wb_mode = mode; + _shift = shift; + _target = 0; + if (index.is_register()) { + _acc_mode = reg; + _index = index.as_register(); + _offset = 0; + _as_op = op; + } else { + assert(shift == lsl(), "should be"); + assert(index.is_constant(), "should be"); + _acc_mode = imm; + // _index = no_reg; + _offset = index.as_constant(); + if(SUB == _as_op) + _offset = -_offset; + } +} + +void Address::encode(Instruction_aarch32 *i, CodeSection *sec, address pc) const { + long offset = _offset; + access_mode mode = _acc_mode; + + if(lit == mode) { + //Create the offset from the address + offset = _target - pc; + mode = imm; + } + + //Correct the offset if the base is the PC + if(r15_pc == _base && imm == mode) { + offset -= 8; + } + + int U = (offset >= 0 && _acc_mode == imm) || (_as_op == ADD && _acc_mode == reg); + int P = pre == _wb_mode || off == _wb_mode; + int W = pre == _wb_mode; + i->f(P, 24), i->f(U, 23), i->f(W, 21), i->rf(_base, 16); + + offset = 
offset < 0 ? -offset : offset; + int opc = i->get(27, 25); + + if (imm == mode) { + switch(opc) { + case 0b010: + // LDR, LDRB + // STR, STRB + i->f(offset, 11, 0); + break; + case 0b000: + // LDRH, LDRSH, LDRSB, LDRD + // STRH, STRD + i->f(1, 22); + assert(offset < (1 << 8), "Offset larger than a byte"); + i->f(offset & 0xF, 3, 0); + i->f(offset >> 4, 11, 8); + break; + default: + ShouldNotReachHere(); + } + } else if (reg == mode) { + assert(r15_pc->encoding_nocheck() != + _base->encoding_nocheck(), "Remove this if you have your offsets right"); + switch(opc) { + case 0b010: + // LDR, LDRB + // STR, STRB + //Need to set bit 25 as Register 0b011 + i->f(1, 25); + i->f(_shift.shift(), 11, 7); + i->f(_shift.kind(), 6, 5); + i->f(0, 4); + i->rf(_index, 0); + break; + case 0b000: + // LDRH, LDRSH, LDRSB, LDRD + // STRH, STRD + //Need to clear bit 22 as Register + i->f(0, 22); + assert(_shift == lsl(), "Type of load/store does not support shift"); + i->f(0b0000, 11, 8); + i->rf(_index, 0); + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } + + if(lit == _acc_mode) { + sec->relocate(pc, _rspec); + } +} + +void Address::fp_encode(Instruction_aarch32 *i, CodeSection *sec, address pc) const { + // ATM works only for immediate + assert(_wb_mode == off, "Can't do pre or post addressing for vldr, vstr"); + long offset = _offset; + if(imm == _acc_mode) { + if(r15_pc == _base) { + //Correct the offset if the base is the PC + offset -= 8; + } + bool U = offset >= 0; + assert(0 == (offset & 3), "Can only access aligned data"); + unsigned imm8 = uabs(offset) / 4; + i->f(U, 23), i->rf(_base, 16), i->f(imm8, 7, 0); + } else { + ShouldNotReachHere(); + } +} + +#define __ as-> +void Address::lea(MacroAssembler *as, Register r) const { + Relocation* reloc = _rspec.reloc(); + relocInfo::relocType rtype = (relocInfo::relocType) reloc->type(); + + //TODO Potentially remove this - added as aarch64 doesn't contain + // any method of handling pre or post + assert( _wb_mode != pre && _wb_mode != post, "Wrong wb mode"); + // could probably permit post however + switch(_acc_mode) { + case imm: { + if (_offset == 0 && _base == r) // it's a nop + break; + if (_offset > 0) + __ add(r, _base, _offset); + else + __ sub(r, _base, -_offset); + break; + } + case reg: { + switch (_as_op) { + case ADD: + __ add(r, _base, _index, _shift); + break; + case SUB: + __ sub(r, _base, _index, _shift); + break; + } + break; + } + case lit: { + if (rtype == relocInfo::none) + __ mov(r, target()); + else + __ movptr(r, (uint32_t)target()); + break; + } + default: + ShouldNotReachHere(); + } +} +#undef __ + +#define __ as-> +class Address; + +// Adapts given Address to the capabilities of instructions respective to the +// provided data type. E.g. some of the instructions cannot use index register +// while others cannot have an offset field. 
+// Returns a copy of this Address if it's good or constructs a new Address
+// good for respective instructions by emitting necessary code to calculate
+// the address in tmp register
+Address Address::safe_for(InsnDataType type, MacroAssembler *as, Register tmp) {
+  if (is_safe_for(type))
+    return *this;
+  assert(tmp->is_valid(), "must be");
+  lea(as, tmp);
+  return Address(tmp);
+}
+#undef __
+
+bool Address::is_safe_for(InsnDataType type) {
+  switch (_acc_mode) {
+    case imm:
+    case lit:
+      return offset_ok_for_immed(_offset, type);
+    case reg:
+      return shift_ok_for_index(_shift, type);
+    case no_mode:
+    default:
+      ShouldNotReachHere();
+      return false;
+  }
+}
+
+
+bool Address::offset_ok_for_immed(long offset, InsnDataType type) {
+  const int o = offset < 0 ? -offset : offset;
+  switch (type) {
+    case IDT_INT:
+    case IDT_BOOLEAN:
+    case IDT_OBJECT:
+    case IDT_ADDRESS:
+    case IDT_METADATA:
+    case IDT_ARRAY:
+      return o <= 0xfff;
+    case IDT_BYTE:
+    case IDT_SHORT:
+    case IDT_LONG:
+    case IDT_CHAR:
+      return o <= 0xff;
+    case IDT_FLOAT:
+    case IDT_DOUBLE:
+      return !(o & ~0x3fc);
+    case IDT_LEA:
+      return true;
+    case IDT_ATOMIC:
+    case IDT_MULTIWORD:
+      return !o;
+    default:
+      ShouldNotReachHere();
+      return false;
+  }
+}
+
+bool Address::shift_ok_for_index(shift_op shift, InsnDataType type) {
+  switch (type) {
+    case IDT_INT:
+    case IDT_BOOLEAN:
+    case IDT_OBJECT:
+    case IDT_ADDRESS:
+    case IDT_METADATA:
+    case IDT_ARRAY:
+      return !shift.is_register();
+    case IDT_BYTE:
+    case IDT_SHORT:
+    case IDT_LONG:
+    case IDT_CHAR:
+      return !shift.is_register() && shift.shift() == 0;
+    case IDT_LEA:
+      return true;
+    case IDT_FLOAT:
+    case IDT_DOUBLE:
+    case IDT_ATOMIC:
+    case IDT_MULTIWORD:
+      return false;
+    default:
+      ShouldNotReachHere();
+      return false;
+  }
+}
+
+void Assembler::emit_data64(jlong data,
+                            relocInfo::relocType rtype,
+                            int format) {
+  if (rtype == relocInfo::none) {
+    emit_int64(data);
+  } else {
+    emit_data64(data, Relocation::spec_simple(rtype), format);
+  }
+}
+
+void Assembler::emit_data64(jlong data,
+                            RelocationHolder const& rspec,
+                            int format) {
+
+  assert(inst_mark() != NULL, "must be inside InstructionMark");
+  // Do not use AbstractAssembler::relocate, which is not intended for
+  // embedded words. Instead, relocate to the enclosing instruction.
+  code_section()->relocate(inst_mark(), rspec, format);
+  emit_int64(data);
+}
+
+extern "C" {
+  void das(uint64_t start, int len) {
+    ResourceMark rm;
+    len <<= 2;
+    if (len < 0)
+      Disassembler::decode((address)start + len, (address)start);
+    else
+      Disassembler::decode((address)start, (address)start + len);
+  }
+
+  JNIEXPORT void das1(unsigned long insn) {
+    das(insn, 1);
+  }
+}
+
+#define starti Instruction_aarch32 do_not_use(this); set_current(&do_not_use)
+
+  void Assembler::adr(Register Rd, address adr, Condition cond) {
+    int offset = adr - pc() - 8;
+    adr_encode(Rd, offset, cond);
+  }
+
+#undef starti
+
+Address::Address(address target, relocInfo::relocType rtype)
+  : _acc_mode(lit), _base(sp), _offset(0), _wb_mode(off) {
+  //TODO we don't complete _wb_mode - what about Addresses that are pre/post accessed?
+  _is_lval = false;
+  _target = target;
+  switch (rtype) {
+  case relocInfo::oop_type:
+  case relocInfo::metadata_type:
+    // Oops are a special case. Normally they would be their own section
+    // but in cases like icBuffer they are literals in the code stream that
+    // we don't have a section for. We use none so that we get a literal address
+    // which is always patchable.
+ break; + case relocInfo::external_word_type: + _rspec = external_word_Relocation::spec(target); + break; + case relocInfo::internal_word_type: + _rspec = internal_word_Relocation::spec(target); + break; + case relocInfo::opt_virtual_call_type: + _rspec = opt_virtual_call_Relocation::spec(); + break; + case relocInfo::static_call_type: + _rspec = static_call_Relocation::spec(); + break; + case relocInfo::runtime_call_type: + _rspec = runtime_call_Relocation::spec(); + break; + case relocInfo::poll_type: + case relocInfo::poll_return_type: + _rspec = Relocation::spec_simple(rtype); + break; + case relocInfo::none: + _rspec = RelocationHolder::none; + break; + default: + ShouldNotReachHere(); + break; + } +} + +#ifdef COMPILER2 +Address Address::make_raw(int base, int index, int scale, unsigned long o, relocInfo::relocType disp_reloc) { + RelocationHolder rspec; + if (disp_reloc != relocInfo::none) { + rspec = Relocation::spec_simple(disp_reloc); + } + if (as_Register(index) == r15_pc) { + assert(scale == 0, "unsupported"); + Address a(as_Register(base), o); + a._rspec = rspec; + return a; + } else { + assert(o == 0, "unsupported"); + Address a(as_Register(base), as_Register(index), lsl(scale)); + a._rspec = rspec; + return a; + } +} +#endif + +void Assembler::adr(Register r, const Address &dest, Condition cond) { + code_section()->relocate(pc(), dest.rspec()); + adr(r, dest.target()); +} + +void Assembler::wrap_label(Label &L, Assembler::uncond_branch_insn insn) { + if (L.is_bound()) { + (this->*insn)(target(L)); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(pc()); + } +} +void Assembler::wrap_label(Label &L, Condition cond, + Assembler::cond_branch_insn insn) { + if (L.is_bound()) { + (this->*insn)(target(L), cond); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(pc(), cond); + } +} + +void Assembler::wrap_label(Register r, Label &L, Condition cond, + Assembler::cond_ldst_insn insn) { + if (L.is_bound()) { + (this->*insn)(r, target(L), cond); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(r, pc(), cond); + } +} + + +void Assembler::wrap_label(FloatRegister r, Label &L, Condition cond, + Assembler::cond_fp_ldst_insn insn) { + if (L.is_bound()) { + (this->*insn)(r, target(L), cond); + } else { + L.add_patch_at(code(), locator()); + (this->*insn)(r, pc(), cond); + } +} + + +uint32_t Assembler::encode_imm12(int imm) { + assert(is_valid_for_imm12(imm), + "only valid immediates allowed, call is_valid_for_imm12 first"); + uint32_t n = imm; + if ((n & 0xFFFFFF00) == 0) { + return n; + } + if ((n & 0xFC000000) == 0) { + const int lshift = __builtin_ctz(n) & 0xFFFFFFFE; + return ((32 - lshift) << 7) | (n >> lshift); + } + n = (n << 16) | (n >> 16); + const int lshift = __builtin_ctz(n) & 0xFFFFFFFE; + return ((16 - lshift) << 7) | (n >> lshift); +} + +int Assembler::decode_imm12(uint32_t imm12) { + assert((imm12 & 0xFFFFF000) == 0, "bad imm12"); + uint32_t shift = (imm12 & 0x00000F00) >> 7; + uint32_t value = imm12 & 0x000000FF; + return (int) ((value >> shift) | (value << (32 - shift))); +} + +bool Assembler::is_valid_for_imm12(int imm) { + uint32_t n = (uint32_t) imm; + uint32_t shift = __builtin_clz(n) & 0xFFFFFFFE; + uint32_t result = n << shift; + if ((result & 0x00FFFFFF) == 0) { + return true; + } + n = (n << 16) | (n >> 16); + shift = __builtin_clz(n) & 0xFFFFFFFE; + result = n << shift; + if ((result & 0x00FFFFFF) == 0) { + return true; + } + return false; +} + +bool Assembler::operand_valid_for_logical_immediate(bool is32, uint64_t imm) { 
+ return is32 && is_valid_for_imm12(imm); +} + +bool Assembler::operand_valid_for_add_sub_immediate(int imm) { + return is_valid_for_imm12(imm); +} + +bool Assembler::operand_valid_for_add_sub_immediate(unsigned long imm) { + return is_valid_for_imm12(imm); +} + +bool Assembler::operand_valid_for_add_sub_immediate(unsigned imm) { + return is_valid_for_imm12(imm); +} + +bool Assembler::operand_valid_for_add_sub_immediate(jlong imm) { + return is_valid_for_imm12(imm >> 32) && is_valid_for_imm12(imm); +} + +// n.b. this is implemented in subclass MacroAssembler +void Assembler::bang_stack_with_offset(int offset) { Unimplemented(); } + +int AbstractAssembler::code_fill_byte() { + return 0; +} + +void Assembler::mov_immediate(Register dst, uint32_t imm32, Condition cond, bool s) { +#ifndef PRODUCT + { + char buffer[64]; + snprintf(buffer, sizeof(buffer), "0x%"PRIX32, imm32); + block_comment(buffer); + } +#endif + if(is_valid_for_imm12(imm32)) { + if(s) movs_i(dst, (unsigned)imm32, cond); + else mov_i (dst, (unsigned)imm32, cond); + } else if(is_valid_for_imm12(~imm32)) { + if(s) mvns_i(dst, (unsigned)~imm32, cond); + else mvn_i (dst, (unsigned)~imm32, cond); + } else if (!s && VM_Version::features() & (FT_ARMV7 | FT_ARMV6T2) && + (imm32 < (1 << 16))) { + movw_i(dst, (unsigned)imm32, cond); + } else if (!s && VM_Version::features() & (FT_ARMV7 | FT_ARMV6T2) && + !(imm32 & ((1 << 16) - 1))) { + movw_i(dst, (unsigned)0, cond); + movt_i(dst, (unsigned)(imm32 >> 16), cond); + } else { // TODO Could expand to varied numbers of mov and orrs + //Need to do a full 32 bits + mov_immediate32(dst, imm32, cond, s); + } +} + +//This should really be in the macroassembler +void Assembler::mov_immediate32(Register dst, uint32_t imm32, Condition cond, bool s) +{ + // Need to move a full 32 bit immediate, for example if we're loading an address that + // might change later and therefore need to be updated. + if (VM_Version::features() & (FT_ARMV7 | FT_ARMV6T2)) { + //Use a movw and a movt + Assembler::movw_i(dst, (unsigned)(imm32 & 0xffff), cond); + Assembler::movt_i(dst, (unsigned)(imm32 >> 16), cond); + if(s) { + //Additionally emit a cmp instruction + Assembler::cmp(dst, 0); + } + } else { + // Sadly we don't have movw, movt + // instead emit a mov and three orr + mov_i(dst, imm32 & (0xff ), cond); + orr(dst, dst, imm32 & (0xff << 8 ), cond); + orr(dst, dst, imm32 & (0xff << 16), cond); + if(s) orrs(dst, dst, imm32 & (0xff << 24), cond); + else orr (dst, dst, imm32 & (0xff << 24), cond); + } +} + +#define starti Instruction_aarch32 do_not_use(this); set_current(&do_not_use) +void Assembler::add_sub_imm(int decode, Register Rd, Register Rn, int imm, + Condition cond, bool s) { + int cpart = 0; + switch(decode) { + case 0b0100: cpart = 0b0010; break; // ADD -> SUB + case 0b0010: // SUB -> ADD + case 0b0011: cpart = 0b0100; break; // RSB -> ADD + case 0b0101: cpart = 0b0110; break; // ADC -> SUBC + case 0b0110: // SUBC -> ADC + case 0b0111: cpart = 0b0101; break; // RSC -> ADC + default: ShouldNotReachHere(); + } + //try both possible imm_instrs + if(imm_instr(decode, Rd, Rn, imm, cond, s)) return; + if(imm_instr(cpart, Rd, Rn, -imm, cond, s)) return; + + //Try plan B - a mov first - need to have destination that is not an arg + assert(Rd != Rn, "Can't use imm and can't do a mov. I'm in a jam."); + mov_immediate(Rd, (uint32_t)uabs(imm), cond, s); + //Now do the non immediate version - copied from the immediate encodings + { + starti; + reg_instr( imm < 0 ? 
cpart : decode, lsl(), cond, s); + rf(Rn, 16), rf(Rd, 12), rf(Rd, 0); + } +} + +bool Assembler::can_ldst_multiple( unsigned regset, const Address& adr) { + int nbits = count_bits(regset); + return adr.get_mode() == Address::imm && + !(adr.base()->bit() & regset) && // FIXME, this could be relaxed + (((adr.offset() == 0 || adr.offset() == wordSize || adr.offset() == -nbits * wordSize) && + (adr.get_wb_mode() == Address::pre || adr.get_wb_mode() == Address::off)) || + ((adr.offset() == 0 || adr.offset() == -wordSize || adr.offset() == nbits * wordSize) && + adr.get_wb_mode() == Address::post)); +} + +unsigned Assembler::count_bits(unsigned val) { + unsigned i, count; + for(i = 0, count = 0; i < 8 * sizeof(val); val >>= 1, i++) + if( val & 1 ) count++; + return count; +} + + +void Assembler::vmov_imm(FloatRegister Rd, unsigned imm, bool is64bit, + Condition cond) { + starti; + fp_instr_base(is64bit, cond); + f(0b1011, 23, 20); + // double register passed (see 'd0'-'dN' encoding), not reencode it's number + fp_rencode(Rd, false, 12, 22); + f(0b0000, 7, 4); + f(imm & 0xf, 3, 0); + f(imm >> 4, 19, 16); +} + +void Assembler::vmov_imm(FloatRegister Rd, unsigned imm) { + assert(operand_valid_for_double_immediate(0), "operand should be valid for immediate"); + int cmod = 0b0000; + { + starti; + f(0b1111001, 31, 25); + f(0, 24); // imm1 + f(0b10000, 23, 19); + // double register passed (see 'd0'-'dN' encoding), not reencode it's number + fp_rencode(Rd, false, 12, 22); + f(cmod, 11, 8); + f(0b000, 7, 5); + f(1, 4); + f(imm & 0xf, 3, 0); //imm4 + f(imm >> 4, 18, 16); //imm3 + } +} + +void Assembler::vmov_imm_zero(FloatRegister Rd, bool is64bit, + Condition cond) { + // Note that this is not a floating point vmov but instead + // an integer vmov from the SIMD instructions. + // cannot be conditional. + assert(operand_valid_for_double_immediate(0), "operand should be valid for immediate"); + assert(is64bit, "SIMD loading available only for double registers"); + assert(cond == C_DFLT, "Unable to vmov #0 conditionally"); + //int cmod = is64bit? 0b1110 : 0b0000; // ? 
I64 : I32 + int cmod = 0b1110; + { + starti; + f(0b1111001, 31, 25); + f(0, 24); // imm1 + f(0b10000, 23, 19); + // double register passed (see 'd0'-'dN' encoding), not reencode it's number + fp_rencode(Rd, false, 12, 22); + f(0b000, 18, 16); //imm3 + f(cmod, 11, 8); + f(0b00, 7, 6); + f(is64bit, 5); + f(1, 4); + f(0b0000, 3, 0); //imm4 + } +} + +bool Assembler::operand_valid_for_float_immediate(float v) { + if (!(VM_Version::features() & FT_VFPV3)) { + return false; + } + union ufloat { + float f; + uint32_t u; + } imm; + unsigned tmp; + imm.f = v; + + if (imm.u & ((1 << 19) - 1)) + return false; + + tmp = (imm.u >> 25) & ((1 << 6) - 1); + return tmp == 32 || tmp == 31; +} + +bool Assembler::operand_valid_for_double_immediate(double v) { + if (!(VM_Version::features() & FT_VFPV3)) { + return false; + } + union ufloat { + double f; + uint64_t u; + } imm; + unsigned tmp; + imm.f = v; + + if ((VM_Version::features() & FT_AdvSIMD) && imm.u == 0) + return true; + + if (imm.u & (uint64_t) 0xffffffffffffLL) + return false; + + imm.u >>= 48; + + tmp = (imm.u >> 6) & ((1 << 9) - 1); + return tmp == 0x100 || tmp == 0xff; +} + +unsigned Assembler::encode_float_fp_imm(float imm_f) { + assert(operand_valid_for_float_immediate(imm_f), "operand should be valid for immediate"); + union ufloat { + float f; + uint32_t u; + } imm; + unsigned tmp, imm8; + imm.f = imm_f; + + assert(!(imm.u & ((1 << 19) - 1)), "Invalid float imm"); + tmp = (imm.u >> 25) & ((1 << 6) - 1); + assert(tmp == 32 || tmp == 31, "Invalid float imm"); + + imm8 = (imm.u >> 24) & 0x80; // set a + imm8 |= (imm.u >> 19) & 0x7F; // set bcdefgh + return imm8; +} + +unsigned Assembler::encode_double_fp_imm(double imm_f) { + assert(operand_valid_for_double_immediate(imm_f), "operand should be valid for immediate"); + union ufloat { + double f; + uint64_t u; + } imm; + unsigned tmp, imm8; + imm.f = imm_f; + + assert(!(imm.u & (uint64_t)0xffffffffffffLL), "Invalid float imm"); + imm.u >>= 48; + + tmp = (imm.u >> 6) & ((1 << 9) - 1); + assert(tmp == 0x100 || tmp == 0xff, "Invalid float imm"); + + imm8 = (imm.u >> 8) & 0x80; // set a + imm8 |= imm.u & 0x7F; // set bcdefgh + return imm8; +} + + +void Assembler::fp_ldst_instr(int decode, bool is64bit, const Address& adr, + Condition cond) { + f(cond, 31, 28), f(0b110, 27, 25), f(decode, 24, 20); + f(0b101, 11, 9), f(is64bit, 8); + adr.fp_encode(current, code_section(), pc()); +} + +void Assembler::fp_ldst_mul(Register Rn, uint32_t regset, bool load, bool is64bit, + enum fp_mode mode, Condition cond) { + starti; + bool P = db_wb == mode; + bool U = ia_wb == mode || ia == mode; + bool W = ia_wb == mode || db_wb == mode; + // Encode registers + unsigned i, fp_first_reg, nregs = 1; + bool enc_z = false; + for(fp_first_reg = 0; !(regset & 1); regset >>= 1, fp_first_reg++); + FloatRegister Rd = (FloatRegister) fp_first_reg; + for(i = 0; i + fp_first_reg < 8 * sizeof(int); i++) { + regset >>= 1; + if(regset & 1) { + assert(!enc_z, "Unable to encode non-consecutive registers in fp_ldst_mul"); + nregs++; + } else { + enc_z = true; + } + } + assert(!is64bit || nregs <= 16, "Too many registers in a set"); + f(cond, 31, 28), f(0b110, 27, 25); f(P, 24), f(U, 23), f(W, 21), f(load, 20); + // vstm/vstm uses double register number, not it's encoding. Should reencode it. + rf(Rn, 16), fp_rencode(Rd, is64bit, 12, 22), f(0b101, 11, 9), f(is64bit, 8); + f(is64bit ? 
nregs * 2 : nregs, 7, 0); +} + +void Assembler::simd_ldst(FloatRegister Rd, unsigned type, unsigned size, unsigned num_regs, + const Address &addr, enum SIMD_Align align, unsigned encode) { + starti; + assert(addr.get_mode() == Address::imm && + (addr.get_wb_mode() == Address::off && addr.offset() == 0) || + (addr.get_wb_mode() == Address::post && addr.offset() == long(8*num_regs)), "Unsupported"); + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + f(0b1111, 31, 28), f(0b0100, 27, 24), f(0, 23), f(encode, 21, 20); + rf(addr.base(), 16), fp_rencode(Rd, false, 12, 22), f(type, 11, 8), f(size, 7, 6); + f((unsigned)align, 5, 4), f(addr.get_wb_mode() == Address::post ? 0b1101 : 0b1111, 3, 0); +} + +void Assembler::simd_ldst_single(FloatRegister Rd, unsigned size, unsigned index, + const Address &addr, bool align, unsigned encode) { + starti; + assert(addr.get_mode() == Address::imm && + (addr.get_wb_mode() == Address::off && addr.offset() == 0) || + (addr.get_wb_mode() == Address::post && addr.offset() == long(1<>2)&3, 22, 21), f(bit20, 20); + fp_rencode(Dd, false, 16, 7), f(opc>>4, 23); + rf(Rt, 12), f(0b1011, 11, 8), f(opc & 3, 6, 5), f(0b10000, 4, 0); +} + +void Assembler::simd_logicalop(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm, unsigned q, + unsigned a, unsigned b, unsigned u, unsigned c) { + starti; + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + assert(!q || ((Dd->encoding() & 2) == 0 && (Dm->encoding() & 2) == 0), "Odd registers"); + f(0b1111001, 31, 25), f(u, 24), f(0, 23), f(c, 21, 20), fp_rencode(Dd, false, 12, 22); + fp_rencode(Dn, false, 16, 7), f(a, 11, 8), fp_rencode(Dm, false, 0, 5), f(q, 6), f(b, 4); +} + +void Assembler::simd_vmul(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm, + unsigned bit24, unsigned bits109, unsigned size, unsigned mul, unsigned bit6) { + starti; + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + f(0b1111001, 31, 25), f(bit24, 24), f(size, 21, 20), fp_rencode(Dd, false, 12, 22); + f(mul^1, 23), fp_rencode(Dn, false, 16, 7), f(1, 11), f(bits109, 10, 9); + f(mul, 8), f(bit6, 6), f(mul, 4), fp_rencode(Dm, false, 0, 5); +} + +void Assembler::simd_vuzp(FloatRegister Dd, FloatRegister Dm, unsigned size, unsigned q) { + starti; + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + assert(!q || ((Dd->encoding() & 2) == 0 && (Dm->encoding() & 2) == 0), "Odd registers"); + f(0b111100111, 31, 23), fp_rencode(Dd, false, 12, 22), f(0b11, 21, 20), f(size, 19, 18); + f(0b10, 17, 16), f(0b00010, 11, 7), f(q, 6), f(0, 4), fp_rencode(Dm, false, 0, 5); +} + +void Assembler::simd_vshl(FloatRegister Dd, FloatRegister Dm, unsigned imm, + unsigned q, unsigned u, unsigned encode) { + starti; + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + assert(!q || ((Dd->encoding() & 2) == 0 && (Dm->encoding() & 2) == 0), "Odd registers"); + f(0b1111001, 31, 25), f(u, 24), f(1, 23), fp_rencode(Dd, false, 12, 22); + f(imm & 0b111111, 21, 16), f(imm >> 6, 7), f(q, 6); + f(encode, 11, 8), fp_rencode(Dm, false, 0, 5), f(1, 4); +} + +void Assembler::simd_vshl(FloatRegister Dd, FloatRegister Dm, FloatRegister Dn, unsigned size, + unsigned q, unsigned u) { + starti; + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + assert(!q || ((Dd->encoding() & 2) == 0 && (Dm->encoding() & 2) == 0), "Odd registers"); + f(0b1111001, 31, 25), f(u, 24), f(0b0, 23), f(size, 21, 20), fp_rencode(Dn, false, 16, 7); + fp_rencode(Dd, false, 
12, 22), f(0b0100, 11, 8), f(q, 6), fp_rencode(Dm, false, 0, 5), f(0, 4); +} + +// Two registers miscellaneous +void Assembler::simd_insn(FloatRegister Dd, FloatRegister Dm, unsigned q, unsigned a, + unsigned b, unsigned size) { + starti; + assert(!q || ((Dd->encoding() & 2) == 0 && (Dm->encoding() & 2) == 0), "Odd registers"); + f(0b111100111, 31, 23), fp_rencode(Dd, false, 12, 22), f(0b11, 21, 20); + f(size, 19, 18), f(a, 17, 16), f(0b0, 11), f(b, 10, 6); + fp_rencode(Dm, false, 0, 5), f(0, 4); +} + +void Assembler::simd_cnt(FloatRegister Dd, FloatRegister Dm, unsigned q) { + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + starti; + assert(!q || ((Dd->encoding() & 2) == 0 && (Dm->encoding() & 2) == 0), "Odd registers"); + f(0b111100111, 31, 23), fp_rencode(Dd, false, 12, 22), f(0b110000, 21, 16); + f(0b01010, 11, 7), f(q, 6), fp_rencode(Dm, false, 0, 5), f(0b0, 4); +} + +void Assembler::simd_padl(FloatRegister Dd, FloatRegister Dm, unsigned q, unsigned size, + unsigned op, unsigned encode) { + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + starti; + assert(!q || ((Dd->encoding() & 2) == 0 && (Dm->encoding() & 2) == 0), "Odd registers"); + assert(size != 0b11, "unsupported"); + f(0b111100111, 31, 23), f(0b11, 21, 20), f(0b00, 17, 16), f(0b0, 11); + fp_rencode(Dd, false, 12, 22), f(0b0, 4), fp_rencode(Dm, false, 0, 5); + f(size, 19, 18), f(op, 7), f(q, 6), f(encode, 10, 8); +} + +void Assembler::simd_dup(FloatRegister Dd, Register Rt, unsigned q, unsigned size) { + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + assert(size != 0b11, "must be"); + assert(!q || (Dd->encoding() & 2) == 0, "Odd register"); + starti; + f(0b111011101, 31, 23), f(size >> 1, 22), f(q, 21), f(0, 20), fp_rencode(Dd, false, 16, 7); + rf(Rt, 12), f(0b1011, 11, 8), f(size & 1, 6, 5), f(0b10000, 4, 0); +} + +void Assembler::simd_dup(FloatRegister Dd, FloatRegister Dm, unsigned index, unsigned q, unsigned size) { + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + assert(size != 0b11, "must be"); + assert(!q || ((Dd->encoding() & 2) == 0 && (Dm->encoding() & 2) == 0), "Odd registers"); + starti; + f(0b111100111, 31, 23), fp_rencode(Dd, false, 12, 22), f(0b11, 21, 20); + f(((index<<1)|1)<<(2-size), 19, 16), f(0b11000, 11, 7), f(q, 6), fp_rencode(Dm, false, 0, 5), f(0b0, 4); +} + +void Assembler::simd_neg(FloatRegister Dd, FloatRegister Dm, unsigned q, unsigned size) { + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + assert(size != 0b11, "must be"); + assert(!q || ((Dd->encoding() & 2) == 0 && (Dm->encoding() & 2) == 0), "Odd registers"); + starti; + f(0b111100111, 31, 23), fp_rencode(Dd, false, 12, 22), f(0b11, 21, 20), f(size, 19, 18); + f(0b01, 17, 16), f(0b00111, 11, 7), f(q, 6), fp_rencode(Dm, false, 0, 5), f(0b0, 4); +} + +void Assembler::simd_vmov(FloatRegister Dd, unsigned imm, unsigned q, unsigned op_cmode) { + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + assert(!q || (Dd->encoding() & 2) == 0, "Odd register"); + assert(!(imm >> 8), "must be imm8"); + starti; + f(0b1111001, 31, 25), f(imm>>7, 24), f(0b1, 23), fp_rencode(Dd, false, 12, 22); + f(0b000, 21, 19), f((imm>>4)&0x7, 18, 16), f(op_cmode&0xf, 11, 8), f(0b0, 7); + f(q, 6); f(op_cmode>>4, 5), f(0b1, 4), f(imm&0xf, 3, 0); +} + +void Assembler::simd_insn(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm, + unsigned q, unsigned a, unsigned b, unsigned u, unsigned c) { + 
assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + assert(!q || (Dd->encoding() & 2) == 0, "Odd register"); + starti; + f(0b1111001, 31, 25), f(u, 24), f(0b0, 23), f(c, 21, 20), f(a, 11, 8), f(b, 4), f(q, 6); + fp_rencode(Dn, false, 16, 7), fp_rencode(Dd, false, 12, 22), fp_rencode(Dm, false, 0, 5); +} + +void Assembler::simd_mvn(FloatRegister Dd, FloatRegister Dm, unsigned q) { + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + starti; + f(0b111100111, 31, 23), fp_rencode(Dd, false, 12, 22), f(0b11, 21, 20), f(0b00, 19, 18); + f(0b00, 17, 16), f(0b01011, 11, 7), f(q, 6), fp_rencode(Dm, false, 0, 5), f(0b0, 4); +} + +void Assembler::simd_insn(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm, + unsigned qn, unsigned a, unsigned b, unsigned u) { + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + assert((Dd->encoding() & 2) == 0, "Odd register"); + assert(!qn || (Dn->encoding() & 2) == 0, "Odd operand register"); + starti; + f(0b1111001, 31, 25), f(u, 24), f(0b1, 23), f(b, 21, 20), f(a, 11, 8), f(0b0, 4), f(0b0, 6); + fp_rencode(Dn, false, 16, 7), fp_rencode(Dd, false, 12, 22), fp_rencode(Dm, false, 0, 5); +} + +void Assembler::simd_vext(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm, unsigned q, unsigned imm) { + assert(VM_Version::features() & FT_AdvSIMD, "SIMD coprocessor required"); + assert(!q || ((Dd->encoding() & 2) == 0 && (Dn->encoding() & 2) == 0 && (Dm->encoding() & 2) == 0), "Odd register"); + starti; + f(0b111100101, 31, 23), f(0b11, 21, 20), f(imm, 11, 8), f(q, 6), f(0b0, 4); + fp_rencode(Dn, false, 16, 7), fp_rencode(Dd, false, 12, 22), fp_rencode(Dm, false, 0, 5); +} + +#undef starti --- /dev/null 2018-09-25 19:24:11.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/assembler_aarch32.hpp 2018-09-25 19:24:11.000000000 +0300 @@ -0,0 +1,2612 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_ASSEMBLER_AARCH32_HPP +#define CPU_AARCH32_VM_ASSEMBLER_AARCH32_HPP + +#include "asm/register.hpp" +#include "vm_version_aarch32.hpp" + +// Definitions of various symbolic names for machine registers + +// Here we define how many integer and double precision floating point +// registers are used for passing parameters by the C and Java calling +// conventions. 
Each double precision floating point register can be used
+// as two single precision registers.
+
+class Argument {
+ public:
+  enum {
+    n_int_register_parameters_c   = 4,  // c_rarg0, c_rarg1, c_rarg2, c_rarg3
+#ifdef HARD_FLOAT_CC
+    n_float_register_parameters_c = 8,  // c_farg0, c_farg1, ..., c_farg7
+#else // HARD_FLOAT_CC
+    n_float_register_parameters_c = 0,  // 0 registers used to pass arguments
+#endif // HARD_FLOAT_CC
+    n_int_register_parameters_j   = 4,  // j_rarg0, j_rarg1, j_rarg2, j_rarg3
+#ifdef HARD_FLOAT_CC
+    n_float_register_parameters_j = 8   // j_farg0, j_farg1, ..., j_farg7
+#else // HARD_FLOAT_CC
+    n_float_register_parameters_j = 0   // 0 registers used to pass arguments
+#endif // HARD_FLOAT_CC
+  };
+};
+
+// Symbolic names for the register arguments used by the C calling convention
+// (the calling convention for C runtime calls and calls to JNI native
+// methods)
+
+REGISTER_DECLARATION(Register, c_rarg0, r0);
+REGISTER_DECLARATION(Register, c_rarg1, r1);
+REGISTER_DECLARATION(Register, c_rarg2, r2);
+REGISTER_DECLARATION(Register, c_rarg3, r3);
+
+// Symbolic names for the register arguments used by the Java calling
+// convention (the calling convention for calls to compiled Java methods)
+
+// In contrast to most ports we don't shift the Java argument registers by 1.
+// Although shifting would help to avoid an extra argument copy when invoking
+// JNI methods, it would bring a lot more complexity into the C2 port and
+// prevent the use of ldrd/strd instructions when dealing with jlong values.
+//
+// |-----------------------------------|
+// | c_rarg0 c_rarg1 c_rarg2 c_rarg3   |
+// |-----------------------------------|
+// | r0      r1      r2      r3        |
+// |-----------------------------------|
+// | j_rarg0 j_rarg1 j_rarg2 j_rarg3   |
+// |-----------------------------------|
+
+REGISTER_DECLARATION(Register, j_rarg0, c_rarg0);
+REGISTER_DECLARATION(Register, j_rarg1, c_rarg1);
+REGISTER_DECLARATION(Register, j_rarg2, c_rarg2);
+REGISTER_DECLARATION(Register, j_rarg3, c_rarg3);
+
+// Common register aliases used in assembler code
+
+// These registers are used to hold VM data either temporarily within a method
+// or across method calls. According to AAPCS, r0-r3 and r12 are caller-saved,
+// the rest are callee-saved.
+
+// These 4 aliases are used in the template interpreter only.
+
+REGISTER_DECLARATION(Register, rdispatch, r4);  // Address of dispatch table
+REGISTER_DECLARATION(Register, rbcp,      r5);  // Bytecode pointer
+REGISTER_DECLARATION(Register, rlocals,   r6);  // Address of local variables section of current frame
+REGISTER_DECLARATION(Register, rcpool,    r7);  // Address of constant pool cache
+
+// The following aliases are used in all VM components.
+ +REGISTER_DECLARATION(Register, rmethod, r8); // Address of current method +REGISTER_DECLARATION(Register, rscratch1, r9); // Scratch register +REGISTER_DECLARATION(Register, rthread, r10); // Address of current thread +REGISTER_DECLARATION(Register, rfp, r11); // Frame pointer +REGISTER_DECLARATION(Register, rscratch2, r12); // Scratch register +REGISTER_DECLARATION(Register, sp, r13); // Stack pointer +REGISTER_DECLARATION(Register, lr, r14); // Link register +REGISTER_DECLARATION(Register, r15_pc, r15); // Program counter + + +extern "C" void entry(CodeBuffer *cb); + + +#define assert_cond(ARG1) assert(ARG1, #ARG1) + +class Assembler; + +class Instruction_aarch32 { + unsigned insn; +#ifdef ASSERT + unsigned bits; +#endif + Assembler *assem; + +public: + + Instruction_aarch32(class Assembler *as) { +#ifdef ASSERT + bits = 0; +#endif + insn = 0; + assem = as; + } + + inline ~Instruction_aarch32(); + + unsigned &get_insn() { return insn; } +#ifdef ASSERT + unsigned &get_bits() { return bits; } +#endif + + static inline int32_t extend(unsigned val, int hi = 31, int lo = 0) { + union { + unsigned u; + int n; + }; + + u = val << (31 - hi); + n = n >> (31 - hi + lo); + return n; + } + + static inline uint32_t extract(uint32_t val, int msb, int lsb) { + int nbits = msb - lsb + 1; + assert_cond(msb >= lsb); + uint32_t mask = (1U << nbits) - 1; + uint32_t result = val >> lsb; + result &= mask; + return result; + } + + static inline int32_t sextract(uint32_t val, int msb, int lsb) { + uint32_t uval = extract(val, msb, lsb); + return extend(uval, msb - lsb); + } + + static void patch(address a, int msb, int lsb, unsigned long val) { + int nbits = msb - lsb + 1; + guarantee(val < (1U << nbits), "Field too big for insn"); + assert_cond(msb >= lsb); + unsigned mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + unsigned target = *(unsigned *)a; + target &= ~mask; + target |= val; + *(unsigned *)a = target; + } + + static void spatch(address a, int msb, int lsb, long val) { + int nbits = msb - lsb + 1; + long chk = val >> (nbits - 1); + guarantee (chk == -1 || chk == 0, "Field too big for insn"); + unsigned uval = val; + unsigned mask = (1U << nbits) - 1; + uval &= mask; + uval <<= lsb; + mask <<= lsb; + unsigned target = *(unsigned *)a; + target &= ~mask; + target |= uval; + *(unsigned *)a = target; + } + +/* void f(unsigned val, int msb, int lsb) { + int nbits = msb - lsb + 1; + guarantee(val < (1U << nbits), "Field too big for insn"); + assert_cond(msb >= lsb); + unsigned mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + insn |= val; + assert_cond((bits & mask) == 0); +#ifdef ASSERT + bits |= mask; +#endif + }*/ + + void f(unsigned val, int msb, int lsb) { + int nbits = msb - lsb + 1; + guarantee(val < (1U << nbits), "Field too big for insn"); + assert_cond(msb >= lsb); + unsigned mask = (1U << nbits) - 1; + val <<= lsb; + mask <<= lsb; + insn &= ~mask; + insn |= val; +#ifdef ASSERT + bits |= mask; +#endif + } + + void f(unsigned val, int bit) { + f(val, bit, bit); + } + + void sf(long val, int msb, int lsb) { + int nbits = msb - lsb + 1; + long chk = val >> (nbits - 1); + guarantee (chk == -1 || chk == 0, "Field too big for insn"); + unsigned uval = val; + unsigned mask = (1U << nbits) - 1; + uval &= mask; + f(uval, lsb + nbits - 1, lsb); + } + + void rf(Register r, int lsb) { + f(r->encoding_nocheck(), lsb + 3, lsb); + } + + void rf(FloatRegister r, int lsb) { + f(r->encoding_nocheck(), lsb + 4, lsb); + } + + unsigned get(int msb = 31, int lsb = 0) { + int nbits = msb - lsb + 1; 
+ unsigned mask = ((1U << nbits) - 1) << lsb; + assert_cond((bits & mask) == mask); + return (insn & mask) >> lsb; + } + + void fixed(unsigned value, unsigned mask) { + assert_cond ((mask & bits) == 0); +#ifdef ASSERT + bits |= mask; +#endif + insn |= value; + } +}; + +#define starti Instruction_aarch32 do_not_use(this); set_current(&do_not_use) + +// abs methods which cannot overflow and so are well-defined across +// the entire domain of integer types. +static inline unsigned int uabs(unsigned int n) { + union { + unsigned int result; + int value; + }; + result = n; + if (value < 0) result = -result; + return result; +} +static inline unsigned long uabs(unsigned long n) { + union { + unsigned long result; + long value; + }; + result = n; + if (value < 0) result = -result; + return result; +} +static inline unsigned long uabs(long n) { return uabs((unsigned long)n); } +static inline unsigned long uabs(int n) { return uabs((unsigned int)n); } + +#define S_DFLT ::lsl() +#define C_DFLT AL + + +// Shift for base reg + reg offset addressing +class shift_op { + public: + enum shift_kind { LSL, LSR, ASR, ROR }; + private: + enum shift_source { imm_s, reg_s }; + enum shift_source _source; + enum shift_kind _op; + int _shift; + Register _reg; + + bool check_valid() { + if(imm_s == _source) { + switch(_op) { + case LSL: return _shift >= 0 && _shift <= 31; + case ROR: return _shift >= 1 && _shift <= 32; + default: return _shift >= 1 && _shift <= 32; + } + } + return true; //Don't check register shifts + } + public: + // Default shift is lsl(0) + shift_op() + : _source(imm_s), _op(LSL), _shift(0) { } + shift_op(enum shift_kind op, int shift) + : _source(imm_s), _op(op), _shift(shift) { + if(!shift) { + // All zero shift encodings map to LSL 0 + _shift = 0; + _op = LSL; + } + int pshift = _shift; + if(-1 == _shift && ROR == _op) { + // This is an RRX, make shift valid for the check + _shift = 1; + pshift = 0; //set to zero + } + assert(check_valid(), "Invalid shift quantity"); + _shift = pshift; //restore shift + } + shift_op(enum shift_kind op, Register r) + : _source(reg_s), _op(op), _reg(r) {} + + shift_kind kind() const { + return _op; + } + + int shift() const { + assert(imm_s == _source, "Not an immediate shift"); + return _shift % 32; + } + Register reg() const { + assert(reg_s == _source, "Not a register shift"); + return _reg; + } + bool is_register() { + return reg_s == _source; + } + bool operator==(const shift_op& other) const { + if(imm_s == _source && imm_s == other._source) { + return _op == other._op && _shift == other._shift; + } else if (reg_s == _source && imm_s == _source) { + return _op == other._op && _reg == other._reg; + } + return false; + } + bool operator!=(const shift_op& other) const { + return !( *this == other); + } +}; +class lsl : public shift_op { + public: + lsl(int sft = 0): shift_op(LSL, sft) { } + lsl(Register r): shift_op(LSL, r) { } +}; +class lsr : public shift_op { + public: + lsr(int sft = 0): shift_op(LSR, sft) { } + lsr(Register r): shift_op(LSR, r) { } +}; +class asr : public shift_op { + public: + asr(int sft = 0): shift_op(ASR, sft) { } + asr(Register r): shift_op(ASR, r) { } +}; +class ror : public shift_op { + public: + ror(int sft = 0): shift_op(ROR, sft) {} + ror(Register r): shift_op(ROR, r) { } +}; +class rrx : public shift_op { + public: + rrx(): shift_op(ROR, -1) {} +}; + + +// Addressing modes +class Address { + public: + enum access_mode { no_mode, imm, reg, lit }; + //literal is class of imm? 
-> potentially have to split later if some instructions work + // with one but not other although can be determined from registers. + enum wb_mode { off, pre, post }; + + enum reg_op { ADD, SUB }; + + private: + Register _base; + Register _index; + int _offset; + enum access_mode _acc_mode; + enum wb_mode _wb_mode; + enum reg_op _as_op; + shift_op _shift; + + RelocationHolder _rspec; + + // Typically we use AddressLiterals we want to use their rval + // However in some situations we want the lval (effect address) of + // the item. We provide a special factory for making those lvals. + bool _is_lval; + + // If the target is far we'll need to load the ea of this to a + // register to reach it. Otherwise if near we can do PC-relative + // addressing. + address _target; + + public: + Address() + : _acc_mode(no_mode) { } + //immediate & literal + Address(Register r, enum wb_mode mode = off) + : _base(r), _index(noreg), _offset(0), _acc_mode(imm), _wb_mode(mode), + _shift(lsl()), _target(0) { + assert(!(r == r15_pc && _wb_mode == pre), "The PC can't be pre-indexed."); + } + Address(Register r, int o, enum wb_mode mode = off) + : _base(r), _index(noreg), _offset(o), _acc_mode(imm), _wb_mode(mode), + _shift(lsl()), _target(0) { + assert(!(r == r15_pc && _wb_mode == pre), "The PC can't be pre-indexed."); + } + Address(Register r, long o, enum wb_mode mode = off) + : _base(r), _index(noreg), _offset(o), _acc_mode(imm), _wb_mode(mode), + _shift(lsl()), _target(0) { + assert(!(r == r15_pc && _wb_mode == pre), "The PC can't be pre-indexed."); + } + Address(Register r, unsigned long o, enum wb_mode mode = off) + : _base(r), _index(noreg), _offset(o), _acc_mode(imm), _wb_mode(mode), + _shift(lsl()), _target(0) { + assert(!(r == r15_pc && _wb_mode == pre), "The PC can't be pre-indexed."); + } + Address(Register r, unsigned int o, enum wb_mode mode = off) + : _base(r), _index(noreg), _offset(o), _acc_mode(imm), _wb_mode(mode), + _shift(lsl()), _target(0) { + assert(!(r == r15_pc && _wb_mode == pre), "The PC can't be pre-indexed."); + } +#ifdef ASSERT + Address(Register r, ByteSize disp) + : _base(r), _index(noreg), _offset(in_bytes(disp)), _acc_mode(imm), _wb_mode(off), + _shift(lsl()), _target(0) { + assert(!(r == r15_pc && _wb_mode == pre), "The PC can't be pre-indexed."); + } +#endif + + + //Register-offset + Address(Register r, Register r1, shift_op shift = lsl(), enum reg_op op = ADD, + enum wb_mode wbm = off) + : _base(r), _index(r1), _offset(0), _acc_mode(reg), _wb_mode(wbm), _as_op(op), + _shift(shift), _target(0) { + assert(!shift.is_register(), "Can't shift a register-offset address by a register"); + } + + Address(address target, RelocationHolder const& rspec) + : _acc_mode(lit), + _base(sp), + _wb_mode(off), + _rspec(rspec), + _is_lval(false), + _target(target) + { } + Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type); + +#ifdef COMPILER2 + static Address make_raw(int base, int index, int scale, unsigned long o, relocInfo::relocType disp_reloc); +#endif + + private: + //Could be either + void AddressConstruct(Register base, RegisterOrConstant index, enum reg_op op, shift_op shift, + enum wb_mode mode); + public: + + Address(Register base, RegisterOrConstant index, enum reg_op op, enum wb_mode mode) { + AddressConstruct(base, index, op, lsl(), mode); + } + Address(Register base, RegisterOrConstant index, shift_op shift = lsl(), enum reg_op op = ADD, + enum wb_mode mode = off) { + if(shift.kind() != lsl().kind()) { + assert(index.is_register(), "should be"); + } 
+ AddressConstruct(base, index, op, shift, mode); + } + + + Register base() const { + //in aarch64 this didn't apply to preindex mode -> why? + guarantee(_acc_mode == imm || _acc_mode == reg, "wrong mode"); + return _base; + } + long offset() const { + return _offset; + } + Register index() const { + return _index; + } + shift_op shift() const { + return _shift; + } + reg_op op() const { + return _as_op; + } + access_mode get_mode() const { + return _acc_mode; + } + wb_mode get_wb_mode() const { + return _wb_mode; + } + bool uses(Register reg) const { return _base == reg || _index == reg; } + unsigned reg_bits() { return _base->bit(_acc_mode != no_mode) | _index->bit(_acc_mode == reg); } + address target() const { return _target; } + const RelocationHolder& rspec() const { return _rspec; } + + void encode(Instruction_aarch32 *i, CodeSection *sec, address pc) const; + + void fp_encode(Instruction_aarch32 *i, CodeSection *sec, address pc) const; + + void lea(MacroAssembler *, Register) const; + + typedef enum { + IDT_BOOLEAN = T_BOOLEAN, + IDT_CHAR = T_CHAR, + IDT_FLOAT = T_FLOAT, + IDT_DOUBLE = T_DOUBLE, + IDT_BYTE = T_BYTE, + IDT_SHORT = T_SHORT, + IDT_INT = T_INT, + IDT_LONG = T_LONG, + IDT_OBJECT = T_OBJECT, + IDT_ARRAY = T_ARRAY, + IDT_ADDRESS = T_ADDRESS, + IDT_METADATA = T_METADATA, + // not really a data type, denotes the use when address value is needed + // itself, and Address instance is not used to fetch actual data from memory + IDT_LEA = 100, + // ldrex*/strex* + IDT_ATOMIC = 101, + // multi-word memory access insn (ldmia/stmia etc) + IDT_MULTIWORD + } InsnDataType; + + inline static InsnDataType toInsnDataType(BasicType type) { + return (InsnDataType)type; + } + + Address safe_for(InsnDataType type, MacroAssembler *, Register temp); + bool is_safe_for(InsnDataType); + + static bool offset_ok_for_immed(long offset, InsnDataType type); + static bool shift_ok_for_index(shift_op shift, InsnDataType type); +}; + +// Convience classes +class RuntimeAddress: public Address { + public: + RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {} +}; + +class OopAddress: public Address { + public: + OopAddress(address target) : Address(target, relocInfo::oop_type){} +}; + +class ExternalAddress: public Address { + private: + static relocInfo::relocType reloc_for_target(address target) { + // Sometimes ExternalAddress is used for values which aren't + // exactly addresses, like the card table base. + // external_word_type can't be used for values in the first page + // so just skip the reloc in that case. + return external_word_Relocation::can_be_relocated(target) ? 
relocInfo::external_word_type : relocInfo::none; + } + + public: + ExternalAddress(address target) : Address(target, reloc_for_target(target)) {} +}; + +class InternalAddress: public Address { + public: + InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {} +}; + + +const int FPUStateSizeInWords = FloatRegisterImpl::number_of_registers; + +class Assembler : public AbstractAssembler { + void emit_long(jint x) { + AbstractAssembler::emit_int32(x); + } + +public: + //TODO REMOVE shift_kind from here once done + enum shift_kind { LSL, LSR, ASR, ROR }; + // NOTE RRX is a special case of ROR with shift = 0# + + // Helper functions for shifts + // Here to allow compiler to find global shift_op without :: prefix as lsl is a + // standalone instruction +#define HELPER(NAME) \ + shift_op NAME(int sft = 0) { return ::NAME(sft); } \ + shift_op NAME(Register r) { return ::NAME(r); } + HELPER(lsl); + HELPER(lsr); + HELPER(asr); + HELPER(ror); + shift_op rrx() { return ::rrx(); } +#undef HELPER + + typedef enum { + EQ, NE, HS, CS=HS, LO, CC=LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV + } Condition; + + enum { instruction_size = 4 }; + + static const uint32_t nop_insn = 0xe1a00000; + + Address adjust(Register base, int offset, bool preIncrement) { + if (preIncrement) + return Address(base, offset, Address::pre); + else + return Address(base, offset, Address::post); + } + + Address adjust(Register base, Register index, shift_op shift, + enum Address::reg_op op, bool preIncrement) { + return Address(base, index, shift, op, preIncrement ? Address::pre : Address::post); + } + + Address pre(Register base, int offset) { + return adjust(base, offset, true); + } + + Address pre(Register base, Register index, shift_op shift, enum Address::reg_op op) { + return adjust(base, index, shift, op, true); + } + + Address post (Register base, int offset) { + return adjust(base, offset, false); + } + + Instruction_aarch32* current; + + void set_current(Instruction_aarch32* i) { current = i; } + + void f(unsigned val, int msb, int lsb) { + current->f(val, msb, lsb); + } + void f(unsigned val, int msb) { + current->f(val, msb, msb); + } + void sf(long val, int msb, int lsb) { + current->sf(val, msb, lsb); + } + void rf(Register reg, int lsb) { + current->rf(reg, lsb); + } + void rf(FloatRegister reg, int lsb) { + current->rf(reg, lsb); + } + void fixed(unsigned value, unsigned mask) { + current->fixed(value, mask); + } + + void emit() { + emit_long(current->get_insn()); + assert_cond(current->get_bits() == 0xffffffff); + current = NULL; + } + + typedef void (Assembler::* uncond_branch_insn)(address dest); + typedef void (Assembler::* cond_branch_insn)(address dest, Condition cond); + typedef void (Assembler::* cond_ldst_insn)(Register Rt, address dest, Condition cond); + typedef void (Assembler::* cond_fp_ldst_insn)(FloatRegister Vd, address dest, Condition cond); + + void wrap_label(Label &L, uncond_branch_insn insn); + void wrap_label(Label &L, Condition cond, cond_branch_insn insn); + void wrap_label(Register r, Label &L, Condition cond, cond_ldst_insn insn); + void wrap_label(FloatRegister r, Label &L, Condition cond, cond_fp_ldst_insn insn); + +#undef INSN + +// AARCH32 Instructions +// Defined roughly in the order they are found in +// ARM Archicture Reference Manual, section 5 + +#define ZERO_ADDR_REG r0 +#define ONES_ADDR_REG r15 + +// Data processing (register & register-shifted-register) + void reg_instr(int decode, shift_op shift, Condition cond, bool s) { + f(cond, 31, 28), 
f(0b000, 27, 25), f(decode, 24, 21), f(s, 20); + f(shift.shift(), 11, 7), f(shift.kind(), 6, 5), f(0, 4); + } + void reg_shift_reg_instr(int decode, enum shift_op::shift_kind kind, + Condition cond, bool s) { + f(cond, 31, 28), f(0b000, 27, 25), f(decode, 24, 21), f(s, 20); + f(0, 7), f(kind, 6, 5), f(1, 4); + } + +#define INSN(NAME, decode, s_flg) \ + void NAME(Register Rd, Register Rn, Register Rm, shift_op shift = S_DFLT, \ + Condition cond = C_DFLT) { \ + starti; \ + if(shift.is_register()) { \ + reg_shift_reg_instr(decode, shift.kind(), cond, s_flg); \ + rf(Rn, 16), rf(Rd, 12), rf(shift.reg(), 8), rf(Rm, 0); \ + } else { \ + reg_instr(decode, shift, cond, s_flg); \ + rf(Rn, 16), rf(Rd, 12), rf(Rm, 0); \ + } \ + } + INSN(andr, 0b0000, 0); + INSN(eor, 0b0001, 0); + INSN(sub, 0b0010, 0); + INSN(rsb, 0b0011, 0); + INSN(add, 0b0100, 0); + INSN(adc, 0b0101, 0); + INSN(sbc, 0b0110, 0); + INSN(rsc, 0b0111, 0); + INSN(orr, 0b1100, 0); + INSN(bic, 0b1110, 0); + + INSN(ands, 0b0000, 1); + INSN(eors, 0b0001, 1); + INSN(subs, 0b0010, 1); + INSN(rsbs, 0b0011, 1); + INSN(adds, 0b0100, 1); + INSN(adcs, 0b0101, 1); + INSN(sbcs, 0b0110, 1); + INSN(rscs, 0b0111, 1); + INSN(orrs, 0b1100, 1); + INSN(bics, 0b1110, 1); + +#undef INSN + +#define INSN(NAME, decode) \ + void NAME(Register Rn, Register Rm, Condition cond) { \ + NAME(Rn, Rm, S_DFLT, cond); \ + } \ + void NAME(Register Rn, Register Rm, shift_op shift = S_DFLT, \ + Condition cond = C_DFLT) { \ + starti; \ + if(shift.is_register()) { \ + reg_shift_reg_instr(decode, shift.kind(), cond, true); \ + rf(Rn, 16), f(0b0000, 15, 12), rf(shift.reg(), 8), rf(Rm, 0); \ + } else { \ + reg_instr(decode, shift, cond, true); \ + rf(Rn, 16), f(0, 15, 12), rf(Rm, 0); \ + } \ + } + INSN(tst, 0b1000); + INSN(teq, 0b1001); + INSN(cmp, 0b1010); + INSN(cmn, 0b1011); +#undef INSN + +// TODO appears that if Rd = 15 and s flag set then perhaps different method +void mov_internal(int decode, Register Rd, Register Rnm, shift_op shift, bool s, Condition cond) { + starti; + if(shift.is_register()) { + reg_shift_reg_instr(decode, shift.kind(), cond, s); + f(0b0000, 19, 16), rf(Rd, 12), rf(shift.reg(), 8), rf(Rnm, 0); + } else { + reg_instr(decode, shift, cond, s); + f(0, 19, 16), rf(Rd, 12), rf(Rnm, 0); + } +} +void mov(Register Rd, Register Rm, shift_op shift, Condition cond = C_DFLT) { + mov_internal(0b1101, Rd, Rm, shift, false, cond); +} +void movs(Register Rd, Register Rm, shift_op shift, Condition cond = C_DFLT) { + mov_internal(0b1101, Rd, Rm, shift, true, cond); +} +void mov(Register Rd, Register Rm, Condition cond = C_DFLT) { + mov_internal(0b1101, Rd, Rm, S_DFLT, false, cond); +} +void movs(Register Rd, Register Rm, Condition cond = C_DFLT) { + mov_internal(0b1101, Rd, Rm, S_DFLT, true, cond); +} + +void mvn(Register Rd, Register Rm, shift_op shift, Condition cond = C_DFLT) { + mov_internal(0b1111, Rd, Rm, shift, false, cond); +} +void mvns(Register Rd, Register Rm, shift_op shift, Condition cond = C_DFLT) { + mov_internal(0b1111, Rd, Rm, shift, true, cond); +} +void mvn(Register Rd, Register Rm, Condition cond = C_DFLT) { + mov_internal(0b1111, Rd, Rm, S_DFLT, false, cond); +} +void mvns(Register Rd, Register Rm, Condition cond = C_DFLT) { + mov_internal(0b1111, Rd, Rm, S_DFLT, true, cond); +} + +#define INSN(NAME, type, s_flg, ASSERTION) \ + void NAME(Register Rd, Register Rm, unsigned shift, Condition cond = C_DFLT) { \ + assert_cond(ASSERTION); \ + if(s_flg) movs(Rd, Rm, shift_op(type, shift), cond); \ + else mov(Rd, Rm, shift_op(type, shift), cond); \ + } + 
INSN(lsl, shift_op::LSL, 0, true); + INSN(lsr, shift_op::LSR, 0, true); + INSN(asr, shift_op::ASR, 0, true); + INSN(ror, shift_op::ROR, 0, shift != 0); //shift == 0 => RRX + + INSN(lsls, shift_op::LSL, 1, true); + INSN(lsrs, shift_op::LSR, 1, true); + INSN(asrs, shift_op::ASR, 1, true); + INSN(rors, shift_op::ROR, 1, shift != 0); //shift == 0 => RRX +#undef INSN + +#define INSN(NAME, type, s_flg) \ + void NAME(Register Rd, Register Rm, Condition cond = C_DFLT) { \ + if(s_flg) movs(Rd, Rm, shift_op(type, 0), cond); \ + else mov(Rd, Rm, shift_op(type, 0), cond); \ + } + INSN(rrx, shift_op::LSR, 0); + INSN(rrxs, shift_op::LSR, 1); +#undef INSN + +//Data processing (register-shifted-register) +#define INSN(NAME, type, s_flg) \ + void NAME(Register Rd, Register Rn, Register Rm, Condition cond = C_DFLT) { \ + if(s_flg) movs(Rd, Rn, shift_op(type, Rm), cond); \ + else mov(Rd, Rn, shift_op(type, Rm), cond); \ + } + INSN(lsl, shift_op::LSL, 0); + INSN(lsr, shift_op::LSR, 0); + INSN(asr, shift_op::ASR, 0); + INSN(ror, shift_op::ROR, 0); + + INSN(lsls, shift_op::LSL, 1); + INSN(lsrs, shift_op::LSR, 1); + INSN(asrs, shift_op::ASR, 1); + INSN(rors, shift_op::ROR, 1); +#undef INSN + + bool imm_instr(int decode, Register Rd, Register Rn, int imm, Condition cond, + bool s) { + if(!is_valid_for_imm12(imm)) + return false; + { + starti; + f(cond, 31, 28), f(0b001, 27, 25), f(decode, 24, 21), f(s, 20), rf(Rn, 16); + int imm12 = encode_imm12(imm); + rf(Rd, 12), f(imm12, 11, 0); + } + return true; + } + +#define INSN(NAME, decode, s_flg) \ + inline void NAME(Register Rd, Register Rn, unsigned imm, Condition cond = C_DFLT) {\ + bool status = imm_instr(decode, Rd, Rn, imm, cond, s_flg); \ + assert(status, "invalid imm"); \ + } + INSN(andr, 0b0000, 0); + INSN(eor, 0b0001, 0); + INSN(orr, 0b1100, 0); + INSN(bic, 0b1110, 0); + + INSN(ands, 0b0000, 1); + INSN(eors, 0b0001, 1); + INSN(orrs, 0b1100, 1); + INSN(bics, 0b1110, 1); + //NOTE: arithmetic immediate instructions are defined below to allow dispatch. +#undef INSN + protected: + // Mov data to destination register in the shortest number of instructions + // possible. + void mov_immediate(Register dst, uint32_t imm32, Condition cond, bool s); + // Mov data to destination register but always emit enough instructions that would + // permit any 32-bit constant to be loaded. (Allow for rewriting later). 
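+  // For example, 0x12345678 is emitted as movw dst, #0x5678; movt dst, #0x1234
+  // on ARMv6T2/ARMv7, and as a mov of the low byte followed by three orr's of
+  // the remaining bytes otherwise, so that a later patch can rewrite the full
+  // 32-bit value in place.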
+ void mov_immediate32(Register dst, uint32_t imm32, Condition cond, bool s); + + void add_sub_imm(int decode, Register Rd, Register Rn, int imm, + Condition cond, bool s); + + public: +#define INSN(NAME, decode, s_flg) \ + inline void NAME(Register Rd, Register Rn, int imm, Condition cond = C_DFLT) { \ + add_sub_imm(decode, Rd, Rn, imm, cond, s_flg); \ + } \ + inline void NAME(Register Rd, Register Rn, unsigned imm, \ + Condition cond = C_DFLT) { \ + add_sub_imm(decode, Rd, Rn, imm, cond, s_flg); \ + } \ + inline void NAME(Register Rd, Register Rn, long imm, Condition cond = C_DFLT) { \ + add_sub_imm(decode, Rd, Rn, imm, cond, s_flg); \ + } \ + inline void NAME(Register Rd, Register Rn, unsigned long imm, \ + Condition cond = C_DFLT) { \ + add_sub_imm(decode, Rd, Rn, imm, cond, s_flg); \ + } \ + /*Addition dispatch - place in macroassembler?*/ \ + void NAME(Register Rd, Register Rn, RegisterOrConstant operand, \ + Condition cond = C_DFLT) { \ + if(operand.is_register()) { \ + NAME(Rd, Rn, (Register)operand.as_register(), lsl(), cond); \ + } else { \ + NAME(Rd, Rn, (unsigned)operand.as_constant(), cond); \ + } \ + } \ + inline void NAME(Register Rd, Register Rn, unsigned imm, Register Rtmp, \ + Condition cond = C_DFLT) { \ + if (Assembler::operand_valid_for_add_sub_immediate(imm)) \ + NAME(Rd, Rn, imm, cond); \ + else { \ + mov_immediate(Rtmp, imm, cond, false); \ + NAME(Rd, Rn, Rtmp, cond); \ + } \ + } \ + //Note that the RegisterOrConstant version can't take a shift even though + // one of the instructions dispatched to can + INSN(sub, 0b0010, 0); + INSN(rsb, 0b0011, 0); + INSN(add, 0b0100, 0); + INSN(adc, 0b0101, 0); + INSN(sbc, 0b0110, 0); + INSN(rsc, 0b0111, 0); + + INSN(subs, 0b0010, 1); + INSN(rsbs, 0b0011, 1); + INSN(adds, 0b0100, 1); + INSN(adcs, 0b0101, 1); + INSN(sbcs, 0b0110, 1); + INSN(rscs, 0b0111, 1); +#undef INSN + //No need to do reverse as register subtracted from immediate + + // alias for mvn + void inv(Register Rd, Register Rn, Condition cond = C_DFLT) { + mvn(Rd, Rn, cond); + } + //alias for rsb + void neg(Register Rd, Register Rn, Condition cond = C_DFLT) { + rsb(Rd, Rn, 0, cond); + } + void negs(Register Rd, Register Rn, Condition cond = C_DFLT) { + rsbs(Rd, Rn, 0, cond); + } + + // PC-rel. 
addressing + void adr_encode(Register Rd, int imm, Condition cond) { + if (is_valid_for_imm12(imm) || is_valid_for_imm12(-imm)) { + add_sub_imm(0b0100, Rd, r15_pc, imm, cond, false); //opcode for add + } else { + int adjust = 0; + if (VM_Version::features() & (FT_ARMV7 | FT_ARMV6T2)) { + adjust = 8; // mov_w/mov_t + } else { + adjust = 16; // mov and 3 orr + } + mov_immediate32(Rd, imm - adjust, cond, false); + add(Rd, r15_pc, Rd, cond); + } + } + + void adr(Register Rd, address dest, Condition cond = C_DFLT); + + void adr(Register Rd, const Address &dest, Condition cond = C_DFLT); + + void adr(Register Rd, Label &L, Condition cond = C_DFLT) { + wrap_label(Rd, L, cond, &Assembler::Assembler::adr); + } + +private: + friend void entry(CodeBuffer *cb); +#define INSN(NAME, decode, s_flg) \ + inline void NAME(Register Rd, unsigned imm, Condition cond = C_DFLT) { \ + bool status = imm_instr(decode, Rd, ZERO_ADDR_REG, imm, cond, s_flg); \ + assert(status, "invalid imm"); \ + } \ + inline void NAME(Register Rd, int imm, Condition cond = C_DFLT) { \ + bool status = imm_instr(decode, Rd, ZERO_ADDR_REG, imm, cond, s_flg); \ + assert(status, "invalid imm"); \ + } +public: + + INSN(mov_i, 0b1101, 0); + INSN(mvn_i, 0b1111, 0); + + INSN(movs_i, 0b1101, 1); + INSN(mvns_i, 0b1111, 1); +#undef INSN + + void movw_i(Register Rd, unsigned imm, Condition cond = C_DFLT) { + starti; + assert(imm < (1 << 16), "Immediate too big for movw"); + f(cond, 31, 28), f(0b00110000, 27, 20), f(imm >> 12, 19, 16); + rf(Rd, 12), f(imm & 0xfff, 11, 0); + } + + void movt_i(Register Rd, unsigned imm, Condition cond = C_DFLT) { + starti; + assert(imm < (1 << 16), "Immediate too big for movt"); + f(cond, 31, 28), f(0b00110100, 27, 20), f(imm >> 12, 19, 16); + rf(Rd, 12), f(imm & 0xfff, 11, 0); + } + +#define INSN(NAME, decode) \ + inline void NAME(Register Rn, int imm, Condition cond = C_DFLT) { \ + bool status = imm_instr(decode, ZERO_ADDR_REG, Rn, imm, cond, true); \ + assert(status, "invalid imm"); \ + } \ + inline void NAME(Register Rn, unsigned imm, Condition cond = C_DFLT) { \ + bool status = imm_instr(decode, ZERO_ADDR_REG, Rn, imm, cond, true); \ + assert(status, "invalid imm"); \ + } \ + inline void NAME(Register Rn, int imm, Register Rtmp, Condition cond = C_DFLT) { \ + if (Assembler::operand_valid_for_add_sub_immediate(imm)) \ + NAME(Rn, imm, cond); \ + else { \ + mov_immediate(Rtmp, imm, cond, false); \ + NAME(Rn, Rtmp, cond); \ + } \ + } \ + inline void NAME(Register Rn, unsigned imm, Register Rtmp, Condition cond = C_DFLT) { \ + if (Assembler::operand_valid_for_add_sub_immediate(imm)) \ + NAME(Rn, imm, cond); \ + else { \ + mov_immediate(Rtmp, imm, cond, false); \ + NAME(Rn, Rtmp, cond); \ + } \ + } + INSN(tst, 0b1000); + INSN(teq, 0b1001); + INSN(cmp, 0b1010); + INSN(cmn, 0b1011); +#undef INSN + + +// Multiply and multiply accumulate + void mult_instr(int decode, Register a, Register b, Register c, + Register d, Condition cond, bool s) { + starti; + f(cond, 31, 28), f(0b0000, 27, 24), f(decode, 23, 21), f(s, 20); + rf(a, 16), rf(b, 12), rf(c, 8), rf(d, 0), f(0b1001, 7, 4); + } + + void mul(Register Rd, Register Rn, Register Rm, Condition cond = C_DFLT) { + mult_instr(0b000, Rd, ZERO_ADDR_REG, Rm, Rn, cond, false); + } + void muls(Register Rd, Register Rn, Register Rm, Condition cond = C_DFLT) { + mult_instr(0b000, Rd, ZERO_ADDR_REG, Rm, Rn, cond, true); + } + + void mla(Register Rd, Register Rn, Register Rm, Register Ra, Condition cond = C_DFLT) { + mult_instr(0b001, Rd, Ra, Rm, Rn, cond, false); + } + void 
mlas(Register Rd, Register Rn, Register Rm, Register Ra, Condition cond = C_DFLT) { + mult_instr(0b001, Rd, Ra, Rm, Rn, cond, true); + } + + void mls(Register Rd, Register Rn, Register Rm, Register Ra, Condition cond = C_DFLT) { + mult_instr(0b011, Rd, Ra, Rm, Rn, cond, false); + } + + void umaal(Register RdLo, Register RdHi, Register Rn, Register Rm, Condition cond = C_DFLT) { + mult_instr(0b010, RdHi, RdLo, Rm, Rn, cond, false); + } + +#define INSN(NAME, decode, s_flg) \ + void NAME(Register RdLo, Register RdHi, Register Rn, Register Rm, \ + Condition cond = C_DFLT) { \ + mult_instr(decode, RdHi, RdLo, Rm, Rn, cond, s_flg); \ + } + INSN(umull, 0b100, 0); + INSN(umlal, 0b101, 0); + INSN(smull, 0b110, 0); + INSN(smlal, 0b111, 0); + + INSN(umulls, 0b100, 1); + INSN(umlals, 0b101, 1); + INSN(smulls, 0b110, 1); + INSN(smlals, 0b111, 1); + +#undef INSN + +//Saturating addition and subtraction +#define INSN(NAME, decode) \ + void NAME(Register Rd, Register Rm, Register Rn, Condition cond = C_DFLT) { \ + starti; \ + f(cond, 31, 28), f( 0b00010, 27, 23), f(decode, 22, 21), f(0, 20); \ + rf(Rn, 16), rf(Rd, 12), f( 0b00000101, 11, 4), rf(Rm, 0); \ + } + INSN(qadd, 0b00); + INSN(qsub, 0b01); + INSN(qdadd, 0b10); + INSN(qdsub, 0b11); +#undef INSN + +// Halfword multiply and multiply accumulate + void mul_instr(int decode, Register Ra, Register Rb, Register Rc, Register Rd, + bool N, bool M, Condition cond) { + starti; + f(cond, 31, 28), f(0b00010, 27, 23), f(decode, 22, 21), f(0, 20); + rf(Ra, 16), rf(Rb, 12), rf(Rc, 8), f(1, 7), f(M, 6), f(N, 5), f(0, 4); + rf(Rd, 0); + } + +#define INSN(NAME, decode, N, M) \ + void NAME(Register Rd, Register Rn, Register Rm, Register Ra, \ + Condition cond = C_DFLT) { \ + mul_instr(decode, Rd, Ra, Rm, Rn, N, M, cond); \ + } + INSN(smlabb, 0b00, 0, 0); + INSN(smlabt, 0b00, 0, 1) + INSN(smlatb, 0b00, 1, 0) + INSN(smlatt, 0b00, 1, 1) + + INSN(smlawb, 0b01, 0, 0); + INSN(smlawt, 0b01, 0, 1); +#undef INSN + +#define INSN(NAME, decode, N, M) \ + void NAME(Register RdLo, Register RdHi, Register Rn, Register Rm, \ + Condition cond = C_DFLT) { \ + mul_instr(decode, RdHi, RdLo, Rm, Rn, N, M, cond); \ + } + INSN(smlalbb, 0b10, 0, 0); + INSN(smlalbt, 0b10, 0, 1); + INSN(smlaltb, 0b10, 1, 0); + INSN(smlaltt, 0b10, 1, 1); +#undef INSN + +#define INSN(NAME, decode, N, M) \ + void NAME(Register Rd, Register Rn, Register Rm, Condition cond = C_DFLT) { \ + mul_instr(decode, Rd, ZERO_ADDR_REG, Rm, Rn, N, M, cond); \ + } + INSN(smulwb, 0b01, 1, 0); + INSN(smulwt, 0b01, 1, 1); + + INSN(smulbb, 0b11, 0, 0); + INSN(smulbt, 0b11, 0, 1); + INSN(smultb, 0b11, 1, 0); + INSN(smultt, 0b11, 1, 1); +#undef INSN + +// For Extra load/store instructions, see load/store section +// For Synchronization primitives, see load/store section + +// MSR(immediate), and hints +#define INSN(NAME, decode) \ + void NAME(Condition cond = C_DFLT) { \ + starti; \ + f(cond, 31, 28), f(0b001100100000, 27, 16), f(0b11110000, 15, 8); \ + f(decode, 7, 0); \ + } + INSN(nop, 0b000); + INSN(yield, 0b001); + INSN(wfe, 0b010); + INSN(wfi, 0b011); + INSN(sev, 0b100); + void dbg(int dbg_hint, Condition cond = C_DFLT) { + f(cond, 31, 28), f(0b001100100000, 27, 16), f(0b11110000, 15, 8); + f(0b1111, 7, 4); f(dbg_hint, 3, 0); + } +#undef INSN + + //TODO Misc instructions + void bkpt(unsigned imm) { + starti; + f(AL, 31, 28), f(0b00010010, 27, 20); + f(imm >> 4, 19, 8), f(0b0111, 7, 4), f(imm & 0xf, 3, 0); + } + void hlt(unsigned imm) { + bkpt(imm); + // FIXME This seemed like the best option! 
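+    // (A32 only gained a real hlt instruction in ARMv8, so bkpt is the
+    // closest equivalent available here.)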
+ } + + // Load/store register (all modes) + void load_store_instr(Register Rt, const Address &adr, int op, int op2, int a, int b, + Condition cond) { + starti; + f(cond, 31, 28), f(op, 27, 25), f(a, 22), f(b, 20); + if(op2 >= 0) + f(op2, 7, 4); + //Destination + rf(Rt, 12); + adr.encode(current, code_section(), pc()); + } + + bool encodeable(int decode, address dest) { + long offset = dest - pc(); + switch(decode) { + case 0b010: + // LDR, LDRB, STR, STRB + return uabs(offset) < (1 << 12); + case 0b000: + //LDRD, LDRH, LDRSB, LDRSH, STRH, STRD + return uabs(offset) < (1 << 8); + default: + ShouldNotReachHere(); + } + return false; + } + + +#define INSN_INT(NAME, op, op2, a, b, isload) \ + void NAME(Register Rt, address dest, Condition cond = C_DFLT) { \ + if(encodeable(op, dest)) { /* Plan A */ \ + long offset = dest - pc(); \ + NAME(Rt, Address(r15_pc, offset), cond); \ + } else if(isload){ /* Plan B */ \ + /* TODO check we don't have to relocate this*/ \ + mov_immediate(Rt, (uint32_t)dest, cond, false); \ + NAME(Rt, Address(Rt, 0), cond); \ + } else { /* There is no plan C */ \ + ShouldNotReachHere(); \ + } \ + } \ + void NAME(Register Rt, address dest, relocInfo::relocType rtype, \ + Condition cond = C_DFLT) { \ + guarantee(rtype == relocInfo::internal_word_type, \ + "only internal_word_type relocs make sense here"); \ + NAME(Rt, InternalAddress(dest), cond); \ + } \ + void NAME(Register Rt, Label &L, Condition cond = C_DFLT) { \ + wrap_label(Rt, L, cond, &Assembler::NAME); \ + } + +#define INSN(NAME, op, op2, a, b, isload) \ + void NAME(Register Rt, const Address &adr, Condition cond = C_DFLT) { \ + load_store_instr(Rt, adr, op, op2, a, b, cond); \ + } \ + INSN_INT(NAME, op, op2, a, b, isload); + INSN(ldr, 0b010, -1, 0, 1, 1); + INSN(ldrb, 0b010, -1, 1, 1, 1); + + INSN(ldrsb, 0b000, 0b1101, 0, 1, 1); + INSN(ldrh, 0b000, 0b1011, 0, 1, 1); + INSN(ldrsh, 0b000, 0b1111, 0, 1, 1); + + INSN(str, 0b010, -1, 0, 0, 0); + INSN(strb, 0b010, -1, 1, 0, 0); + INSN(strh, 0b000, 0b1011, 0, 0, 0); + //Note LDRD & STRD are defined with the load/store multiple instructions + + //TODO Need to introduce ldrsb ldrsh - then check that the encoding works properly! 
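+  // Illustrative use of the generated accessors with the Address modes above
+  // (assuming the usual register aliases), e.g.:
+  //   ldr(r0, Address(rfp, -wordSize));        // ldr r0, [r11, #-4]
+  //   str(r1, Address(sp, 4, Address::post));  // str r1, [sp], #4
+  //   ldr(r2, Address(r3, r4, lsl(2)));        // ldr r2, [r3, r4, lsl #2]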
+#undef INSN + + + //Synchronization primitives + void sync_instr(int decode, Register Ra, Register Rb, Register Rc, Register Rd, + Condition cond) { + starti; + f(cond, 31, 28), f(0b0001, 27, 24), f(decode, 23, 20), rf(Ra, 16), rf(Rb, 12); + rf(Rc, 8), f(0b1001, 7, 4), rf(Rd, 0); + } + +#define INSN(NAME, decode) \ + void NAME(Register Rd, Register Rt, Register Rn, Condition cond = C_DFLT) { \ + assert(r15_pc != Rn, "Unpredictable"); \ + sync_instr(decode, Rn, Rd, ONES_ADDR_REG, Rt, cond); \ + } \ + void NAME(Register Rd, Register Rt, Address a, Condition cond = C_DFLT) { \ + assert(a.get_mode() == Address::imm, "must be"); \ + assert(a.offset() == 0, "unsupported"); \ + NAME(Rd, Rt, a.base(), cond); \ + } + INSN( strex, 0b1000); + INSN(strexd, 0b1010); + INSN(strexb, 0b1100); + INSN(strexh, 0b1110); +#undef INSN + +#define INSN(NAME, decode) \ + void NAME(Register Rt, Register Rn, Condition cond = C_DFLT) { \ + assert(r15_pc != Rn, "Unpredictable"); \ + sync_instr(decode, Rn, Rt, ONES_ADDR_REG, ONES_ADDR_REG, cond); \ + } \ + void NAME(Register Rt, Address a, Condition cond = C_DFLT) { \ + assert(a.get_mode() == Address::imm, "must be"); \ + assert(a.offset() == 0, "unsupported"); \ + NAME(Rt, a.base(), cond); \ + } + INSN(ldrex, 0b1001); + INSN(ldrexd, 0b1011); + INSN(ldrexb, 0b1101); + INSN(ldrexh, 0b1111); +#undef INSN + +// Media instructions +void media_instr(int decode, int decode2, Condition cond) { + f(cond, 31, 28), f(0b011, 27, 25), f(decode, 24, 20); + f(decode2, 7, 5), f(1, 4); +} + +#define INSN(NAME, decode, decode2) \ + void NAME(Register Rd, Register Rn, Register Rm, Condition cond = C_DFLT) { \ + starti; \ + media_instr(0b00000 | decode, decode2, cond); \ + rf(Rn, 16), rf(Rd, 12), f(0b1111, 11, 8), rf(Rm, 0); \ + } + INSN(sadd16, 0b01, 0b000); + INSN(sasx, 0b01, 0b001); + INSN(ssax, 0b01, 0b010); + INSN(ssub16, 0b01, 0b011); + INSN(sadd8, 0b01, 0b100); + INSN(ssub8, 0b01, 0b111); + //Saturating + INSN(qadd16, 0b10, 0b000); + INSN(qasx, 0b10, 0b001); + INSN(qsax, 0b10, 0b010); + INSN(qsub16, 0b10, 0b011); + INSN(qadd8, 0b10, 0b100); + INSN(qsub8, 0b10, 0b111); + //Halving + INSN(shadd16, 0b11, 0b000); + INSN(shasx, 0b11, 0b001); + INSN(shsax, 0b11, 0b010); + INSN(shsub16, 0b11, 0b011); + INSN(shadd8, 0b11, 0b100); + INSN(shsub8, 0b11, 0b111); + + //Now unsigned + INSN(uadd16, 0b101, 0b000); + INSN(uasx, 0b101, 0b001); + INSN(usax, 0b101, 0b010); + INSN(usub16, 0b101, 0b011); + INSN(uadd8, 0b101, 0b100); + INSN(usub8, 0b101, 0b111); + //Saturating + INSN(uqadd16, 0b110, 0b000); + INSN(uqasx, 0b110, 0b001); + INSN(uqsax, 0b110, 0b010); + INSN(uqsub16, 0b110, 0b011); + INSN(uqadd8, 0b110, 0b100); + INSN(uqsub8, 0b110, 0b111); + //Halving + INSN(uhadd16, 0b111, 0b000); + INSN(uhasx, 0b111, 0b001); + INSN(uhsax, 0b111, 0b010); + INSN(uhsub16, 0b111, 0b011); + INSN(uhadd8, 0b111, 0b100); + INSN(uhsub8, 0b111, 0b111); +#undef INSN + +//Packing, unpacking, saturation and reversal +// Note rotation can only be one of ROR #0 ROR #8 ROR #16 ROR #24 +void extend_instr(int decode, int decode2, int decode3, Register Rd, Register Rn, + Register Rm, shift_op shift, Condition cond) { + starti; + assert(0 == shift.shift() || + shift_op::ROR == shift.kind(), "Only ROR may be used for op"); + // All zero shifts are mapped to LSL #0 + int shift_enc = 0; + switch(shift.shift()) { + case 0: break; + case 8: shift_enc = 1; break; + case 16: shift_enc = 2; break; + case 24: shift_enc = 3; break; + default: assert(false, "Invalid shift quantity"); + } + media_instr(0b01000 | decode, decode2, 
cond); + rf(Rn, 16), rf(Rd, 12), f(shift_enc, 11, 10), f(decode3, 9, 8), rf(Rm, 0); +} +void extend_instr(int decode, int decode2, int decode3, Register Rd, Register Rn, + unsigned imm, Condition cond) { + starti; + media_instr(0b01000 | decode, decode2, cond); + rf(Rn, 0), rf(Rd, 12), f(decode3, 11, 8), f(imm, 19, 16); +} + +#define INSN(NAME, decode, decode2) \ + void NAME(Register Rd, Register Rn, Register Rm, shift_op shift = ::ror(), \ + Condition cond = C_DFLT) { \ + assert(0xf != Rn->encoding_nocheck(), "Rn = pc makes different instruction"); \ + extend_instr(decode, decode2, 0b00, Rd, Rn, Rm, shift, cond); \ + } + INSN(sxtab16, 0b000, 0b011); + INSN(sxtab, 0b010, 0b011); + INSN(sxtah, 0b011, 0b011); + INSN(uxtab16, 0b100, 0b011); + INSN(uxtab, 0b110, 0b011); + INSN(uxtah, 0b111, 0b011); +#undef INSN + +#define INSN(NAME, decode, decode2) \ + void NAME(Register Rd, Register Rm, shift_op shift = ::ror(), \ + Condition cond = C_DFLT) { \ + extend_instr(decode, decode2, 0b00, Rd, ONES_ADDR_REG, Rm, shift, cond); \ + } + INSN(sxtb16, 0b000, 0b011); + INSN(sxtb, 0b010, 0b011); + INSN(sxth, 0b011, 0b011); + INSN(uxtb16, 0b100, 0b011); + INSN(uxtb, 0b110, 0b011); + INSN(uxth, 0b111, 0b011); +#undef INSN + +#define INSN(NAME, decode, decode2) \ + void NAME(Register Rd, unsigned imm, Register Rn, Condition cond = C_DFLT) { \ + extend_instr(decode, decode2, 0b1111, Rd, Rn, imm, cond); \ + } + INSN(usat16, 0b110, 0b001); +#undef INSN + + //Reverse instructions +#define INSN(NAME, decode, decode2) \ + void NAME(Register Rd, Register Rm, Condition cond = C_DFLT) { \ + extend_instr(decode, decode2, 0b11, Rd, ONES_ADDR_REG, Rm, ::ror(24), cond); \ + } + INSN(rev, 0b011, 0b001); + INSN(rev16, 0b011, 0b101); + INSN(rbit, 0b111, 0b001); + INSN(revsh, 0b111, 0b101); +#undef INSN + +// Signed multiply, signed and unsigned divide +#define INSN(NAME, decode, decode2) \ + void NAME(Register Rd, Register Rn, Register Rm, Condition cond = C_DFLT) { \ + starti; \ + media_instr(0b10000 | decode, decode2, cond); \ + rf(Rd, 16), f(0b1111, 15, 12), rf(Rm, 8), rf(Rn, 0); \ + } + INSN(sdiv, 0b001, 0b000); + INSN(udiv, 0b011, 0b000); + INSN(smuad, 0b000, 0b000); + INSN(smuadx, 0b000, 0b001); + INSN(smusd, 0b000, 0b010); + INSN(smusdx, 0b000, 0b011); + INSN(smmul, 0b101, 0b000); + INSN(smmulr, 0b101, 0b001); + //TODO ALL THE REST! 
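+  // Note that A32 sdiv/udiv are an optional extension on ARMv7-A, so they can
+  // only be used on hardware that actually implements the integer divide
+  // instructions.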
+#undef INSN + +// Remainder of things +#define INSN(NAME, decode, decode2) \ + void NAME(Register Rd, Register Rn, int lsb, int width, \ + Condition cond = C_DFLT) { \ + starti; \ + assert(lsb >= 0 && lsb < 32, "lsb out of range"); \ + assert(width > 0 && width <= 32 - lsb, "width out of range"); \ + media_instr(decode, decode2, cond); \ + f(width - 1, 20, 16), rf(Rd, 12), f(lsb, 11, 7), rf(Rn, 0); \ + } + INSN(sbfx, 0b11010, 0b010); + INSN(ubfx, 0b11110, 0b010); +#undef INSN + +void bfi(Register Rd, Register Rn, int lsb, int width, Condition cond = C_DFLT) { + assert(VM_Version::features() & (FT_ARMV6T2 | FT_ARMV7), "unsupported on the cpu"); + int msb = lsb + width - 1; + assert(lsb >= 0 && lsb < 32, "lsb out of range"); + assert(msb < 32 && msb >= lsb, "width out of range"); + starti; + media_instr(0b11100, 0b000, cond); + f(msb, 20, 16), rf(Rd, 12), f(lsb, 11, 7), rf(Rn, 0); +} + +void bfc(Register Rd, int lsb, int width, Condition cond = C_DFLT) { + assert(VM_Version::features() & (FT_ARMV6T2 | FT_ARMV7), "unsupported on the cpu"); + int msb = lsb + width - 1; + assert(lsb >= 0 && lsb < 32, "lsb out of range"); + assert(msb < 32 && msb >= lsb, "width out of range"); + starti; + media_instr(0b11100, 0b000, cond); + f(msb, 20, 16), rf(Rd, 12), f(lsb, 11, 7), f(0b1111, 3, 0); +} + +void clz(Register Rd, Register Rm, Condition cond = C_DFLT) { + assert(Rd != r15_pc && Rm != r15_pc, "must be"); + starti; + f(cond, 31, 28), f(0b000101101111, 27, 16), rf(Rd, 12); + f(0b11110001, 11, 4), rf(Rm, 0); +} + +//Branch, branch with link, and block data transfer + +void block_imm_instr(int decode, int w, Register Rn, unsigned regset, + Condition cond) { + starti; + f(cond, 31, 28), f(0b10, 27, 26), f(decode | (w << 1), 25, 20); + rf(Rn, 16), f(regset, 15, 0); +} +#define INSN(NAME, decode) \ + void NAME(Register Rn, unsigned regset, bool wb = true, Condition cond = C_DFLT) { \ + block_imm_instr(decode, wb, Rn, regset, cond); \ + } + INSN(stmda, 0b000000); + INSN(stmed, 0b000000); + + INSN(ldmda, 0b000001); + INSN(ldmfa, 0b000001); + + //INSN(stm, 0b001000); + INSN(stmia, 0b001000); + INSN(stmea, 0b001000); + + //INSN(ldm, 0b001001); + INSN(ldmia, 0b001001); + INSN(ldmfd, 0b001001); + + INSN(stmdb, 0b010000); + INSN(stmfd, 0b010000); + + INSN(ldmdb, 0b010001); + INSN(ldmea, 0b010001); + + INSN(stmib, 0b011000); + INSN(stmfa, 0b011000); + + INSN(ldmib, 0b011001); + INSN(ldmed, 0b011001); +#undef INSN + +unsigned count_bits(unsigned val); +bool can_ldst_multiple( unsigned regset, const Address& adr); + +//NOTE!! 
Have repurposed stm and ldm for auto dispatch instructions +#define INSN(NAME, PREFIX) \ + void NAME(unsigned regset, const Address& adr, Condition cond = C_DFLT) { \ + assert(can_ldst_multiple(regset, adr), "Can't do anything with this!"); \ + int offset = adr.offset(); \ + switch(adr.get_wb_mode()) { \ + case Address::pre: \ + if(offset > 0) PREFIX##mib(adr.base(), regset, true, cond); \ + else PREFIX##mdb(adr.base(), regset, true, cond); \ + break; \ + case Address::post: \ + if(offset > 0) PREFIX##mia(adr.base(), regset, true, cond); \ + else PREFIX##mda(adr.base(), regset, offset != 0, cond); \ + break; \ + case Address::off: \ + if(offset > 0) PREFIX##mib(adr.base(), regset, false, cond); \ + else if(!offset) PREFIX##mia(adr.base(), regset, false, cond); \ + else PREFIX##mdb(adr.base(), regset, false, cond); \ + break; \ + default: \ + ShouldNotReachHere(); \ + } \ + } + INSN(ldm, ld); + INSN(stm, st); +#undef INSN + +//Made push and pop operate on full descending stacks +#define INSN(NAME, CNAME) \ + inline void NAME(unsigned regset, Condition cond = C_DFLT) { \ + CNAME(r13, regset, true, cond); \ + } + INSN(pop, ldmia); + INSN(push, stmdb); +#undef INSN + + public: + +#define INSN(NAME, PREFIX, op, op2, a, b, isload) \ + void NAME(Register Rt, const Address& adr, Condition cond = C_DFLT) { \ + load_store_instr(Rt, adr, op, op2, a, b, cond); \ + } \ + INSN_INT(NAME, op, op2, a, b, isload); + + INSN(ldrd, ld, 0b000, 0b1101, 0, 0, 1); + INSN(strd, st, 0b000, 0b1111, 0, 0, 0); +#undef INSN +#undef INSN_INT + + // Branches + + // For immediate branches: + // The maximum range of a branch is fixed for the aarch32 + // architecture. In debug mode we shrink it in order to test + // trampolines, but not so small that branches in the interpreter + // are out of range. 
Compiler2 is ported in the assumption that code cache is + // always reachable with immediate branch, so cannot restrict the size + static const unsigned long branch_range = + COMPILER2_PRESENT(32 * M) NOT_COMPILER2(NOT_DEBUG(32 * M) DEBUG_ONLY(2 * M)); + static bool reachable_from_branch_at(address branch, address target) { + return uabs(target - branch) < branch_range; + } + + void branch_imm_instr(int decode, address dest, Condition cond) { + starti; + // Correct PC for as it will be when executing this instruction + int offset = (dest - (pc() + 8)) >> 2; + assert(reachable_from_branch_at(pc(), dest), "branch target unreachable"); + f(cond, 31, 28), f(decode, 27, 24), sf(offset, 23, 0); + } + + void branch_reg_instr(int decode, Register Rm, Condition cond) { + starti; + f(cond, 31, 28), f(0b00010010, 27, 20); + f(0b111111111111, 19, 8), f(decode, 7, 4), rf(Rm, 0); + } + +#define INSN(NAME, decode_imm, decode_reg) \ + void NAME(Register Rm, Condition cond = C_DFLT) { \ + branch_reg_instr(decode_reg, Rm, cond); \ + } \ + void NAME(address dest, Condition cond = C_DFLT) { \ + branch_imm_instr(decode_imm, dest, cond); \ + } \ + void NAME(Label &L, Condition cond = C_DFLT) { \ + wrap_label(L, cond, &Assembler::NAME); \ + } \ + void NAME(const Address &dest, Condition cond = C_DFLT) { \ + code_section()->relocate(pc(), dest.rspec()); \ + NAME(dest.target(), cond); \ + } + //TODO assert type of address + INSN(b, 0b1010, 0b0001); // B & BX + INSN(bl, 0b1011, 0b0011); // BL & BLX +#undef INSN + + +//TODO Coprocessor instructions, and Supervisor Call + + +// Unconditional Instructions + enum barrier {OSHST = 0b0010, OSH, + NSHST = 0b0110, NSH, + ISHST = 0b1010, ISH, + ST = 0b1110, SY}; + + void sync_instr(int decode, enum barrier option) { + starti; + f(0b11110, 31, 27), f(0b1010111, 26, 20), f(0b111111110000, 19, 8); + f(decode, 7, 4), f(option, 3, 0); + } + void clrex() { + sync_instr(0b0001, SY); + } + void dsb(enum barrier option) { + sync_instr(0b0100, option); + } + void dmb(enum barrier option) { + sync_instr(0b0101, option); + } + void bkpt(); + void isb() { + sync_instr(0b0110, SY); + } + + void udf(int imm_16) { + assert((imm_16 >> 16) == 0, "encoding constraint"); + emit_int32(0xe7f000f0 | (imm_16 & 0xfff0) << 8 | (imm_16 & 0xf)); + } + + // And the relevant instructions for ARMv6. + + // MCR , , , , {, } + void mcr(int cpc_dex, int opc1, Register Rt, int cpc_reg_dex1, + int cpc_reg_dex2, int opc2, Condition cond = C_DFLT) { + starti; + f(cond, 31, 28), f(0b1110, 27, 24), f(opc1, 23, 21), f(0, 20); + f(cpc_reg_dex1, 19, 16), rf(Rt, 12), f(cpc_dex, 11, 8); + f(opc2, 7, 5), f(1, 4), f(cpc_reg_dex2, 3, 0); + } + + // These instructions do not read the value of the register passed, + // can be any. Chosen r0. + void cp15dmb(Condition cond = C_DFLT) { + mcr(15, 0, r0, 7, 10, 5, cond); + } + + void cp15dsb(Condition cond = C_DFLT) { + mcr(15, 0, r0, 7, 10, 4, cond); + } + + void cp15isb(Condition cond = C_DFLT) { + mcr(15, 0, r0, 7, 5, 4, cond); + } + + enum Membar_mask_bits { + // We can use ISH for a barrier because the ARM ARM says "This + // architecture assumes that all Processing Elements that use the + // same operating system or hypervisor are in the same Inner + // Shareable shareability domain." 
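+ // Note also that ARMv7 DMB has no load-only ordering option (the *LD options
+ // are ARMv8 additions), so LoadLoad and LoadStore below fall back to a full
+ // ISH barrier.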
+ StoreStore = ISHST, + LoadStore = ISH, //ISHLD, Changed to + LoadLoad = ISH, //ISHLD, + StoreLoad = ISH, + AnyAny = ISH + }; + + void mrs(Register Rd, Condition cond = C_DFLT) { + starti; + f(cond, 31, 28), f(0b00010, 27, 23), f(0, 22), f(0b00, 21, 20), f(0b1111, 19, 16); + rf(Rd, 12), f(0b000000000000, 11, 0); + } + + void msr(Register Rn, bool nzcvq = true, bool g = true, Condition cond = C_DFLT) { + starti; + f(cond, 31, 28), f(0b00010, 27, 23), f(0, 22), f(0b10, 21, 20); + f(nzcvq ? 1 : 0, 19), f(g ? 1 : 0, 18), f(0b00, 17, 16); + f(0b111100000000, 15, 4), rf(Rn, 0); + } + +// Floating point operations + +enum fpscr_cond { FP_EQ = 0b0110 << 28, + FP_LT = 0b1000 << 28, + FP_GT = 0b0010 << 28, + FP_UN = 0b0011 << 28, + FP_MASK = 0b1111 << 28 }; + + void fp_instr_base(bool is64bit, Condition cond) { + f(cond, 31, 28), f(0b1110, 27, 24), f(0b101, 11, 9), f(is64bit, 8), f(0, 4); + } + + void fp_rencode(FloatRegister reg, bool is64bit, int base, int bit) { + int reg_val = reg->encoding_nocheck(); + if(!is64bit) { + f( reg_val >> 1, base + 3, base); + f( reg_val & 1, bit); + } else { + f( reg_val & 0xf, base + 3, base); + f( reg_val >> 4, bit); + } + } + + void fp_instr(int decode, int op, bool is64bit, FloatRegister Rd, FloatRegister Rn, + FloatRegister Rm, Condition cond) { + fp_instr_base(is64bit, cond); + f(decode, 23, 20), f(op, 6); + // Register encoding is a bit involved + // double register passed (see 'd0'-'dN' encoding), not reencode it's number + fp_rencode(Rn, false, 16, 7); + fp_rencode(Rd, false, 12, 22); + fp_rencode(Rm, false, 0, 5); + } + +#define INSN(NAME, decode, op, is64bit) \ + void NAME(FloatRegister Rd, FloatRegister Rn, FloatRegister Rm, \ + Condition cond = C_DFLT) { \ + starti; \ + fp_instr(decode, op, is64bit, Rd, Rn, Rm, cond); \ + } + INSN(vmla_f32, 0b0000, 0, 0); + INSN(vmla_f64, 0b0000, 0, 1); + INSN(vmls_f32, 0b0000, 1, 0); + INSN(vmls_f64, 0b0000, 1, 1); + + INSN(vnmla_f32, 0b0001, 1, 0); + INSN(vnmla_f64, 0b0001, 1, 1); + INSN(vnmls_f32, 0b0001, 0, 0); + INSN(vnmls_f64, 0b0001, 0, 1); + INSN(vnmul_f32, 0b0010, 1, 0); + INSN(vnmul_f64, 0b0010, 1, 1); + INSN(vmul_f32, 0b0010, 0, 0); + INSN(vmul_f64, 0b0010, 0, 1); + + INSN(vadd_f32, 0b0011, 0, 0); + INSN(vadd_f64, 0b0011, 0, 1); + INSN(vsub_f32, 0b0011, 1, 0); + INSN(vsub_f64, 0b0011, 1, 1); + + INSN(vdiv_f32, 0b1000, 0, 0); + INSN(vdiv_f64, 0b1000, 0, 1); + + INSN(vfnma_f32, 0b1001, 1, 0); + INSN(vfnma_f64, 0b1001, 1, 1); + INSN(vfnms_f32, 0b1001, 0, 0); + INSN(vfnms_f64, 0b1001, 0, 1); + + INSN(vfma_f32, 0b1010, 0, 0); + INSN(vfma_f64, 0b1010, 0, 1); + INSN(vfms_f32, 0b1010, 1, 0); + INSN(vfms_f64, 0b1010, 1, 1); +#undef INSN + + + void vmov_imm(FloatRegister Rd, unsigned imm, bool is64bit, Condition cond); + void vmov_imm(FloatRegister Rd, unsigned imm); + void vmov_imm_zero(FloatRegister Rd, bool is64bit, Condition cond); + + unsigned encode_float_fp_imm(float imm_f); + + void vmov_f32(FloatRegister Rd, float imm, Condition cond = C_DFLT) { + vmov_imm(Rd, encode_float_fp_imm(imm), false, cond); + } + + unsigned encode_double_fp_imm(double imm_f); + + void vmov_f64(FloatRegister Rd, double imm, Condition cond = C_DFLT) { + bool positive_zero = (imm == 0.0) && !signbit(imm); + if(positive_zero) vmov_imm_zero(Rd, true, cond); + else vmov_imm(Rd, encode_double_fp_imm(imm), true, cond); + } + +#define INSN(NAME, decode, op, is64bit) \ + void NAME(FloatRegister Rd, FloatRegister Rm, Condition cond = C_DFLT) { \ + starti; \ + fp_instr_base(is64bit, cond); \ + f(0b1011, 23, 20), f(decode, 19, 16), f(op, 7, 6), 
f(0b00, 5, 4); \ + /* double register passed (see 'd0'-'dN' encoding), not reencode it's number */ \ + fp_rencode(Rd, false, 12, 22); \ + fp_rencode(Rm, false, 0, 5); \ + } + INSN(vmov_f32, 0b0000, 0b01, 0); + INSN(vmov_f64, 0b0000, 0b01, 1); + INSN(vabs_f32, 0b0000, 0b11, 0); + INSN(vabs_f64, 0b0000, 0b11, 1); + INSN(vneg_f32, 0b0001, 0b01, 0); + INSN(vneg_f64, 0b0001, 0b01, 1); + INSN(vsqrt_f32, 0b0001, 0b11, 0); + INSN(vsqrt_f64, 0b0001, 0b11, 1); +#undef INSN + +//ARM -> FP, FP -> ARM +// NOTE - Have only implemented the double precision variant as only operating on +// double registers - can still be used to copy single precision +void vmov64_instr_base(FloatRegister Rm, Register Rt, Register Rt2, int op, + Condition cond) { + starti; + f(cond, 31, 28), f(0b1100010, 27, 21), f(op, 20); + rf(Rt2, 16), rf(Rt, 12), f(0b101100, 11, 6), f(1, 4); + // double register passed (see 'd0'-'dN' encoding), not reencode it's number + fp_rencode(Rm, false, 0, 5); +} + +void vmov_f64(FloatRegister Rm, Register Rt, Register Rt2, Condition cond = C_DFLT) { + vmov64_instr_base(Rm, Rt, Rt2, 0, cond); +} +void vmov_f64(Register Rt, Register Rt2, FloatRegister Rm, Condition cond = C_DFLT) { + vmov64_instr_base(Rm, Rt, Rt2, 1, cond); +} + +void vmov_f32(FloatRegister Rn, Register Rt, Condition cond = C_DFLT) { + starti; + fp_instr_base(false, cond); + f(0b000, 23, 21), f(0, 20); + rf(Rt, 12), f(0b101000010000, 11, 0); + // double register passed (see 'd0'-'dN' encoding), not reencode it's number + fp_rencode(Rn, false, 16, 7); +} +void vmov_f32(Register Rt, FloatRegister Rn, Condition cond = C_DFLT) { + starti; + fp_instr_base(false, cond); + f(0b000, 23, 21), f(1, 20); + rf(Rt, 12), f(0b101000010000, 11, 0); + // double register passed (see 'd0'-'dN' encoding), not reencode it's number + fp_rencode(Rn, false, 16, 7); +} + +// Floating-point comparison +#define INSN(NAME, E, is64bit) \ + void NAME(FloatRegister Rd, int imm, Condition cond = C_DFLT) { \ + assert(0 == imm, "vector compare can only be with another vector or zero"); \ + starti; \ + fp_instr_base(is64bit, cond); \ + f(0b10110101, 23, 16), f(E, 7), f(0b1000000, 6, 0); \ + /* double register passed (see 'd0'-'dN' encoding), not reencode it's number */ \ + fp_rencode(Rd, false, 12, 22); \ + } \ + void NAME(FloatRegister Vd, FloatRegister Vm, Condition cond = C_DFLT) { \ + starti; \ + fp_instr_base(is64bit, cond); \ + f(0b10110100, 23, 16), f(E, 7), f(1, 6), f(0, 4); \ + /* double register passed (see 'd0'-'dN' encoding), not reencode it's number */ \ + fp_rencode(Vd, false, 12, 22), fp_rencode(Vm, false, 0, 5); \ + } + INSN(vcmpe_f64, 1, 1); + INSN(vcmpe_f32, 1, 0); + INSN( vcmp_f64, 0, 1); + INSN( vcmp_f32, 0, 0); +#undef INSN + +//Move FPSCR to ARM register +void vmrs(Register Rt, Condition cond = C_DFLT) { + starti; + f(cond, 31, 28), f(0b111011110001, 27, 16), rf(Rt, 12), f(0b101000010000, 11, 0); +} + +//Move ARM register to FPSCR +void vmsr(Register Rt, Condition cond = C_DFLT) { + starti; + f(cond, 31, 28), f(0b111011100001, 27, 16), rf(Rt, 12), f(0b101000010000, 11, 0); +} + +// TODO These instructions use round towards zero mode. 
It is possible +// for the mode to be taken from the FPSCR however it doesn't do it currently +#define INSN(NAME, decode2, b19, op, is64bitRd, is64bitRm, sz) \ + void NAME(FloatRegister Rd, FloatRegister Rm, Condition cond = C_DFLT) { \ + starti; \ + fp_instr_base(sz, cond); \ + f(0b1011, 23, 20), f(b19, 19), f(decode2, 18, 16), f(op, 7), f(0b100, 6, 4); \ + /* double register passed (see 'd0'-'dN' encoding), not reencode it's number */ \ + fp_rencode(Rd, false, 12, 22); \ + fp_rencode(Rm, false, 0, 5); \ + } + INSN(vcvt_s32_f32, 0b101, 1, 1, 0, 0, 0); + INSN(vcvt_s32_f64, 0b101, 1, 1, 0, 1, 1); + INSN(vcvt_u32_f32, 0b100, 1, 1, 0, 0, 0); + INSN(vcvt_u32_f64, 0b100, 1, 1, 0, 1, 1); + + INSN(vcvt_f64_s32, 0b000, 1, 1, 1, 0, 1); + INSN(vcvt_f64_u32, 0b000, 1, 0, 1, 0, 1); + INSN(vcvt_f32_s32, 0b000, 1, 1, 0, 0, 0); + INSN(vcvt_f32_u32, 0b000, 1, 0, 0, 0, 0); + + INSN(vcvt_f32_f64, 0b111, 0, 1, 0, 1, 1); + INSN(vcvt_f64_f32, 0b111, 0, 1, 1, 0, 0); +#undef INSN + +//Vector load/store + private: + void fp_ldst_instr(int decode, bool is64bit, const Address& adr, Condition cond); + public: + +#define INSN(NAME, decode, is64bit) \ + void NAME(FloatRegister Vd, const Address &adr, Condition cond = C_DFLT) { \ + starti; \ + fp_ldst_instr(decode, is64bit, adr, cond); \ + /* double register passed (see 'd0'-'dN' encoding), not reencode it's number */ \ + fp_rencode(Vd, false, 12, 22); \ + } \ + void NAME(FloatRegister Vd, address dest, Condition cond = C_DFLT) { \ + long offset = dest - pc(); \ + NAME(Vd, Address(r15_pc, offset), cond); \ + } \ + void NAME(FloatRegister Vd, address dest, relocInfo::relocType rtype, \ + Condition cond = C_DFLT) { \ + guarantee(rtype == relocInfo::internal_word_type, \ + "only internal_word_type relocs make sense here"); \ + NAME(Vd, InternalAddress(dest), cond); \ + } \ + void NAME(FloatRegister Vd, Label &L, Condition cond = C_DFLT) { \ + wrap_label(Vd, L, cond, &Assembler::NAME); \ + } + INSN(vstr_f64, 0b10000, 1); + INSN(vstr_f32, 0b10000, 0); + INSN(vldr_f64, 0b10001, 1); + INSN(vldr_f32, 0b10001, 0); +#undef INSN + + private: + enum fp_mode { ia_wb, ia, db_wb }; + void fp_ldst_mul(Register Rn, uint32_t regset, bool load, bool is64bit, enum fp_mode mode, Condition cond); + public: +#define INSN(NAME, EXT, is64bit, load) \ + inline void NAME##ia##EXT(Register Rn, unsigned regset, bool wb = true, \ + Condition cond = C_DFLT) { \ + fp_ldst_mul(Rn, regset, load, is64bit, \ + (enum fp_mode)( ia_wb + ( wb?0:1 )), cond); \ + } \ + inline void NAME##db##EXT(Register Rn, unsigned regset, Condition cond = C_DFLT) { \ + fp_ldst_mul(Rn, regset, load, is64bit, db_wb, cond); \ + } + INSN(vldm, _f32, 0, 1); + INSN(vldm, _f64, 1, 1); + INSN(vstm, _f32, 0, 0); + INSN(vstm, _f64, 1, 0); +#undef INSN + +#undef ZERO_ADDR_REG +#undef ONES_ADDR_REG + +/* SIMD extensions + * + * We just use FloatRegister in the following. They are exactly the same + * as SIMD registers. 
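+ *
+ * (A 128-bit Q register aliases a pair of consecutive 64-bit D registers,
+ * Qn = D2n:D2n+1; the *_128 variants below therefore operate on such pairs,
+ * named by their even-numbered D register.)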
+ */ + public: + enum SIMD_Align { + ALIGN_STD = 0b00, ALIGN_64 = 0b01, ALIGN_128 = 0b10, ALIGN_256 = 0b11 + }; + // multiple single elements +private: + void simd_ldst(FloatRegister, unsigned type, unsigned size, unsigned xfer_size, + const Address &addr, enum SIMD_Align align, unsigned encode); +public: +#define INSN(NAME, size, encode) \ + inline void NAME(FloatRegister Dd, const Address &addr, enum SIMD_Align align) { \ + simd_ldst(Dd, 0b0111, size, 1, addr, align, encode); \ + } \ + inline void NAME(FloatRegister Dd, FloatRegister Dd1, const Address &addr, \ + enum SIMD_Align align) { \ + assert(Dd->successor(FloatRegisterImpl::DOUBLE) == Dd1, "Must be consecutive"); \ + simd_ldst(Dd, 0b1010, size, 2, addr, align, encode); \ + } \ + inline void NAME(FloatRegister Dd, FloatRegister Dd1, FloatRegister Dd2, \ + const Address &addr, enum SIMD_Align align) { \ + assert(Dd->successor(FloatRegisterImpl::DOUBLE) == Dd1, "Must be consecutive"); \ + assert(Dd1->successor(FloatRegisterImpl::DOUBLE) == Dd2, "Must be consecutive"); \ + simd_ldst(Dd, 0b0110, size, 3, addr, align, encode); \ + } \ + inline void NAME(FloatRegister Dd, FloatRegister Dd1, FloatRegister Dd2, \ + FloatRegister Dd3, const Address &addr, enum SIMD_Align align) { \ + assert(Dd->successor(FloatRegisterImpl::DOUBLE) == Dd1, "Must be consecutive"); \ + assert(Dd1->successor(FloatRegisterImpl::DOUBLE) == Dd2, "Must be consecutive"); \ + assert(Dd2->successor(FloatRegisterImpl::DOUBLE) == Dd3, "Must be consecutive"); \ + simd_ldst(Dd, 0b0010, size, 4, addr, align, encode); \ + } + INSN(vld1_8, 0b00, 0b10); + INSN(vld1_16, 0b01, 0b10); + INSN(vld1_32, 0b10, 0b10); + INSN(vld1_64, 0b11, 0b10); + INSN(vst1_8, 0b00, 0b00); + INSN(vst1_16, 0b01, 0b00); + INSN(vst1_32, 0b10, 0b00); + INSN(vst1_64, 0b11, 0b00); +#undef INSN + + // single element to one lane +private: + void simd_ldst_single(FloatRegister Rd, unsigned size, unsigned index, + const Address &addr, bool align, unsigned encode); +public: +#define INSN(NAME, size, encode) \ + inline void NAME(FloatRegister Dd, unsigned index, const Address &addr, bool align) { \ + simd_ldst_single(Dd, size, index, addr, align, encode); \ + } + INSN(vld1_8, 0b00, 0b10); + INSN(vld1_16, 0b01, 0b10); + INSN(vld1_32, 0b10, 0b10); + INSN(vst1_8, 0b00, 0b00); + INSN(vst1_16, 0b01, 0b00); + INSN(vst1_32, 0b10, 0b00); +#undef INSN + +private: + void simd_vmov(FloatRegister Dd, unsigned index, Register Rt, bool advsimd, + unsigned index_bits, unsigned bit20, unsigned opc, Condition cond); +public: +#define INSN(NAME, advsimd, opc, index_bits) \ + inline void NAME(FloatRegister Rd, unsigned index, Register Rt, \ + Condition cond = Assembler::AL) { \ + simd_vmov(Rd, index, Rt, advsimd, index_bits, 0, opc, cond); \ + } + INSN(vmov_8, true, 0b1000, 2); + INSN(vmov_16, true, 0b0001, 1); + INSN(vmov_32, false, 0b0000, 0); +#undef INSN +#define INSN(NAME, advsimd, opc, index_bits) \ + inline void NAME(Register Rt, FloatRegister Rd, unsigned index, \ + Condition cond = Assembler::AL) { \ + simd_vmov(Rd, index, Rt, advsimd, index_bits, 1, opc, cond); \ + } + INSN(vmov_8s, true, 0b01000, 3); + INSN(vmov_16s, true, 0b00001, 2); + INSN(vmov_8u, true, 0b11000, 3); + INSN(vmov_16u, true, 0b10001, 2); + INSN(vmov_32, false, 0b00000, 1); +#undef INSN + +private: + void simd_vmov(FloatRegister Dd, unsigned imm, unsigned q, unsigned op_cmode); +public: +#define INSN(NAME, q, op_cmode) \ + inline void NAME(FloatRegister Dd, unsigned imm) { \ + simd_vmov(Dd, imm, q, op_cmode); \ + } + INSN(vmov_64_8, 0, 0b01110); + 
INSN(vmov_64_16, 0, 0b01000); + INSN(vmov_64_32, 0, 0b00000); + INSN(vmov_128_8, 1, 0b01110); + INSN(vmov_128_16, 1, 0b01000); + INSN(vmov_128_32, 1, 0b00000); +#undef INSN + +private: + void simd_logicalop(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm, unsigned q, + unsigned a, unsigned b, unsigned u, unsigned c); +public: +#define INSN(NAME, q, a, b, u, c) \ + inline void NAME(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm) { \ + simd_logicalop(Dd, Dn, Dm, q, a, b, u, c); \ + } + INSN(veor_64, 0, 0b0001, 1, 1, 0b00); + INSN(veor_128, 1, 0b0001, 1, 1, 0b00); + INSN(vand_64, 0, 0b0001, 1, 0, 0b00); + INSN(vand_128, 1, 0b0001, 1, 0, 0b00); + INSN(vorr_64, 0, 0b0001, 1, 0, 0b10); + INSN(vorr_128, 1, 0b0001, 1, 0, 0b10); +#undef INSN + + // vmov is actually a vorr +#define vmov_64(Dd, Dm) vorr_64(Dd, Dm, Dm) +#define vmov_128(Qd, Qm) vorr_128(Qd, Qm, Qm) + +private: + void simd_vmul(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm, + unsigned bit24, unsigned bits109, unsigned size, unsigned mul, unsigned bit6); +public: +#define INSN(NAME, bit24, bit9, size, mul, bit6, bit10) \ + inline void NAME(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm) { \ + simd_vmul(Dd, Dn, Dm, bit24, (bit10<<1)|bit9, size, mul, bit6); \ + } + INSN(vmul_64_8, 0, 0, 0b00, 1, 0, 0); + INSN(vmul_64_16, 0, 0, 0b01, 1, 0, 0); + INSN(vmul_64_32, 0, 0, 0b10, 1, 0, 0); + INSN(vmulp_64_8, 1, 0, 0b00, 1, 0, 0); + INSN(vmul_128_8, 0, 0, 0b00, 1, 1, 0); + INSN(vmul_128_16, 0, 0, 0b01, 1, 1, 0); + INSN(vmul_128_32, 0, 0, 0b10, 1, 1, 0); + INSN(vmulp_128_8, 1, 0, 0b00, 1, 1, 0); + INSN(vmull_8s, 0, 0, 0b00, 0, 0, 1); + INSN(vmull_16s, 0, 0, 0b01, 0, 0, 1); + INSN(vmull_32s, 0, 0, 0b10, 0, 0, 1); + INSN(vmull_8u, 1, 0, 0b00, 0, 0, 1); + INSN(vmull_16u, 1, 0, 0b01, 0, 0, 1); + INSN(vmull_32u, 1, 0, 0b10, 0, 0, 1); + INSN(vmullp_8, 0, 1, 0b00, 0, 0, 1); + INSN(vmul_64_f32, 1, 0, 0b00, 1, 0, 1); + INSN(vmul_128_f32,1, 0, 0b00, 1, 1, 1); +#undef INSN + +private: + void simd_vuzp(FloatRegister Dd, FloatRegister Dm, unsigned size, unsigned q); +public: +#define INSN(NAME, size, q) \ + inline void NAME(FloatRegister Dd, FloatRegister Dm) { \ + simd_vuzp(Dd, Dm, size, q); \ + } + INSN(vuzp_64_8, 0b00, 0); + INSN(vuzp_64_16, 0b01, 0); + INSN(vuzp_64_32, 0b10, 0); + INSN(vuzp_128_8, 0b00, 1); + INSN(vuzp_128_16, 0b01, 1); + INSN(vuzp_128_32, 0b10, 1); +#undef INSN + +private: + void simd_vshl(FloatRegister Dd, FloatRegister Dm, unsigned imm, + unsigned q, unsigned u, unsigned encode); +public: +#define INSN(NAME, size, q, u, encode, checkDd) \ + inline void NAME(FloatRegister Dd, FloatRegister Dm, unsigned imm) { \ + assert(!checkDd || (Dd->encoding() & 2) == 0, "Odd register"); \ + unsigned encode_eff = encode; \ + unsigned u_eff = u; \ + imm &= size == 6 ? 0x3f : 0x1f; /* per jvms */ \ + if (imm >= (1u << size)) { /* vshl cannot encode shift by size or more... */ \ + encode_eff = 0b0000; /* .. 
change to equivalent vshr (actually set to 0) */ \ + u_eff = 1; \ + imm = (1u << size); \ + } \ + simd_vshl(Dd, Dm, imm|(1u< (1u << size)) { \ + imm = 1u << size; /* saturate shift */ \ + } else { /* encode the imm per ARM spec */ \ + imm = (1u << size+1) - imm; \ + } \ + simd_vshl(Dd, Dm, imm, q, u, encode_eff); \ + } + INSN(vshr_64_u8, 3, 0, 1); + INSN(vshr_64_u16, 4, 0, 1); + INSN(vshr_64_u32, 5, 0, 1); + INSN(vshr_64_u64, 6, 0, 1); + INSN(vshr_128_u8, 3, 1, 1); + INSN(vshr_128_u16, 4, 1, 1); + INSN(vshr_128_u32, 5, 1, 1); + INSN(vshr_128_u64, 6, 1, 1); + INSN(vshr_64_s8, 3, 0, 0); + INSN(vshr_64_s16, 4, 0, 0); + INSN(vshr_64_s32, 5, 0, 0); + INSN(vshr_64_s64, 6, 0, 0); + INSN(vshr_128_s8, 3, 1, 0); + INSN(vshr_128_s16, 4, 1, 0); + INSN(vshr_128_s32, 5, 1, 0); + INSN(vshr_128_s64, 6, 1, 0); +#undef INSN +#define INSN(NAME, encode, size, q) \ + inline void NAME(FloatRegister Dd, FloatRegister Dm, unsigned imm) { \ + simd_vshl(Dd, Dm, imm|(1u<encoding_nocheck(); \ + simd_dup(Dd, as_FloatRegister(m_num & ~1), m_num & 1, q, 0b00); \ + } + INSN(vdups_64, 0); + INSN(vdups_128, 1); +#undef INSN + +private: + void simd_neg(FloatRegister Dd, FloatRegister Dm, unsigned q, unsigned size); +public: +#define INSN(NAME, q, size) \ + inline void NAME(FloatRegister Dd, FloatRegister Dm) { \ + simd_neg(Dd, Dm, q, size); \ + } + INSN(vneg_64_s8, 0, 0b00); + INSN(vneg_64_s16, 0, 0b01); + INSN(vneg_64_s32, 0, 0b10); + INSN(vneg_128_s8, 1, 0b00); + INSN(vneg_128_s16, 1, 0b01); + INSN(vneg_128_s32, 1, 0b10); +#undef INSN + +private: + void simd_mvn(FloatRegister Dd, FloatRegister Dm, unsigned q); +public: +#define INSN(NAME, q) \ + inline void NAME(FloatRegister Dd, FloatRegister Dm) { \ + simd_mvn(Dd, Dm, q); \ + } + INSN(vmvn_64, 0); + INSN(vmvn_128, 1); +#undef INSN + + // three registers of the same length +private: + void simd_insn(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm, + unsigned q, unsigned a, unsigned b, unsigned u, unsigned c); +public: +#define INSN(NAME, q, a, b, u, c) \ + inline void NAME(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm) { \ + simd_insn(Dd, Dn, Dm, q, a, b, u, c); \ + } +#define INSN_GR(NAME, a, b, u) \ + INSN(NAME##_64_8, 0, a, b, u, 0b00) \ + INSN(NAME##_64_16, 0, a, b, u, 0b01) \ + INSN(NAME##_64_32, 0, a, b, u, 0b10) \ + INSN(NAME##_64_64, 0, a, b, u, 0b11) \ + INSN(NAME##_128_8, 1, a, b, u, 0b00) \ + INSN(NAME##_128_16, 1, a, b, u, 0b01) \ + INSN(NAME##_128_32, 1, a, b, u, 0b10) \ + INSN(NAME##_128_64, 1, a, b, u, 0b11) + + INSN_GR(vadd, 0b1000, 0b0, 0b0); + INSN(vadd_64_f32, 0, 0b1101, 0b0, 0b0, 0b00); + INSN(vadd_128_f32, 1, 0b1101, 0b0, 0b0, 0b00); + INSN_GR(vsub, 0b1000, 0b0, 0b1); + INSN(vsub_64_f32, 0, 0b1101, 0b0, 0b0, 0b10); + INSN(vsub_128_f32, 1, 0b1101, 0b0, 0b0, 0b10); + +#undef INSN_GR +#undef INSN + + // three registers of different length +private: + void simd_insn(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm, + unsigned qn, unsigned a, unsigned b, unsigned u); +public: +#define INSN(NAME, qn, a, b, u) \ + inline void NAME(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm) { \ + simd_insn(Dd, Dn, Dm, qn, a, b, u); \ + } +#define INSN_GR(NAME, qn, a) \ + INSN(NAME##_8u, qn, a, 0b00, 1) \ + INSN(NAME##_16u, qn, a, 0b01, 1) \ + INSN(NAME##_32u, qn, a, 0b10, 1) \ + INSN(NAME##_8s, qn, a, 0b00, 0) \ + INSN(NAME##_16s, qn, a, 0b01, 0) \ + INSN(NAME##_32s, qn, a, 0b10, 0) + + INSN_GR(vaddw, 1, 0b0001); + INSN_GR(vaddl, 0, 0b0000); +#undef INSN_GR +#undef INSN + + // VEXT, the instruction out of any class +private: + void 
simd_vext(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm, unsigned q, unsigned imm); +public: +#define INSN(NAME, q) \ + inline void NAME(FloatRegister Dd, FloatRegister Dn, FloatRegister Dm, unsigned imm) { \ + simd_vext(Dd, Dn, Dm, q, imm); \ + } + INSN(vext_64, 0u); + INSN(vext_128, 1u); +#undef INSN + +public: + +#define INSN(NAME, r) \ + inline void NAME(Address a) { \ + starti; \ + f(0b1111, 31, 28); \ + f(0b0101, 27, 24), f(0b01, 21, 20); \ + f(0b1111, 15, 12); \ + f(r, 22); \ + rf(a.base(), 16); \ + if (a.get_mode() == Address::imm) { \ + f(a.offset() >= 0 ? 1 : 0, 23); \ + f(a.offset() >= 0 ? a.offset() : -a.offset(), 11, 0); \ + } else if (a.get_mode() == Address::reg) { \ + assert(a.get_wb_mode() == Address::off, "must be"); \ + assert(!a.shift().is_register(), "must be"); \ + f(a.op() == Address::ADD ? 1 : 0, 23); \ + rf(a.index(), 0); \ + f(0, 4); \ + f(a.shift().shift(), 11, 7); \ + f(a.shift().kind(), 6, 5); \ + } else { \ + ShouldNotReachHere(); \ + } \ + } + INSN(pld, 1); + INSN(pldw, 0); +#undef INSN + +#define INSN(NAME, size, c) \ + inline void NAME(Register Rd, Register Rn, Register Rm, Condition cond = C_DFLT) { \ + starti; \ + assert(VM_Version::features() & FT_CRC32, "Instruction is not supported by CPU"); \ + f(cond, 31, 28), f(0b00010, 27, 23), f(size, 22, 21), f(0, 20), rf(Rn, 16), rf(Rd, 12); \ + f(0b00, 11, 10), c ? f(0b1, 9) : f(0b0, 9), f(0b00100, 8, 4), rf(Rm, 0); \ + } + INSN(crc32b, 0, 0); + INSN(crc32h, 1, 0); + INSN(crc32w, 2, 0); + INSN(crc32cb, 0, 1); + INSN(crc32ch, 1, 1); + INSN(crc32cw, 2, 1); +#undef INSN + +#define INSN(NAME, opc) \ + inline void NAME(FloatRegister Vd, FloatRegister Vm) { \ + starti; \ + f(0b111100111, 31, 23), f(0b110000, 21, 16), f(0, 4); \ + f(opc, 11, 6), fp_rencode(Vd, false, 12, 22), fp_rencode(Vm, false, 0, 5); \ + } + + INSN(aese, 0b001100); + INSN(aesd, 0b001101); + INSN(aesmc, 0b001110); + INSN(aesimc, 0b001111); + +#undef INSN + + Assembler(CodeBuffer* code) : AbstractAssembler(code) {} + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + ShouldNotCallThis(); + return RegisterOrConstant(); + } + + // Stack overflow checking + virtual void bang_stack_with_offset(int offset); + + // Immediate values checks and transformations + + static uint32_t encode_imm12(int imm); + static int decode_imm12(uint32_t imm12); + static bool is_valid_for_imm12(int imm); + + static bool is_valid_for_offset_imm(int imm, int nbits) { + return uabs(imm) < (1u << nbits); + } + + static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm); + static bool operand_valid_for_add_sub_immediate(int imm); + static bool operand_valid_for_add_sub_immediate(unsigned imm); + static bool operand_valid_for_add_sub_immediate(unsigned long imm); + static bool operand_valid_for_add_sub_immediate(jlong imm); + static bool operand_valid_for_float_immediate(float imm); + static bool operand_valid_for_double_immediate(double imm); + + void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0); + void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0); + + // useful to revert back the effect of post/pre addressing modifications + // applied to the base register + void compensate_addr_offset(const Address &adr, Condition cond) { + compensate_addr_offset(adr.base(), adr.index(), adr.shift(), adr.op() == Address::ADD, cond); + } + void compensate_addr_offset(Register Rd, Register Roff, shift_op shift, bool isAdd, Condition cond) { + shift_op shift_back; + + 
if (shift.is_register()) { + switch (shift.kind()) { + case shift_op::LSL: + case shift_op::LSR: + shift_back = asr(shift.reg()); + break; + case shift_op::ASR: + shift_back = lsl(shift.reg()); + break; + case shift_op::ROR: + Unimplemented(); // need a temp register here + break; + default: + ShouldNotReachHere(); + } + } else { + switch (shift.kind()) { + case shift_op::LSL: + case shift_op::LSR: + shift_back = asr(shift.shift()); + break; + case shift_op::ASR: + shift_back = lsl(shift.shift()); + break; + case shift_op::ROR: + shift_back = ror(32-shift.shift()); + break; + default: + ShouldNotReachHere(); + } + } + if (isAdd) + sub(Rd, Rd, Roff, shift_back, cond); + else + add(Rd, Rd, Roff, shift_back, cond); + } +}; + +inline Assembler::Membar_mask_bits operator|(Assembler::Membar_mask_bits a, + Assembler::Membar_mask_bits b) { + return Assembler::Membar_mask_bits(unsigned(a)|unsigned(b)); +} + +Instruction_aarch32::~Instruction_aarch32() { + assem->emit(); +} + +#undef starti + +// Invert a condition +inline const Assembler::Condition operator~(const Assembler::Condition cond) { + return Assembler::Condition(int(cond) ^ 1); +} + +class BiasedLockingCounters; + +extern "C" void das(uint64_t start, int len); + +#endif // CPU_AARCH32_VM_ASSEMBLER_AARCH32_HPP --- /dev/null 2018-09-25 19:24:12.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/assembler_aarch32.inline.hpp 2018-09-25 19:24:12.000000000 +0300 @@ -0,0 +1,34 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_ASSEMBLER_AARCH32_INLINE_HPP +#define CPU_AARCH32_VM_ASSEMBLER_AARCH32_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +#endif // CPU_AARCH32_VM_ASSEMBLER_AARCH32_INLINE_HPP --- /dev/null 2018-09-25 19:24:13.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/bytecodes_aarch32.cpp 2018-09-25 19:24:13.000000000 +0300 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/bytecodes.hpp" + --- /dev/null 2018-09-25 19:24:14.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/bytecodes_aarch32.hpp 2018-09-25 19:24:14.000000000 +0300 @@ -0,0 +1,32 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_BYTECODES_AARCH32_HPP +#define CPU_AARCH32_VM_BYTECODES_AARCH32_HPP + +// No AArch32 specific bytecodes + +#endif // CPU_AARCH32_VM_BYTECODES_AARCH32_HPP --- /dev/null 2018-09-25 19:24:15.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/bytes_aarch32.hpp 2018-09-25 19:24:15.000000000 +0300 @@ -0,0 +1,86 @@ +/* + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH32_VM_BYTES_AARCH32_HPP
+#define CPU_AARCH32_VM_BYTES_AARCH32_HPP
+
+#include "memory/allocation.hpp"
+
+class Bytes: AllStatic {
+ public:
+
+ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering.
+ // Since ARMv6, unaligned short and word accesses are handled by the hardware.
+ // However, an unaligned double-word access causes a kernel trap and software processing,
+ // so we turn it into fast unaligned word accesses.
+ static inline u2 get_native_u2(address p) { return *(u2*)p; }
+ static inline u4 get_native_u4(address p) { return *(u4*)p; }
+ static inline u8 get_native_u8(address p) {
+ if (!(uintptr_t(p) & 3)) {
+ return *(u8*)p;
+ }
+ u4 *const a = (u4*) p;
+ return (u8(a[1]) << 32) | a[0];
+ }
+
+ static inline void put_native_u2(address p, u2 x) { *(u2*)p = x; }
+ static inline void put_native_u4(address p, u4 x) { *(u4*)p = x; }
+ static inline void put_native_u8(address p, u8 x) { *(u8*)p = x; }
+
+
+ // Efficient reading and writing of unaligned unsigned data in Java
+ // byte ordering (i.e. big-endian ordering). Byte-order reversal is
+ // needed since AArch32 uses the little-endian format.
+ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); }
+ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); }
+ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); }
+
+ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); }
+ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); }
+ static inline void put_Java_u8(address p, u8 x) {
+ const u8 nx = swap_u8(x);
+ if (!(uintptr_t(p) & 3)) {
+ *(u8*)p = nx;
+ } else {
+ u4 *const a = (u4*) p;
+ a[0] = nx;
+ a[1] = nx >> 32;
+ }
+ }
+
+ // Efficient swapping of byte ordering
+ static inline u2 swap_u2(u2 x); // compiler-dependent implementation
+ static inline u4 swap_u4(u4 x); // compiler-dependent implementation
+ static inline u8 swap_u8(u8 x);
+};
+
+
+// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base]
+
+#include OS_CPU_HEADER_INLINE(bytes)
+
+#endif // CPU_AARCH32_VM_BYTES_AARCH32_HPP
--- /dev/null 2018-09-25 19:24:16.000000000 +0300
+++ new/src/hotspot/cpu/aarch32/c1_CodeStubs_aarch32.cpp 2018-09-25 19:24:16.000000000 +0300
@@ -0,0 +1,531 @@
+/*
+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "nativeInst_aarch32.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_aarch32.inline.hpp" + + +#define __ ce->masm()-> + +#define should_not_reach_here() should_not_reach_here_line(__FILE__, __LINE__) + +void CounterOverflowStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + Metadata *m = _method->as_constant_ptr()->as_metadata(); + __ mov_metadata(rscratch1, m); + ce->store_parameter(rscratch1, 1); + ce->store_parameter(_bci, 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) + : _throw_index_out_of_bounds_exception(false), _index(index), _array(array) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) + : _throw_index_out_of_bounds_exception(true), _index(index), _array(NULL) { + assert(info != NULL, "must have info"); + _info = new CodeEmitInfo(info); +} + +void RangeCheckStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_info->deoptimize_on_exception()) { + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ far_call(RuntimeAddress(a)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); + return; + } + + if (_index->is_cpu_register()) { + __ mov(rscratch1, _index->as_register()); + } else { + __ mov(rscratch1, _index->as_jint()); + } + Runtime1::StubID stub_id; + if (_throw_index_out_of_bounds_exception) { + stub_id = Runtime1::throw_index_exception_id; + } else { + assert(_array != NULL, "sanity"); + __ mov(rscratch2, _array->as_pointer_register()); + stub_id = Runtime1::throw_range_check_failed_id; + } + __ far_call(RuntimeAddress(Runtime1::entry_for(stub_id)), NULL); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { + _info = new CodeEmitInfo(info); +} + +void PredicateFailedStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + __ far_call(RuntimeAddress(a)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + +void DivByZeroStub::emit_code(LIR_Assembler* ce) { + if (_offset != -1) { + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + } + __ bind(_entry); + __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); +#ifdef ASSERT + __ should_not_reach_here(); +#endif +} + + + +// Implementation of NewInstanceStub + +NewInstanceStub::NewInstanceStub(LIR_Opr 
klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) { + _result = result; + _klass = klass; + _klass_reg = klass_reg; + _info = new CodeEmitInfo(info); + assert(stub_id == Runtime1::new_instance_id || + stub_id == Runtime1::fast_new_instance_id || + stub_id == Runtime1::fast_new_instance_init_check_id, + "need new_instance id"); + _stub_id = stub_id; +} + + + +void NewInstanceStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + __ mov(r3, _klass_reg->as_register()); + __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == r0, "result must in r0,"); + __ b(_continuation); +} + + +// Implementation of NewTypeArrayStub + +// Implementation of NewTypeArrayStub + +NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _length = length; + _result = result; + _info = new CodeEmitInfo(info); +} + + +void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == r6, "length must in r6,"); + assert(_klass_reg->as_register() == r3, "klass_reg must in r3"); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == r0, "result must in r0"); + __ b(_continuation); +} + + +// Implementation of NewObjectArrayStub + +NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { + _klass_reg = klass_reg; + _result = result; + _length = length; + _info = new CodeEmitInfo(info); +} + + +void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + assert(_length->as_register() == r6, "length must in r6"); + assert(_klass_reg->as_register() == r3, "klass_reg must in r3"); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + assert(_result->as_register() == r0, "result must in r0"); + __ b(_continuation); +} +// Implementation of MonitorAccessStubs + +MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) +: MonitorAccessStub(obj_reg, lock_reg) +{ + _info = new CodeEmitInfo(info); +} + + +void MonitorEnterStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + __ bind(_entry); + ce->store_parameter(_obj_reg->as_register(), 1); + ce->store_parameter(_lock_reg->as_register(), 0); + Runtime1::StubID enter_id; + if (ce->compilation()->has_fpu_code()) { + enter_id = Runtime1::monitorenter_id; + } else { + enter_id = Runtime1::monitorenter_nofpu_id; + } + __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id))); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + __ b(_continuation); +} + + +void MonitorExitStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + if (_compute_lock) { + // lock_reg was destroyed by fast unlocking attempt => recompute it + ce->monitor_address(_monitor_ix, _lock_reg); + } + ce->store_parameter(_lock_reg->as_register(), 0); + // note: non-blocking leaf routine => no call info needed + Runtime1::StubID exit_id; + if (ce->compilation()->has_fpu_code()) { + exit_id = 
Runtime1::monitorexit_id; + } else { + exit_id = Runtime1::monitorexit_nofpu_id; + } + __ adr(lr, _continuation); + __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); +} + + +// Implementation of patching: +// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) +// - Replace original code with a call to the stub +// At Runtime: +// - call to stub, jump to runtime +// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) +// - in runtime: after initializing class, restore original code, reexecute instruction + +int PatchingStub::_patch_info_offset = 0; + +void PatchingStub::align_patch_site(MacroAssembler* masm) { +} + +void PatchingStub::emit_code(LIR_Assembler* ce) { + // NativeCall::instruction_size is dynamically calculated based on CPU, + // armv7 -> 3 instructions, armv6 -> 5 instructions. Initialize _patch_info_offset + // here, when CPU is determined already. + if (!_patch_info_offset) + _patch_info_offset = -NativeCall::instruction_size; + assert(_patch_info_offset == -NativeCall::instruction_size, "must not change"); + assert(NativeCall::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF, "not enough room for call"); + + Label call_patch; + + // static field accesses have special semantics while the class + // initializer is being run so we emit a test which can be used to + // check that this code is being executed by the initializing + // thread. + address being_initialized_entry = __ pc(); + if (CommentedAssembly) { + __ block_comment(" patch template"); + } + address start = __ pc(); + if (_id == load_klass_id) { + // produce a copy of the load klass instruction for use by the being initialized case + int metadata_index = -1; + CodeSection* cs = __ code_section(); + RelocIterator iter(cs, (address)_pc_start, (address)_pc_start+1); + while (iter.next()) { + if (iter.type() == relocInfo::metadata_type) { + metadata_Relocation* r = iter.metadata_reloc(); + assert(metadata_index == -1, "uninitalized yet"); + metadata_index = r->metadata_index(); + break; + } + } + assert(metadata_index != -1, "initialized"); + __ relocate(metadata_Relocation::spec(metadata_index)); + __ patchable_load(_obj, __ pc()); + while ((intx) __ pc() - (intx) start < NativeCall::instruction_size) { + __ nop(); + } +#ifdef ASSERT + for (int i = 0; i < _bytes_to_copy; i++) { + assert(*(_pc_start + i) == *(start + i), "should be the same code"); + } +#endif + } else if (_id == load_mirror_id || _id == load_appendix_id) { + // produce a copy of the load mirror instruction for use by the being + // initialized case + int oop_index = -1; + CodeSection* cs = __ code_section(); + RelocIterator iter(cs, (address)_pc_start, (address)_pc_start+1); + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) { + oop_Relocation* r = iter.oop_reloc(); + assert(oop_index == -1, "uninitalized yet"); + oop_index = r->oop_index(); + break; + } + } + assert(oop_index != -1, "initialized"); + __ relocate(oop_Relocation::spec(oop_index)); + __ patchable_load(_obj, __ pc()); + while ((intx) __ pc() - (intx) start < NativeCall::instruction_size) { + __ nop(); + } +#ifdef ASSERT + for (int i = 0; i < _bytes_to_copy; i++) { + assert(*(_pc_start + i) == *(start + i), "should be the same code"); + } +#endif + } else if (_id == access_field_id) { + // make a copy the code which is going to be patched. 
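+ // Walk the relocations covering _pc_start to recover the constant-section
+ // slot (a section_word relocation) that the patched load refers to.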
+ address const_addr = (address) -1; + CodeSection* cs = __ code_section(); + RelocIterator iter(cs, (address)_pc_start, (address)_pc_start+1); + while (iter.next()) { + if (iter.type() == relocInfo::section_word_type) { + section_word_Relocation* r = iter.section_word_reloc(); + assert(const_addr == (address) -1, "uninitalized yet"); + const_addr = r->target(); + break; + } + } + assert(const_addr != (address) -1, "initialized"); + __ relocate(section_word_Relocation::spec(const_addr, CodeBuffer::SECT_CONSTS)); + __ patchable_load(rscratch1, const_addr); + while ((intx) __ pc() - (intx) start < NativeCall::instruction_size) { + __ nop(); + } +#ifdef ASSERT + intptr_t* from = (intptr_t*) start; + intptr_t* to = (intptr_t*) _pc_start; + assert(from[0] == to[0], "should be same (nop)"); + assert(from[1] == to[1], "should be same (barrier)"); + //TODO: update + //XXX: update nativeInst_aarch32..? + #if 0 + assert(NativeFarLdr::from((address) (from + 2))->data_addr() + == NativeFarLdr::from((address) (to + 2))->data_addr(), + "should load from one addr)"); +#endif + for (int i = 4 * NativeInstruction::arm_insn_sz; i < _bytes_to_copy; i++) { + assert(*(_pc_start + i) == *(start + i), "should be the same code"); + } +#endif + } else { + ShouldNotReachHere(); + } + + int bytes_to_skip = _bytes_to_copy; + + if (_id == load_mirror_id) { + int offset = __ offset(); + if (CommentedAssembly) { + __ block_comment(" being_initialized check"); + } + assert(_obj != noreg, "must be a valid register"); + // Load without verification to keep code size small. We need it because + // begin_initialized_entry_offset has to fit in a byte. Also, we know it's not null. + __ ldr(rscratch1, Address(_obj, java_lang_Class::klass_offset_in_bytes())); + __ ldr(rscratch1, Address(rscratch1, InstanceKlass::init_thread_offset())); + __ cmp(rthread, rscratch1); + __ b(call_patch, Assembler::NE); + + // access_field patches may execute the patched code before it's + // copied back into place so we need to jump back into the main + // code of the nmethod to continue execution. + __ b(_patch_site_continuation); + // make sure this extra code gets skipped + bytes_to_skip += __ offset() - offset; + } + + // Now emit the patch record telling the runtime how to find the + // pieces of the patch. We only need 3 bytes but it has to be + // aligned as an instruction so emit 4 bytes. 
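+ // Byte layout of the record emitted below:
+ //   [0] unused (always 0)
+ //   [1] offset back to being_initialized_entry
+ //   [2] number of bytes to skip over the patch template
+ //   [3] unused (always 0)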
+ int sizeof_patch_record = 4; + bytes_to_skip += sizeof_patch_record; + + // emit the offsets needed to find the code to patch + int being_initialized_entry_offset = __ pc() - being_initialized_entry + sizeof_patch_record; + + __ emit_int8(0); + __ emit_int8(being_initialized_entry_offset); + __ emit_int8(bytes_to_skip); + __ emit_int8(0); + + address patch_info_pc = __ pc(); + + address entry = __ pc(); + NativeGeneralJump::insert_unconditional((address)_pc_start, entry); + address target = NULL; + relocInfo::relocType reloc_type = relocInfo::none; + switch (_id) { + case access_field_id: target = Runtime1::entry_for(Runtime1::access_field_patching_id); reloc_type = relocInfo::section_word_type; break; + case load_klass_id: target = Runtime1::entry_for(Runtime1::load_klass_patching_id); reloc_type = relocInfo::metadata_type; break; + case load_mirror_id: target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); reloc_type = relocInfo::oop_type; break; + case load_appendix_id: target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); reloc_type = relocInfo::oop_type; break; + default: ShouldNotReachHere(); + } + __ bind(call_patch); + + if (CommentedAssembly) { + __ block_comment("patch entry point"); + } + __ mov(rscratch1, RuntimeAddress(target)); + __ bl(rscratch1); + // pad with nops to globally known upper bound of patch site size + while (patch_info_pc - __ pc() < _patch_info_offset) + __ nop(); + assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change, required by shared code"); + ce->add_call_info_here(_info); + int jmp_off = __ offset(); + __ b(_patch_site_entry); + // Add enough nops so deoptimization can overwrite the jmp above with a call + // and not destroy the world. + for (int j = __ offset() ; j < jmp_off + NativeCall::instruction_size; j += NativeInstruction::arm_insn_sz) { + __ nop(); + } + + CodeSection* cs = __ code_section(); + RelocIterator iter(cs, (address)_pc_start, (address)_pc_start+1); + relocInfo::change_reloc_info_for_address(&iter, (address)_pc_start, reloc_type, relocInfo::none); +} + + +void DeoptimizeStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + ce->store_parameter(_trap_request, 0); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); + ce->add_call_info_here(_info); + DEBUG_ONLY(__ should_not_reach_here()); +} + + +void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { + address a; + if (_info->deoptimize_on_exception()) { + // Deoptimize, do not throw the exception, because it is probably wrong to do it here. 
+ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); + } else { + a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); + } + + ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); + __ bind(_entry); + __ far_call(RuntimeAddress(a)); + ce->add_call_info_here(_info); + ce->verify_oop_map(_info); + debug_only(__ should_not_reach_here()); +} + + +void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { + assert(__ rsp_offset() == 0, "frame size should be fixed"); + + __ bind(_entry); + // pass the object in a scratch register because all other registers + // must be preserved + if (_obj->is_cpu_register()) { + __ mov(rscratch1, _obj->as_register()); + } + __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL); + ce->add_call_info_here(_info); + debug_only(__ should_not_reach_here()); +} + + +void ArrayCopyStub::emit_code(LIR_Assembler* ce) { + //---------------slow case: call to native----------------- + __ bind(_entry); + // Figure out where the args should go + // This should really convert the IntrinsicID to the Method* and signature + // but I don't know how to do that. + // + VMRegPair args[5]; + BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; + SharedRuntime::java_calling_convention(signature, args, 5, true); + + // push parameters + // (src, src_pos, dest, destPos, length) + Register r[5]; + r[0] = src()->as_register(); + r[1] = src_pos()->as_register(); + r[2] = dst()->as_register(); + r[3] = dst_pos()->as_register(); + r[4] = length()->as_register(); + + // next registers will get stored on the stack + for (int i = 0; i < 5 ; i++ ) { + VMReg r_1 = args[i].first(); + if (r_1->is_stack()) { + int st_off = r_1->reg2stack() * wordSize; + __ str (r[i], Address(sp, st_off)); + } else { + assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); + } + } + + ce->align_call(lir_static_call); + + ce->emit_static_call_stub(); + Address resolve(SharedRuntime::get_resolve_static_call_stub(), + relocInfo::static_call_type); + __ trampoline_call(resolve); + ce->add_call_info_here(info()); + +#ifndef PRODUCT + __ lea(rscratch2, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); + __ increment(Address(rscratch2)); +#endif + + __ b(_continuation); +} + +#undef __ --- /dev/null 2018-09-25 19:24:17.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_Defs_aarch32.hpp 2018-09-25 19:24:17.000000000 +0300 @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_C1_DEFS_AARCH32_HPP +#define CPU_AARCH32_VM_C1_DEFS_AARCH32_HPP + +// Native word offsets from memory address (little endian format) +enum { + pd_lo_word_offset_in_bytes = 0, + pd_hi_word_offset_in_bytes = BytesPerWord +}; + +// TODO: We should understand what values are correct for the following 3 flags +// relevant to floating point operations: +// - UseSSE +// Highest supported SSE instruction set on x86/x64. I believe we should +// set it to 0 in VM_Version::initialize(), like other non-x86 ports do. +// - RoundFPResults +// Indicates whether rounding is needed for floating point results +// - pd_strict_fp_requires_explicit_rounding +// The same as above but for the strictfp mode + +// Explicit rounding operations are not required to implement the strictfp mode +enum { + pd_strict_fp_requires_explicit_rounding = false +}; + +// Registers +enum { + // Number of registers used during code emission + pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, + pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, + + // Number of registers killed by calls + pd_nof_caller_save_cpu_regs_frame_map = 9, + + pd_nof_caller_save_fpu_regs_frame_map = pd_nof_fpu_regs_frame_map, + // The following two constants need to be defined since they are referenced + // from c1_FrameMap.hpp, but actually they are never used, so can be set to + // arbitrary values. + pd_nof_cpu_regs_reg_alloc = -1, + pd_nof_fpu_regs_reg_alloc = -1, + + // All the constants below are used by linear scan register allocator only. + // Number of registers visible to register allocator + pd_nof_cpu_regs_linearscan = pd_nof_cpu_regs_frame_map, + pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, + pd_nof_xmm_regs_linearscan = 0, + + // Register allocator specific register numbers corresponding to first/last + // CPU/FPU registers available for allocation + pd_first_cpu_reg = 0, + pd_last_cpu_reg = 8, + pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, + pd_last_fpu_reg = pd_first_fpu_reg + pd_nof_fpu_regs_frame_map - 1, + // Register allocator specific register numbers corresponding to first/last + // CPU/FPU callee-saved registers. These constants are used in + // LinearScan::is_caller_save() only. + pd_first_callee_saved_cpu_reg = 4, + pd_last_callee_saved_cpu_reg = 11, + pd_first_callee_saved_fpu_reg = pd_first_fpu_reg + pd_nof_fpu_regs_frame_map/2, + pd_last_callee_saved_fpu_reg = pd_first_fpu_reg + pd_nof_fpu_regs_frame_map - 1 +}; + +// This flag must be in sync with how the floating point registers are stored +// on the stack by RegisterSaver::save_live_registers() method +// (sharedRuntime_aarch32.cpp) and save_live_registers() function +// (c1_Runtime1_aarch32.cpp). On AArch32 the floating point registers keep +// floats and doubles in their native form. No float to double conversion +// happens when the registers are stored on the stack. This is opposite to +// what happens on x86, where the FPU stack registers are 80 bits wide, +// and storing them in either 4 byte or 8 byte stack slot is a conversion +// operation. 
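// Illustrative aside (not part of the original change): the enum just below
// records exactly the convention described above, namely that spilled floats
// stay 4 bytes wide. For the word offsets defined at the top of this header,
// a little-endian 32-bit VM (BytesPerWord == 4) splits a jlong across two
// 4-byte slots as in this hypothetical sketch:
#if 0
  jlong v = 0x1122334455667788LL;  // example value only
  // byte offset pd_lo_word_offset_in_bytes (0) holds the low  word 0x55667788
  // byte offset pd_hi_word_offset_in_bytes (4) holds the high word 0x11223344
#endif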
+enum { + pd_float_saved_as_double = false +}; + +#endif // CPU_AARCH32_VM_C1_DEFS_AARCH32_HPP --- /dev/null 2018-09-25 19:24:19.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_FpuStackSim_aarch32.cpp 2018-09-25 19:24:18.000000000 +0300 @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_FpuStackSim.hpp" + +// No FPU stack on AArch32 --- /dev/null 2018-09-25 19:24:20.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_FpuStackSim_aarch32.hpp 2018-09-25 19:24:20.000000000 +0300 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_C1_FPUSTACKSIM_AARCH32_HPP +#define CPU_AARCH32_VM_C1_FPUSTACKSIM_AARCH32_HPP + +// No FPU stack on AArch32 + +#endif // CPU_AARCH32_VM_C1_FPUSTACKSIM_AARCH32_HPP --- /dev/null 2018-09-25 19:24:21.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_FrameMap_aarch32.cpp 2018-09-25 19:24:21.000000000 +0300 @@ -0,0 +1,257 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_LIR.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_aarch32.inline.hpp" + +LIR_Opr FrameMap::r0_opr; +LIR_Opr FrameMap::r1_opr; +LIR_Opr FrameMap::r2_opr; +LIR_Opr FrameMap::r3_opr; +LIR_Opr FrameMap::r4_opr; +LIR_Opr FrameMap::r5_opr; +LIR_Opr FrameMap::r6_opr; +LIR_Opr FrameMap::r7_opr; +LIR_Opr FrameMap::r8_opr; +LIR_Opr FrameMap::r9_opr; +LIR_Opr FrameMap::r10_opr; +LIR_Opr FrameMap::r11_opr; +LIR_Opr FrameMap::r12_opr; +LIR_Opr FrameMap::r13_opr; +LIR_Opr FrameMap::r14_opr; +LIR_Opr FrameMap::r15_opr; + +LIR_Opr FrameMap::r0_oop_opr; +LIR_Opr FrameMap::r1_oop_opr; +LIR_Opr FrameMap::r2_oop_opr; +LIR_Opr FrameMap::r3_oop_opr; +LIR_Opr FrameMap::r4_oop_opr; +LIR_Opr FrameMap::r5_oop_opr; +LIR_Opr FrameMap::r6_oop_opr; +LIR_Opr FrameMap::r7_oop_opr; +LIR_Opr FrameMap::r8_oop_opr; +LIR_Opr FrameMap::r9_oop_opr; +LIR_Opr FrameMap::r10_oop_opr; +LIR_Opr FrameMap::r11_oop_opr; +LIR_Opr FrameMap::r12_oop_opr; +LIR_Opr FrameMap::r13_oop_opr; +LIR_Opr FrameMap::r14_oop_opr; +LIR_Opr FrameMap::r15_oop_opr; + +LIR_Opr FrameMap::r0_metadata_opr; +LIR_Opr FrameMap::r1_metadata_opr; +LIR_Opr FrameMap::r2_metadata_opr; +LIR_Opr FrameMap::r3_metadata_opr; +LIR_Opr FrameMap::r4_metadata_opr; +LIR_Opr FrameMap::r5_metadata_opr; + +LIR_Opr FrameMap::sp_opr; +LIR_Opr FrameMap::receiver_opr; + +LIR_Opr FrameMap::rscratch1_opr; +LIR_Opr FrameMap::rscratch2_opr; +LIR_Opr FrameMap::rscratch_long_opr; + +LIR_Opr FrameMap::long0_opr; +LIR_Opr FrameMap::long1_opr; +LIR_Opr FrameMap::long2_opr; +LIR_Opr FrameMap::fpu0_float_opr; +LIR_Opr FrameMap::fpu0_double_opr; + +LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; +LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; + +void FrameMap::initialize() { + assert(!_init_done, "must be called once"); + + int i = 0; + map_register(i, r0); r0_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r1); r1_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r2); r2_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r3); r3_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r4); r4_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r5); r5_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r6); r6_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r7); r7_opr = LIR_OprFact::single_cpu(i); i++; + map_register(i, r8); r8_opr = LIR_OprFact::single_cpu(i); i++; + // Mapping lines in this block may be arbitrarily mixed, but all allocatable + // registers should go above this comment, and unallocatable registers - + // below. 
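// Illustrative aside (not part of the original change): the registers mapped
// above this comment (r0..r8) receive allocator numbers 0..8, which matches
// pd_first_cpu_reg/pd_last_cpu_reg and the nine caller-save entries declared
// in c1_Defs_aarch32.hpp; the registers mapped below (r9..r15) only get
// frame-map numbers, e.g. so that as_pointer_opr(sp) still works, but they
// are reserved for rscratch1, rthread, rfp, rscratch2, sp, lr and pc and are
// never made available to the linear scan allocator.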
+ map_register(i, r9); r9_opr = LIR_OprFact::single_cpu(i); i++; // rscratch1 + map_register(i, r10); r10_opr = LIR_OprFact::single_cpu(i); i++; // rthread + map_register(i, r11); r11_opr = LIR_OprFact::single_cpu(i); i++; // rfp + map_register(i, r12); r12_opr = LIR_OprFact::single_cpu(i); i++; // rscratch2 + map_register(i, r13); r13_opr = LIR_OprFact::single_cpu(i); i++; // sp + map_register(i, r14); r14_opr = LIR_OprFact::single_cpu(i); i++; // lr + map_register(i, r15); r15_opr = LIR_OprFact::single_cpu(i); i++; // r15_pc + + // This flag must be set after all integer registers are mapped but before + // the first use of as_*_opr() methods. + _init_done = true; + + r0_oop_opr = as_oop_opr(r0); + r1_oop_opr = as_oop_opr(r1); + r2_oop_opr = as_oop_opr(r2); + r3_oop_opr = as_oop_opr(r3); + r4_oop_opr = as_oop_opr(r4); + r5_oop_opr = as_oop_opr(r5); + r6_oop_opr = as_oop_opr(r6); + r7_oop_opr = as_oop_opr(r7); + r8_oop_opr = as_oop_opr(r8); + r9_oop_opr = as_oop_opr(r9); + r10_oop_opr = as_oop_opr(r10); + r11_oop_opr = as_oop_opr(r11); + r12_oop_opr = as_oop_opr(r12); + r13_oop_opr = as_oop_opr(r13); + r14_oop_opr = as_oop_opr(r14); + r15_oop_opr = as_oop_opr(r15); + + r0_metadata_opr = as_metadata_opr(r0); + r1_metadata_opr = as_metadata_opr(r1); + r2_metadata_opr = as_metadata_opr(r2); + r3_metadata_opr = as_metadata_opr(r3); + r4_metadata_opr = as_metadata_opr(r4); + r5_metadata_opr = as_metadata_opr(r5); + + sp_opr = as_pointer_opr(sp); + + VMRegPair regs; + BasicType sig_bt = T_OBJECT; + SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); + receiver_opr = as_oop_opr(regs.first()->as_Register()); + + rscratch1_opr = as_opr(rscratch1); + rscratch2_opr = as_opr(rscratch2); + rscratch_long_opr = as_long_opr(rscratch1, rscratch2); + + long0_opr = as_long_opr(r0, r1); + long1_opr = as_long_opr(r2, r3); + long2_opr = as_long_opr(r4, r5); + fpu0_float_opr = LIR_OprFact::single_fpu(0); + fpu0_double_opr = LIR_OprFact::double_fpu(0, 1); + + _caller_save_cpu_regs[0] = r0_opr; + _caller_save_cpu_regs[1] = r1_opr; + _caller_save_cpu_regs[2] = r2_opr; + _caller_save_cpu_regs[3] = r3_opr; + _caller_save_cpu_regs[4] = r4_opr; + _caller_save_cpu_regs[5] = r5_opr; + _caller_save_cpu_regs[6] = r6_opr; + _caller_save_cpu_regs[7] = r7_opr; + _caller_save_cpu_regs[8] = r8_opr; + + for (i = 0; i < nof_caller_save_fpu_regs; i++) { + _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); + } +} + +LIR_Opr FrameMap::stack_pointer() { + return sp_opr; +} + +// TODO: Make sure that neither method handle intrinsics nor compiled lambda +// forms modify sp register (i.e., vmIntrinsics::{_invokeBasic, _linkToVirtual, +// _linkToStatic, _linkToSpecial, _linkToInterface, _compiledLambdaForm}) +LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { + return LIR_OprFact::illegalOpr; +} + +// Return LIR_Opr corresponding to the given VMRegPair and data type +LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { + LIR_Opr opr = LIR_OprFact::illegalOpr; + VMReg r_1 = reg->first(); + VMReg r_2 = reg->second(); + if (r_1->is_stack()) { + // Convert stack slot to sp-based address. The calling convention does not + // count the SharedRuntime::out_preserve_stack_slots() value, so we must + // add it in here. 
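// Illustrative aside (not part of the original change), assuming for the sake
// of the example that out_preserve_stack_slots() == 0 and
// VMRegImpl::stack_slot_size == 4: an incoming argument assigned to stack
// slot 3 yields st_off = (3 + 0) * 4 = 12 below, i.e. an sp-based LIR_Address
// with displacement 12.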
+ int st_off = + (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * + VMRegImpl::stack_slot_size; + opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); + } else if (r_1->is_Register()) { + Register reg1 = r_1->as_Register(); +#ifdef HARD_FLOAT_CC + if (type == T_DOUBLE || type == T_FLOAT) { + ShouldNotReachHere(); + } else +#endif + if (type == T_LONG || type == T_DOUBLE) { + assert(r_2->is_Register(), "wrong VMReg"); + Register reg2 = r_2->as_Register(); + opr = as_long_opr(reg1, reg2); + } else if (type == T_OBJECT || type == T_ARRAY) { + opr = as_oop_opr(reg1); + } else if (type == T_METADATA) { + opr = as_metadata_opr(reg1); + } else { + opr = as_opr(reg1); + } + } else if (r_1->is_FloatRegister()) { + int num = r_1->as_FloatRegister()->encoding(); + if (type == T_FLOAT) { + opr = LIR_OprFact::single_fpu(num); + } else { + assert(is_even(num) && r_2->as_FloatRegister()->encoding() == (num + 1), + "wrong VMReg"); + opr = LIR_OprFact::double_fpu(num, num + 1); + } + } else { + ShouldNotReachHere(); + } + return opr; +} + +// Return VMReg corresponding to the given FPU register number as it is +// encoded in LIR_Opr. The conversion is straightforward because in this +// implementation the encoding of FPU registers in LIR_Opr's is the same as +// in FloatRegister's. +VMReg FrameMap::fpu_regname(int n) { + return as_FloatRegister(n)->as_VMReg(); +} + +// Check that the frame is properly addressable on the platform. The sp-based +// address of every frame slot must have the offset expressible as AArch32's +// imm12 with the separately stored sign. +bool FrameMap::validate_frame() { + int max_offset = in_bytes(framesize_in_bytes()); + int java_index = 0; + for (int i = 0; i < _incoming_arguments->length(); i++) { + LIR_Opr opr = _incoming_arguments->at(i); + if (opr->is_stack()) { + max_offset = MAX2(_argument_locations->at(java_index), max_offset); + } + java_index += type2size[opr->type()]; + } + return Assembler::is_valid_for_offset_imm(max_offset, 12); +} + +Address FrameMap::make_new_address(ByteSize sp_offset) const { + return Address(sp, in_bytes(sp_offset)); +} --- /dev/null 2018-09-25 19:24:22.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_FrameMap_aarch32.hpp 2018-09-25 19:24:22.000000000 +0300 @@ -0,0 +1,169 @@ +/* + * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH32_VM_C1_FRAMEMAP_AARCH32_HPP +#define CPU_AARCH32_VM_C1_FRAMEMAP_AARCH32_HPP + +// The following schema visualizes how a C1 frame looks like on AArch32. +// It corresponds to the case of an unextended frame. Each line of text +// represents one 4-byte slot. Every monitor takes two slots. Positions of +// incoming arguments are determined by the Java calling convention. Spill +// area and monitor area are not required to be 8-byte aligned. The slot +// for deoptimization support is used by frame::deoptimize() method to save +// the original pc before patching in the new one. +// +// When LIR_Opr's reference stack slots, they use virtual stack slot indices. +// They are mapped to the real stack slots by FrameMap::sp_offset_for_slot() +// and FrameMap::sp_offset_for_double_slot() methods. The first _argcount +// virtual stack slots correspond to the real stack slots occupied by the +// incoming arguments. Their mapping is defined by _argument_locations array +// (which is filled in by applying the Java calling convention). All other +// virtual stack slots correspond to spill slots. +// +// Higher addresses +// | incoming | virtual stack slots +// | | [0 ... _arg_count - 1] +// | arguments | +// |====================================|----X- 8-byte aligned +// | previous lr | /|\ address +// rfp ===> |------------------------------------| | +// | previous rfp | | +// |====================================| | +// | alignment slot (if needed) | | +// |====================================| | +// | slot for deoptimization support | | +// |====================================| | +// | monitor [_num_monitors - 1] object | | +// | | | +// | monitor [_num_monitors - 1] lock | | +// |------------------------------------| | +// | | | +// Direction of | ... | | _framesize +// stack growth | | | slots +// | |------------------------------------| | +// V | monitor [0] object | | +// | | | +// | monitor [0] lock | | +// |====================================| | +// | spill slot [_num_spills - 1] | | virtual stack slot +// |------------------------------------| | [_arg_count + _num_spills - 1] +// | ... | | ... +// |------------------------------------| | +// | spill slot [0] | | virtual stack slot +// |====================================| | [_arg_count] +// | reserved argument area for | | +// | ... 
| | +// | outgoing calls (8-byte aligned) | \|/ +// sp ===> |====================================|----X- 8-byte aligned +// | | address +// Lower addresses + + public: + enum { + first_available_sp_in_frame = 0, + max_frame_pad = 16, // max value that frame::get_frame_size() may return + frame_pad_in_bytes = max_frame_pad + }; + + public: + static LIR_Opr r0_opr; + static LIR_Opr r1_opr; + static LIR_Opr r2_opr; + static LIR_Opr r3_opr; + static LIR_Opr r4_opr; + static LIR_Opr r5_opr; + static LIR_Opr r6_opr; + static LIR_Opr r7_opr; + static LIR_Opr r8_opr; + static LIR_Opr r9_opr; + static LIR_Opr r10_opr; + static LIR_Opr r11_opr; + static LIR_Opr r12_opr; + static LIR_Opr r13_opr; + static LIR_Opr r14_opr; + static LIR_Opr r15_opr; + + static LIR_Opr r0_oop_opr; + static LIR_Opr r1_oop_opr; + static LIR_Opr r2_oop_opr; + static LIR_Opr r3_oop_opr; + static LIR_Opr r4_oop_opr; + static LIR_Opr r5_oop_opr; + static LIR_Opr r6_oop_opr; + static LIR_Opr r7_oop_opr; + static LIR_Opr r8_oop_opr; + static LIR_Opr r9_oop_opr; + static LIR_Opr r10_oop_opr; + static LIR_Opr r11_oop_opr; + static LIR_Opr r12_oop_opr; + static LIR_Opr r13_oop_opr; + static LIR_Opr r14_oop_opr; + static LIR_Opr r15_oop_opr; + + static LIR_Opr r0_metadata_opr; + static LIR_Opr r1_metadata_opr; + static LIR_Opr r2_metadata_opr; + static LIR_Opr r3_metadata_opr; + static LIR_Opr r4_metadata_opr; + static LIR_Opr r5_metadata_opr; + + static LIR_Opr sp_opr; + static LIR_Opr receiver_opr; + + static LIR_Opr rscratch1_opr; + static LIR_Opr rscratch2_opr; + static LIR_Opr rscratch_long_opr; + + static LIR_Opr long0_opr; + static LIR_Opr long1_opr; + static LIR_Opr long2_opr; + static LIR_Opr fpu0_float_opr; + static LIR_Opr fpu0_double_opr; + + static LIR_Opr as_long_opr(Register r1, Register r2) { + return LIR_OprFact::double_cpu(cpu_reg2rnr(r1), cpu_reg2rnr(r2)); + } + static LIR_Opr as_pointer_opr(Register r) { + return LIR_OprFact::single_cpu(cpu_reg2rnr(r)); + } + + static VMReg fpu_regname(int n); + + static bool is_caller_save_register(LIR_Opr opr) { + // On AArch32, unlike on SPARC, we never explicitly request the C1 register + // allocator to allocate a callee-saved register. Since the only place this + // method is called is the assert in LinearScan::color_lir_opr(), we can + // safely just always return true here. + return true; + } + static int nof_caller_save_cpu_regs() { + return pd_nof_caller_save_cpu_regs_frame_map; + } + static int last_cpu_reg() { + return pd_last_cpu_reg; + } + +#endif // CPU_AARCH32_VM_C1_FRAMEMAP_AARCH32_HPP --- /dev/null 2018-09-25 19:24:23.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_LIRAssembler_aarch32.cpp 2018-09-25 19:24:23.000000000 +0300 @@ -0,0 +1,3280 @@ +/* + * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArrayKlass.hpp" +#include "ci/ciInstance.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "nativeInst_aarch32.hpp" +#include "oops/objArrayKlass.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_aarch32.inline.hpp" + +#include "register_aarch32.hpp" + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +NEEDS_CLEANUP // remove this definitions ? +const Register IC_Klass = rscratch2; // where the IC klass is cached +const Register SYNC_header = r0; // synchronization header +const Register SHIFT_count = r0; // where count for shift operations must be + +#define __ _masm-> + + +static void select_different_registers(Register preserve, + Register extra, + Register &tmp1, + Register &tmp2) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, extra); + tmp2 = extra; + } + assert_different_registers(preserve, tmp1, tmp2); +} + + + +static void select_different_registers(Register preserve, + Register extra, + Register &tmp1, + Register &tmp2, + Register &tmp3) { + if (tmp1 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp1 = extra; + } else if (tmp2 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp2 = extra; + } else if (tmp3 == preserve) { + assert_different_registers(tmp1, tmp2, tmp3, extra); + tmp3 = extra; + } + assert_different_registers(preserve, tmp1, tmp2, tmp3); +} + +bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } + + +LIR_Opr LIR_Assembler::receiverOpr() { + return FrameMap::receiver_opr; +} + +LIR_Opr LIR_Assembler::osrBufferPointer() { + return FrameMap::as_pointer_opr(receiverOpr()->as_register()); +} + +//--------------fpu register translations----------------------- + + +address LIR_Assembler::float_constant(float f) { + address const_addr = __ float_constant(f); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + + +address LIR_Assembler::double_constant(double d) { + address const_addr = __ double_constant(d); + if (const_addr == NULL) { + bailout("const section overflow"); + return __ code()->consts()->start(); + } else { + return const_addr; + } +} + +void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } + +void LIR_Assembler::reset_FPU() { Unimplemented(); } + +void LIR_Assembler::fpop() { Unimplemented(); } + +void LIR_Assembler::fxch(int i) { Unimplemented(); } + +void LIR_Assembler::fld(int i) { Unimplemented(); } + +void LIR_Assembler::ffree(int i) { Unimplemented(); } + +void 
LIR_Assembler::breakpoint() { __ bkpt(0); } + +void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } + +void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } + +//------------------------------------------- + +static Register as_reg(LIR_Opr op) { + return op->is_double_cpu() ? op->as_register_lo() : op->as_register(); +} + +Address LIR_Assembler::as_Address(LIR_Address* addr) { + // as_Address(LIR_Address*, Address::InsnDataType) should be used instead + ShouldNotCallThis(); + return Address(); +} + +Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { + // as_Address_hi(LIR_Address*, Address::InsnDataType) should be used instead + ShouldNotCallThis(); + return Address(); +} + +Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { + // as_Address_lo(LIR_Address*, Address::InsnDataType) should be used instead + ShouldNotCallThis(); + return Address(); +} + +Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp, Address::InsnDataType type) { + if (addr->base()->is_illegal()) { + assert(addr->index()->is_illegal(), "must be illegal too"); + __ mov(tmp, addr->disp()); + return Address(tmp); // encoding is ok for any data type + } + + Register base = addr->base()->as_pointer_register(); + + if (addr->index()->is_illegal()) { + return Address(base, addr->disp()).safe_for(type, _masm, tmp); + } else if (addr->index()->is_cpu_register()) { + assert(addr->disp() == 0, "must be"); + Register index = addr->index()->as_pointer_register(); + return Address(base, index, lsl(addr->scale())).safe_for(type, _masm, tmp); + } else if (addr->index()->is_constant()) { + intptr_t addr_offset = (addr->index()->as_constant_ptr()->as_jint() << addr->scale()) + addr->disp(); + return Address(base, addr_offset).safe_for(type, _masm, tmp); + } + + Unimplemented(); + return Address(); +} + +Address LIR_Assembler::as_Address_hi(LIR_Address* addr, Address::InsnDataType type) { + assert(type == Address::IDT_INT, "only to be used for accessing high word of jlong"); + + if (addr->base()->is_illegal()) { + assert(addr->index()->is_illegal(), "must be illegal too"); + __ mov(rscratch1, addr->disp() + wordSize); + return Address(rscratch1); // encoding is ok for IDR_INT + } + + Register base = addr->base()->as_pointer_register(); + + if (addr->index()->is_illegal()) { + return Address(base, addr->disp() + wordSize).safe_for(Address::IDT_INT, _masm, rscratch1); + } else if (addr->index()->is_cpu_register()) { + assert(addr->disp() == 0, "must be"); + Register index = addr->index()->as_pointer_register(); + __ add(rscratch1, base, wordSize); + return Address(rscratch1, index, lsl(addr->scale())); // encoding is ok for IDT_INT + } else if (addr->index()->is_constant()) { + intptr_t addr_offset = (addr->index()->as_constant_ptr()->as_jint() << addr->scale()) + addr->disp() + wordSize; + return Address(base, addr_offset).safe_for(Address::IDT_INT, _masm, rscratch1); + } + + Unimplemented(); + return Address(); +} + +Address LIR_Assembler::as_Address_lo(LIR_Address* addr, Address::InsnDataType type) { + return as_Address(addr, rscratch1, type); +} + + +void LIR_Assembler::osr_entry() { + offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); + BlockBegin* osr_entry = compilation()->hir()->osr_entry(); + ValueStack* entry_state = osr_entry->state(); + int number_of_locks = entry_state->locks_size(); + + // we jump here if osr happens with the interpreter + // state set up to continue at the beginning of the + // loop that triggered osr - in particular, we have + // the following registers setup: + 
// + // r1: osr buffer + // + + // build frame + ciMethod* m = compilation()->method(); + __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); + + // OSR buffer is + // + // locals[nlocals-1..0] + // monitors[0..number_of_locks] + // + // locals is a direct copy of the interpreter frame so in the osr buffer + // so first slot in the local array is the last local from the interpreter + // and last slot is local[0] (receiver) from the interpreter + // + // Similarly with locks. The first lock slot in the osr buffer is the nth lock + // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock + // in the interpreter frame (the method lock if a sync method) + + // Initialize monitors in the compiled activation. + // r1: pointer to osr buffer + // + // All other registers are dead at this point and the locals will be + // copied into place by code emitted in the IR. + + Register OSR_buf = osrBufferPointer()->as_pointer_register(); + { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); + int monitor_offset = BytesPerWord * method()->max_locals() + + (2 * BytesPerWord) * (number_of_locks - 1); + // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in + // the OSR buffer using 2 word entries: first the lock and then + // the oop. + for (int i = 0; i < number_of_locks; i++) { + int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); +#ifdef ASSERT + // verify the interpreter's monitor has a non-null object + { + Label L; + __ ldr(rscratch1, Address(OSR_buf, slot_offset + 1*BytesPerWord)); + __ cbnz(rscratch1, L); + __ stop("locked object is NULL"); + __ bind(L); + } +#endif + __ ldr(rscratch1, Address(OSR_buf, slot_offset + 0)); + __ str(rscratch1, frame_map()->address_for_monitor_lock(i)); + __ ldr(rscratch1, Address(OSR_buf, slot_offset + 1*BytesPerWord)); + __ str(rscratch1, frame_map()->address_for_monitor_object(i)); + } + } +} + + +// inline cache check; done before the frame is built. +int LIR_Assembler::check_icache() { + Register receiver = FrameMap::receiver_opr->as_register(); + Register ic_klass = IC_Klass; + int start_offset = __ offset(); + __ inline_cache_check(receiver, ic_klass); + + // if icache check fails, then jump to runtime routine + // Note: RECEIVER must still contain the receiver! + Label dont; + __ b(dont, Assembler::EQ); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + // We align the verified entry point unless the method body + // (including its inline cache check) will fit in a single 64-byte + // icache line. + if (! method()->is_accessor() || __ offset() - start_offset > 4 * 4) { + // force alignment after the cache check. 
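// Illustrative aside (not part of the original change): alignment is skipped
// only when the method is a trivial accessor and the inline cache check above
// emitted at most 4 * 4 = 16 bytes (four 4-byte instructions), i.e. when the
// whole body plausibly fits in one 64-byte icache line; in every other case
// the verified entry point is aligned by the call below.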
+ __ align(CodeEntryAlignment); + } + + __ bind(dont); + return start_offset; +} + + +void LIR_Assembler::jobject2reg(jobject o, Register reg) { + if (o == NULL) { + __ mov(reg, 0); + } else { + __ movoop(reg, o, /*immediate*/true); + } +} + +void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); + add_call_info_here(info); +} + +void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { + PatchingStub* patch = new PatchingStub(_masm, patching_id(info)); + __ relocate(oop_Relocation::spec(__ oop_recorder()->allocate_oop_index(NULL))); + __ patchable_load(reg, pc()); + patching_epilog(patch, lir_patch_normal, reg, info); +} + +// Return sp decrement needed to build a frame +int LIR_Assembler::initial_frame_size_in_bytes() const { + // We need to subtract size of saved backtrace structure in counting frame size + return in_bytes(frame_map()->framesize_in_bytes()) - frame::get_frame_size() * wordSize; +} + +int LIR_Assembler::emit_exception_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(exception_handler_size()); + if (handler_base == NULL) { + // not enough space left for the handler + bailout("exception handler overflow"); + return -1; + } + + int offset = code_offset(); + + // the exception oop and pc are in r0, and r3 + // no other registers need to be preserved, so invalidate them + __ invalidate_registers(false, true, false); + + // check that there is really an exception + __ verify_not_null_oop(r0); + + // search an exception handler (r0: exception oop, r3: throwing pc) + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id))); __ should_not_reach_here(); + guarantee(code_offset() - offset <= exception_handler_size(), "overflow"); + __ end_a_stub(); + + return offset; +} + + +// Emit the code to remove the frame from the stack in the exception +// unwind path. +int LIR_Assembler::emit_unwind_handler() { +#ifndef PRODUCT + if (CommentedAssembly) { + _masm->block_comment("Unwind handler"); + } +#endif + + int offset = code_offset(); + + // Fetch the exception from TLS and clear out exception related thread state + __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset())); + __ mov(rscratch1, 0); + __ str(rscratch1, Address(rthread, JavaThread::exception_oop_offset())); + __ str(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); + + __ bind(_unwind_handler_entry); + __ verify_not_null_oop(r0); + + // Preform needed unlocking + MonitorExitStub* stub = NULL; + if (method()->is_synchronized()) { + monitor_address(0, FrameMap::r1_opr); + stub = new MonitorExitStub(FrameMap::r1_opr, true, 0); + __ unlock_object(r5, r4, r1, *stub->entry()); + __ bind(*stub->continuation()); + } + + if (compilation()->env()->dtrace_method_probes()) { + __ call_Unimplemented(); +#if 0 + // FIXME check exception_store is not clobbered below! 
+ __ movptr(Address(rsp, 0), rax); + __ mov_metadata(Address(rsp, sizeof(void*)), method()->constant_encoding()); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit))); +#endif + } + + // remove the activation and dispatch to the unwind handler + __ block_comment("remove_frame and dispatch to the unwind handler"); + __ remove_frame(initial_frame_size_in_bytes()); + __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id))); + + // Emit the slow path assembly + if (stub != NULL) { + stub->emit_code(this); + } + + return offset; +} + + +int LIR_Assembler::emit_deopt_handler() { + // if the last instruction is a call (typically to do a throw which + // is coming at the end after block reordering) the return address + // must still point into the code area in order to avoid assertion + // failures when searching for the corresponding bci => add a nop + // (was bug 5/14/1999 - gri) + __ nop(); + + // generate code for exception handler + address handler_base = __ start_a_stub(deopt_handler_size()); + if (handler_base == NULL) { + // not enough space left for the handler + bailout("deopt handler overflow"); + return -1; + } + + int offset = code_offset(); + + __ adr(lr, pc()); + // deopt handler expects deopt pc already pushed to stack, since for C2 + // it's not possible to allocate any register to hold the value + __ push(RegSet::of(lr), sp); + __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); + guarantee(code_offset() - offset <= deopt_handler_size(), "overflow"); + __ end_a_stub(); + + return offset; +} + +void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { + _masm->code_section()->relocate(adr, relocInfo::poll_type); + int pc_offset = code_offset(); + flush_debug_info(pc_offset); + info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); + if (info->exception_handlers() != NULL) { + compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); + } +} + +void LIR_Assembler::return_op(LIR_Opr result) { + assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,"); + // Pop the stack before the safepoint code + __ remove_frame(initial_frame_size_in_bytes()); + + if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { + __ reserved_stack_check(); + } + + address polling_page(os::get_polling_page()); + __ read_polling_page(rscratch2, polling_page, relocInfo::poll_return_type); + __ ret(lr); +} + +int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { + address polling_page(os::get_polling_page()); + guarantee(info != NULL, "Shouldn't be NULL"); + assert(os::is_poll_address(polling_page), "should be"); + __ get_polling_page(rscratch2, polling_page, relocInfo::poll_type); + add_debug_info_for_branch(info); // This isn't just debug info: + // it's the oop map + __ read_polling_page(rscratch2, relocInfo::poll_type); + return __ offset(); +} + +void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { + if (from_reg != to_reg) { + __ mov(to_reg, from_reg); + } +} + +void LIR_Assembler::swap_reg(Register a, Register b) { + Unimplemented(); +} + +void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { + assert(src->is_constant(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + + switch (c->type()) { + case T_INT: { + 
assert(patch_code == lir_patch_none, "no patching handled here"); + __ mov(dest->as_register(), c->as_jint_bits()); + break; + } + + case T_ADDRESS: { + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mov(dest->as_register(), c->as_jint()); + break; + } + + case T_LONG: { + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mov(dest->as_register_lo(), c->as_jint_lo_bits()); + __ mov(dest->as_register_hi(), c->as_jint_hi_bits()); + break; + } + + case T_OBJECT: { + if (patch_code == lir_patch_none) { + jobject2reg(c->as_jobject(), dest->as_register()); + } else { + jobject2reg_with_patching(dest->as_register(), info); + } + break; + } + + case T_METADATA: { + if (patch_code != lir_patch_none) { + klass2reg_with_patching(dest->as_register(), info); + } else { + __ mov_metadata(dest->as_register(), c->as_metadata()); + } + break; + } + + case T_FLOAT: { + if(dest->is_single_fpu()) { + if (__ operand_valid_for_float_immediate(c->as_jfloat())) { + __ vmov_f32(dest->as_float_reg(), c->as_jfloat()); + } else { + __ lea(rscratch1, InternalAddress(float_constant(c->as_jfloat()))); + __ vldr_f32(dest->as_float_reg(), Address(rscratch1)); + } + } else { + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mov(dest->as_register(), c->as_jint_bits()); + } + break; + } + + case T_DOUBLE: { + if(dest->is_double_fpu()) { + if (__ operand_valid_for_double_immediate(c->as_jdouble())) { + __ vmov_f64(dest->as_double_reg(), c->as_jdouble()); + } else { + __ lea(rscratch1, InternalAddress(double_constant(c->as_jdouble()))); + __ vldr_f64(dest->as_double_reg(), Address(rscratch1)); + } + } else { + assert(patch_code == lir_patch_none, "no patching handled here"); + __ mov(dest->as_register_lo(), c->as_jint_lo_bits()); + __ mov(dest->as_register_hi(), c->as_jint_hi_bits()); + } + break; + } + + default: + ShouldNotReachHere(); + } +} + +void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { + LIR_Const* c = src->as_constant_ptr(); + switch (c->type()) { + case T_OBJECT: + { + if (! c->as_jobject()) { + __ mov(rscratch1, 0); + __ str(rscratch1, frame_map()->address_for_slot(dest->single_stack_ix())); + } else { + const2reg(src, FrameMap::rscratch1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::rscratch1_opr, dest, c->type(), false); + } + } + break; + case T_ADDRESS: + { + const2reg(src, FrameMap::rscratch1_opr, lir_patch_none, NULL); + reg2stack(FrameMap::rscratch1_opr, dest, c->type(), false); + } + case T_INT: + case T_FLOAT: + { + __ mov(rscratch1, c->as_jint_bits()); + __ str(rscratch1, frame_map()->address_for_slot(dest->single_stack_ix())); + } + break; + case T_LONG: + case T_DOUBLE: + { + __ mov(rscratch1, c->as_jint_lo()); + __ str(rscratch1, frame_map()->address_for_slot(dest->double_stack_ix(), + lo_word_offset_in_bytes)); + if (c->as_jint_lo() != c->as_jint_hi()) + __ mov(rscratch1, c->as_jint_hi()); + __ str(rscratch1, frame_map()->address_for_slot(dest->double_stack_ix(), + hi_word_offset_in_bytes)); + } + break; + default: + ShouldNotReachHere(); + } +} + +/* + * For now this code can load only zero constants as in aarch32. + * It seems like this implementation can break some tests in future. + * TODO: ensure, write test, and rewrite if need. 
+ */ +void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) { + assert(src->is_constant(), "should not call otherwise"); + LIR_Const* c = src->as_constant_ptr(); + LIR_Address* to_addr = dest->as_address_ptr(); + + void (Assembler::* insn)(Register Rt, const Address &adr, Assembler::Condition cnd); + + __ mov(rscratch2, 0); + + int null_check_here = code_offset(); + + Address::InsnDataType idt = Address::toInsnDataType(type); + switch (type) { + case T_ADDRESS: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::str; + break; + case T_LONG: { + assert(c->as_jlong() == 0, "should be"); + insn = &Assembler::str; + Address addr = as_Address_hi(to_addr, Address::IDT_INT); + null_check_here = code_offset(); + __ str(rscratch2, addr); + idt = Address::IDT_INT; + break; + } + case T_INT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::str; + break; + case T_OBJECT: + case T_ARRAY: + assert(c->as_jobject() == 0, "should be"); + insn = &Assembler::str; + break; + case T_CHAR: + case T_SHORT: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::strh; + break; + case T_BOOLEAN: + case T_BYTE: + assert(c->as_jint() == 0, "should be"); + insn = &Assembler::strb; + break; + default: + ShouldNotReachHere(); + insn = &Assembler::str; // unreachable + } + + (_masm->*insn)(rscratch2, as_Address(to_addr, idt), Assembler::C_DFLT); + if (info) add_debug_info_for_null_check(null_check_here, info); +} + +void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { + assert(src->is_register(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + // move between cpu-registers + if (dest->is_single_cpu()) { + if (src->type() == T_LONG) { + // Can do LONG -> OBJECT + __ stop("investigate how \"LONG -> OBJECT\" works especially when high part is != 0"); + move_regs(src->as_register_lo(), dest->as_register()); + return; + } + if(src->is_single_fpu()) { + __ vmov_f32(dest->as_register(), src->as_float_reg()); + } else { + assert(src->is_single_cpu(), "must match"); + if (src->type() == T_OBJECT) { + __ verify_oop(src->as_register()); + } + move_regs(src->as_register(), dest->as_register()); + } + } else if (dest->is_double_cpu()) { + if(src->is_double_fpu()) { + __ vmov_f64(dest->as_register_lo(), dest->as_register_hi(), src->as_double_reg()); + } else { + assert(src->is_double_cpu(), "must match"); + Register f_lo = src->as_register_lo(); + Register f_hi = src->as_register_hi(); + Register t_lo = dest->as_register_lo(); + Register t_hi = dest->as_register_hi(); + assert(f_hi != f_lo, "must be different"); + assert(t_hi != t_lo, "must be different"); + check_register_collision(t_lo, &f_hi); + move_regs(f_lo, t_lo); + move_regs(f_hi, t_hi); + } + } else if (dest->is_single_fpu()) { + if(src->is_single_cpu()) { + __ vmov_f32(dest->as_float_reg(), src->as_register()); + } else { + __ vmov_f32(dest->as_float_reg(), src->as_float_reg()); + } + } else if (dest->is_double_fpu()) { + if(src->is_double_cpu()) { + __ vmov_f64(dest->as_double_reg(), src->as_register_lo(), src->as_register_hi()); + } else { + __ vmov_f64(dest->as_double_reg(), src->as_double_reg()); + } + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { + if (src->is_single_cpu()) { + if (type == T_ARRAY || type == T_OBJECT) { + __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); + __ verify_oop(src->as_register()); + } else 
{ + __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); + } + + } else if (src->is_double_cpu()) { + Address dest_addr_LO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes); + __ strd(src->as_register_lo(), src->as_register_hi(), dest_addr_LO); + } else if (src->is_single_fpu()) { + Address dest_addr = frame_map()->address_for_slot(dest->single_stack_ix()); + __ vstr_f32(src->as_float_reg(), dest_addr.safe_for(Address::IDT_FLOAT, _masm, rscratch1)); + } else if (src->is_double_fpu()) { + Address dest_addr = frame_map()->address_for_slot(dest->double_stack_ix()); + __ vstr_f64(src->as_double_reg(), dest_addr.safe_for(Address::IDT_DOUBLE, _masm, rscratch1)); + } else { + ShouldNotReachHere(); + } + +} + + +void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { + LIR_Address* to_addr = dest->as_address_ptr(); + + if (type == T_ARRAY || type == T_OBJECT) { + __ verify_oop(src->as_register()); + } + + PatchingStub* patch = NULL; + if (patch_code != lir_patch_none) { + assert(to_addr->disp() != 0, "must have"); + + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + address const_addr = __ address_constant(0); + if (!const_addr) BAILOUT("patchable offset"); + __ relocate(section_word_Relocation::spec(const_addr, CodeBuffer::SECT_CONSTS)); + __ patchable_load(rscratch1, const_addr); + patching_epilog(patch, patch_code, to_addr->base()->as_register(), info); + + to_addr = new LIR_Address(to_addr->base(), FrameMap::rscratch1_opr, to_addr->type()); + } + + + int null_check_here = code_offset(); + switch (type) { + case T_FLOAT: + if(src->is_single_fpu()) { + Address addr = as_Address(to_addr, Address::IDT_FLOAT); + null_check_here = code_offset(); + __ vstr_f32(src->as_float_reg(), addr); + break; + } // fall through at FPUless system + case T_ARRAY: // fall through + case T_OBJECT: // fall through + case T_ADDRESS: // fall though + case T_INT: { + Address addr = as_Address(to_addr, Address::toInsnDataType(type)); + null_check_here = code_offset(); + __ str(src->as_register(), addr); + break; + } + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. 
+ ShouldNotReachHere(); +// __ str(src->as_register(), as_Address(to_addr)); + break; + + case T_DOUBLE: + if(src->is_double_fpu()) { + Address addr = as_Address(to_addr, Address::IDT_DOUBLE); + null_check_here = code_offset(); + __ vstr_f64(src->as_double_reg(), addr); + break; + } // fall through at FPUless system + case T_LONG: { + Address addr = as_Address_lo(to_addr, Address::IDT_LONG); + null_check_here = code_offset(); + null_check_here += __ strd(src->as_register_lo(), src->as_register_hi(), addr); + break; + } + + case T_BYTE: // fall through + case T_BOOLEAN: { + Address addr = as_Address(to_addr, Address::toInsnDataType(type)); + null_check_here = code_offset(); + __ strb(src->as_register(), addr); + break; + } + case T_CHAR: // fall through + case T_SHORT: { + Address addr = as_Address(to_addr, Address::toInsnDataType(type)); + null_check_here = code_offset(); + __ strh(src->as_register(), addr); + break; + } + default: + ShouldNotReachHere(); + } + + if (info != NULL) { + add_debug_info_for_null_check(null_check_here, info); + } +} + + +void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { + assert(src->is_stack(), "should not call otherwise"); + assert(dest->is_register(), "should not call otherwise"); + + if (dest->is_single_cpu()) { + if (type == T_ARRAY || type == T_OBJECT) { + __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); + __ verify_oop(dest->as_register()); + } else { + __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); + } + + } else if (dest->is_double_cpu()) { + Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes); + __ ldrd(dest->as_register_lo(), dest->as_register_hi(), src_addr_LO); + } else if (dest->is_single_fpu()) { + Address src_addr = frame_map()->address_for_slot(src->single_stack_ix()); + __ vldr_f32(dest->as_float_reg(), src_addr.safe_for(Address::IDT_FLOAT, _masm, rscratch1)); + } else if (dest->is_double_fpu()) { + Address src_addr = frame_map()->address_for_slot(src->double_stack_ix()); + __ vldr_f64(dest->as_double_reg(), src_addr.safe_for(Address::IDT_DOUBLE, _masm, rscratch1)); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { + PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id); + __ relocate(metadata_Relocation::spec(__ oop_recorder()->allocate_metadata_index(NULL))); + __ patchable_load(reg, pc()); + patching_epilog(patch, lir_patch_normal, reg, info); +} + +void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { + + LIR_Opr temp; + if (type == T_LONG || type == T_DOUBLE) + temp = FrameMap::rscratch_long_opr; + else + temp = FrameMap::rscratch1_opr; + + stack2reg(src, temp, src->type()); + reg2stack(temp, dest, dest->type(), false); +} + + +void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { + LIR_Address* from_addr = src->as_address_ptr(); + + if (from_addr->base()->type() == T_OBJECT) { + __ verify_oop(from_addr->base()->as_pointer_register()); + } + + PatchingStub* patch = NULL; + if (patch_code != lir_patch_none) { + assert(from_addr->disp() != 0, "must have"); + + patch = new PatchingStub(_masm, PatchingStub::access_field_id); + address const_addr = __ address_constant(0); + if (!const_addr) BAILOUT("patchable offset"); + __ relocate(section_word_Relocation::spec(const_addr, 
CodeBuffer::SECT_CONSTS)); + __ patchable_load(rscratch1, const_addr); + patching_epilog(patch, patch_code, from_addr->base()->as_register(), info); + + from_addr = new LIR_Address(from_addr->base(), FrameMap::rscratch1_opr, from_addr->type()); + } + + int null_check_here = code_offset(); + + switch (type) { + case T_FLOAT: + if(dest->is_single_fpu()){ + Address addr = as_Address(from_addr, Address::IDT_FLOAT); + null_check_here = code_offset(); + __ vldr_f32(dest->as_float_reg(), addr); + break; + } // fall through at FPUless systems + case T_ARRAY: // fall through + case T_OBJECT: // fall through + case T_ADDRESS: // fall through + case T_INT: { + Address addr = as_Address(from_addr, Address::toInsnDataType(type)); + null_check_here = code_offset(); + __ ldr(dest->as_register(), addr); + break; + } + case T_METADATA: + // We get here to store a method pointer to the stack to pass to + // a dtrace runtime call. This can't work on 64 bit with + // compressed klass ptrs: T_METADATA can be a compressed klass + // ptr or a 64 bit method pointer. + ShouldNotReachHere(); +// __ ldr(dest->as_register(), as_Address(from_addr)); + break; + case T_DOUBLE: + if(dest->is_double_fpu()){ + Address addr = as_Address(from_addr, Address::IDT_DOUBLE); + null_check_here = code_offset(); + __ vldr_f64(dest->as_double_reg(), addr); + break; + } // fall through at FPUless systems + case T_LONG: { + Address addr = as_Address_lo(from_addr, Address::IDT_LONG); + null_check_here = code_offset(); + null_check_here += __ ldrd(dest->as_register_lo(), dest->as_register_hi(), addr); + break; + } + + case T_BYTE: { + Address addr = as_Address(from_addr, Address::IDT_BYTE); + null_check_here = code_offset(); + __ ldrsb(dest->as_register(), addr); + break; + } + case T_BOOLEAN: { + Address addr = as_Address(from_addr, Address::IDT_BOOLEAN); + null_check_here = code_offset(); + __ ldrb(dest->as_register(), addr); + break; + } + + case T_CHAR: { + Address addr = as_Address(from_addr, Address::IDT_CHAR); + null_check_here = code_offset(); + __ ldrh(dest->as_register(), addr); + break; + } + case T_SHORT: { + Address addr = as_Address(from_addr, Address::IDT_SHORT); + null_check_here = code_offset(); + __ ldrsh(dest->as_register(), addr); + break; + } + + default: + ShouldNotReachHere(); + } + + if (type == T_ARRAY || type == T_OBJECT) { + __ verify_oop(dest->as_register()); + } + + if (info != NULL) { + add_debug_info_for_null_check(null_check_here, info); + } +} + + +int LIR_Assembler::array_element_size(BasicType type) const { + int elem_size = type2aelembytes(type); + return exact_log2(elem_size); +} + +void LIR_Assembler::emit_op3(LIR_Op3* op) { + Register Rdividend = op->in_opr1()->as_register(); + Register Rdivisor = op->in_opr2()->as_register(); + Register Rscratch = op->in_opr3()->as_register(); + Register Rresult = op->result_opr()->as_register(); + int divisor = -1; + + /* + TODO: For some reason, using the Rscratch that gets passed in is + not possible because the register allocator does not see the tmp reg + as used, and assignes it the same register as Rdividend. We use rscratch1 + instead. 
+ + assert(Rdividend != Rscratch, ""); + assert(Rdivisor != Rscratch, ""); + */ + + if (Rdivisor == noreg && is_power_of_2(divisor)) { + // convert division by a power of two into some shifts and logical operations + } + + assert(op->code() == lir_irem || op->code() == lir_idiv, "should be irem or idiv"); + bool want_remainder = op->code() == lir_irem; + + __ divide(Rresult, Rdividend, Rdivisor, 32, want_remainder); +} + +void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { +#ifdef ASSERT + assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); + if (op->block() != NULL) _branch_target_blocks.append(op->block()); + if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); +#endif + + if (op->cond() == lir_cond_always) { + if (op->info() != NULL) add_debug_info_for_branch(op->info()); + __ b(*(op->label())); + } else { + Assembler::Condition acond; + if (op->code() == lir_cond_float_branch) { + bool is_unordered = (op->ublock() == op->block()); + // Assembler::EQ does not permit unordered branches, so we add + // another branch here. Likewise, Assembler::NE does not permit + // ordered branches. + if (is_unordered && op->cond() == lir_cond_equal + || !is_unordered && op->cond() == lir_cond_notEqual) + __ b(*(op->ublock()->label()), Assembler::VS); + switch(op->cond()) { + case lir_cond_equal: acond = Assembler::EQ; break; + case lir_cond_notEqual: acond = Assembler::NE; break; + case lir_cond_less: acond = (is_unordered ? Assembler::LT : Assembler::LO); break; + case lir_cond_lessEqual: acond = (is_unordered ? Assembler::LE : Assembler::LS); break; + case lir_cond_greaterEqual: acond = (is_unordered ? Assembler::HS : Assembler::GE); break; + case lir_cond_greater: acond = (is_unordered ? Assembler::HI : Assembler::GT); break; + default: ShouldNotReachHere(); + acond = Assembler::EQ; // unreachable + } + } else { + switch (op->cond()) { + case lir_cond_equal: acond = Assembler::EQ; break; + case lir_cond_notEqual: acond = Assembler::NE; break; + case lir_cond_less: acond = Assembler::LT; break; + case lir_cond_greaterEqual: acond = Assembler::GE; break; + case lir_cond_lessEqual: acond = Assembler::LE; break; + case lir_cond_greater: acond = Assembler::GT; break; + case lir_cond_belowEqual: acond = Assembler::LS; break; + case lir_cond_aboveEqual: acond = Assembler::HS; break; + default: ShouldNotReachHere(); + acond = Assembler::EQ; // unreachable + } + if (op->type() == T_LONG) { + // a special trick here to be able to effectively compare jlongs + // for the lessEqual and greater conditions the jlong operands are swapped + // during comparison and hence should use mirror condition in conditional + // instruction + // see LIR_Assembler::comp_op and LIR_Assembler::cmove + switch (op->cond()) { + case lir_cond_lessEqual: acond = Assembler::GE; break; + case lir_cond_greater: acond = Assembler::LT; break; + } + } + } + __ b(*(op->label()), acond); + } +} + +FloatRegister LIR_Assembler::as_float_reg(LIR_Opr doubleReg) { + assert(doubleReg->is_double_fpu(), "must be f64"); + return as_FloatRegister(doubleReg->fpu_regnrLo()); +} + +void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); + + switch (op->bytecode()) { + case Bytecodes::_i2f: + { + __ vmov_f32(dest->as_float_reg(), src->as_register()); + __ vcvt_f32_s32(dest->as_float_reg(), dest->as_float_reg()); + break; + } + case Bytecodes::_i2d: + { + __ vmov_f32(as_float_reg(dest), src->as_register()); + __ 
vcvt_f64_s32(dest->as_double_reg(), as_float_reg(dest)); + break; + } + case Bytecodes::_f2d: + { + __ vcvt_f64_f32(dest->as_double_reg(), src->as_float_reg()); + break; + } + case Bytecodes::_d2f: + { + __ vcvt_f32_f64(dest->as_float_reg(), src->as_double_reg()); + break; + } + case Bytecodes::_i2c: + { + __ uxth(dest->as_register(), src->as_register()); + break; + } + case Bytecodes::_i2l: + { + const Register dst_hi = dest->as_register_hi(); + const Register dst_lo = dest->as_register_lo(); + const Register src_lo = as_reg(src); + __ mov(dst_lo, src_lo); + __ asr(dst_hi, src_lo, 31); + break; + } + case Bytecodes::_i2s: + { + __ sxth(dest->as_register(), src->as_register()); + break; + } + case Bytecodes::_i2b: + { + __ sxtb(dest->as_register(), src->as_register()); + break; + } + case Bytecodes::_l2i: + { + assert(dest->is_single_cpu(), "must be single register"); + __ mov(dest->as_register(), src->as_register_lo()); + break; + } + case Bytecodes::_f2i: + { + __ vcvt_s32_f32(src->as_float_reg(), src->as_float_reg()); + __ vmov_f32(dest->as_register(), src->as_float_reg()); + break; + } + case Bytecodes::_d2i: + { + __ vcvt_s32_f64(as_float_reg(src), src->as_double_reg()); + __ vmov_f32(dest->as_register(), as_float_reg(src)); + break; + } + default: ShouldNotReachHere(); + } +} + +void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { + if (op->init_check()) { + __ ldrb(rscratch1, Address(op->klass()->as_register(), + InstanceKlass::init_state_offset())); + __ cmp(rscratch1, InstanceKlass::fully_initialized); + add_debug_info_for_null_check_here(op->stub()->info()); + __ b(*op->stub()->entry(), Assembler::NE); + } + __ allocate_object(op->obj()->as_register(), + op->tmp1()->as_register(), + op->tmp2()->as_register(), + op->header_size(), + op->object_size(), + op->klass()->as_register(), + *op->stub()->entry()); + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { + Register len = as_reg(op->len()); + + if (UseSlowPath || + (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || + (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { + __ b(*op->stub()->entry()); + } else { + Register tmp1 = op->tmp1()->as_register(); + Register tmp2 = op->tmp2()->as_register(); + Register tmp3 = op->tmp3()->as_register(); + if (len == tmp1) { + tmp1 = tmp3; + } else if (len == tmp2) { + tmp2 = tmp3; + } else if (len == tmp3) { + // everything is ok + } else { + __ mov(tmp3, len); + } + __ allocate_array(op->obj()->as_register(), + len, + tmp1, + tmp2, + arrayOopDesc::header_size(op->type()), + array_element_size(op->type()), + op->klass()->as_register(), + *op->stub()->entry()); + } + __ bind(*op->stub()->continuation()); +} + +void LIR_Assembler::type_profile_helper(Register mdo, + ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done) { + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + // See if the receiver is receiver[n]. 
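+ // byte_offset_of_slot() gives the byte offsets, within the MethodData, of row i's receiver cell (receiver_offset(i)) and its hit counter (receiver_count_offset(i)).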
+ __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + __ ldr(rscratch1, Address(rscratch2)); + __ cmp(recv, rscratch1); + __ b(next_test, Assembler::NE); + Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment); + __ b(*update_done); + __ bind(next_test); + } + + // Didn't find receiver; find next empty slot and fill it in + for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { + Label next_test; + __ lea(rscratch2, + Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); + Address recv_addr(rscratch2); + __ ldr(rscratch1, recv_addr); + __ cbnz(rscratch1, next_test); + __ str(recv, recv_addr); + __ mov(rscratch1, DataLayout::counter_increment); + __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); + __ str(rscratch1, Address(rscratch2)); + __ b(*update_done); + __ bind(next_test); + } +} + +void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) { + // we always need a stub for the failure case. + CodeStub* stub = op->stub(); + Register obj = op->object()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register dst = op->result_opr()->as_register(); + ciKlass* k = op->klass(); + Register Rtmp1 = noreg; + + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + + const bool should_profile = op->should_profile(); + if (should_profile) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + Label profile_cast_success, profile_cast_failure; + Label *success_target = op->should_profile() ? &profile_cast_success : success; + Label *failure_target = op->should_profile() ? 
&profile_cast_failure : failure; + + if (obj == k_RInfo) { + k_RInfo = dst; + } else if (obj == klass_RInfo) { + klass_RInfo = dst; + } + if (k->is_loaded()) { + select_different_registers(obj, dst, k_RInfo, klass_RInfo); + } else { + Rtmp1 = op->tmp3()->as_register(); + select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); + } + + assert_different_registers(obj, k_RInfo, klass_RInfo); + + if (should_profile) { + Label not_null; + __ cbnz(obj, not_null); + // Object is null; update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + Address data_addr + = __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, DataLayout::flags_offset()), + 0); + __ ldrb(rscratch1, data_addr); + __ orr(rscratch1, rscratch1, BitData::null_seen_byte_constant()); + __ strb(rscratch1, data_addr); + __ b(*obj_is_null); + __ bind(not_null); + } else { + __ cbz(obj, *obj_is_null); + } + + if (!k->is_loaded()) { + klass2reg_with_patching(k_RInfo, op->info_for_patch()); + } else { + __ mov_metadata(k_RInfo, k->constant_encoding()); + } + __ verify_oop(obj); + + if (op->fast_check()) { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(rscratch1, obj); + __ cmp( rscratch1, k_RInfo); + + __ b(*failure_target, Assembler::NE); + // successful cast, fall through to profile or jump + } else { + // get object class + // not a safepoint as obj null check happens earlier + __ load_klass(klass_RInfo, obj); + if (k->is_loaded()) { + // See if we get an immediate positive hit + __ ldr(rscratch1, Address(klass_RInfo, long(k->super_check_offset()))); + __ cmp(k_RInfo, rscratch1); + if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { + __ b(*failure_target, Assembler::NE); + // successful cast, fall through to profile or jump + } else { + // See if we get an immediate positive hit + __ b(*success_target, Assembler::EQ); + // check for self + __ cmp(klass_RInfo, k_RInfo); + __ b(*success_target, Assembler::EQ); + + __ push(klass_RInfo); + __ push(k_RInfo); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ ldr(klass_RInfo, Address(__ post(sp, 2 * wordSize))); + + // result is a boolean + __ cbz(klass_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } + } else { + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + __ push(klass_RInfo); + __ push(k_RInfo); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ ldr(k_RInfo, Address(__ post(sp, 2 * wordSize))); + + // result is a boolean + __ cbz(k_RInfo, *failure_target); + // successful cast, fall through to profile or jump + } + } + if (should_profile) { + Register mdo = klass_RInfo, recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, obj); + Label update_done; + type_profile_helper(mdo, md, data, recv, success); + __ b(*success); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr + = __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, CounterData::count_offset()), + 0); + __ ldr(rscratch1, counter_addr); + __ sub(rscratch1, rscratch1, DataLayout::counter_increment); + __ str(rscratch1, counter_addr); + __ b(*failure); + } + __ 
b(*success); +} + + +void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { + LIR_Code code = op->code(); + if (code == lir_store_check) { + Register value = op->object()->as_register(); + Register array = op->array()->as_register(); + Register k_RInfo = op->tmp1()->as_register(); + Register klass_RInfo = op->tmp2()->as_register(); + Register Rtmp1 = op->tmp3()->as_register(); + + CodeStub* stub = op->stub(); + + // check if it needs to be profiled + ciMethodData* md; + ciProfileData* data; + + const bool should_profile = op->should_profile(); + if (should_profile) { + ciMethod* method = op->profiled_method(); + assert(method != NULL, "Should have method"); + int bci = op->profiled_bci(); + md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + data = md->bci_to_data(bci); + assert(data != NULL, "need data for type check"); + assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); + } + Label profile_cast_success, profile_cast_failure, done; + Label *success_target = should_profile ? &profile_cast_success : &done; + Label *failure_target = should_profile ? &profile_cast_failure : stub->entry(); + + if (should_profile) { + Label not_null; + __ cbnz(value, not_null); + // Object is null; update MDO and exit + Register mdo = klass_RInfo; + __ mov_metadata(mdo, md->constant_encoding()); + Address data_addr + = __ form_address(rscratch2, mdo, + md->byte_offset_of_slot(data, DataLayout::flags_offset()), + 0); + __ ldrb(rscratch1, data_addr); + __ orr(rscratch1, rscratch1, BitData::null_seen_byte_constant()); + __ strb(rscratch1, data_addr); + __ b(done); + __ bind(not_null); + } else { + __ cbz(value, done); + } + + add_debug_info_for_null_check_here(op->info_for_exception()); + __ load_klass(k_RInfo, array); + __ load_klass(klass_RInfo, value); + + // get instance klass (it's already uncompressed) + __ ldr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); + // perform the fast part of the checking logic + __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); + // call out-of-line instance of __ check_klass_subtype_slow_path(...): + __ push(klass_RInfo); + __ push(k_RInfo); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ ldr(k_RInfo, Address(__ post(sp, 2 * wordSize))); + // result is a boolean + __ cbz(k_RInfo, *failure_target); + // fall through to the success case + + if (should_profile) { + Register mdo = klass_RInfo, recv = k_RInfo; + __ bind(profile_cast_success); + __ mov_metadata(mdo, md->constant_encoding()); + __ load_klass(recv, value); + type_profile_helper(mdo, md, data, recv, &done); + __ b(done); + + __ bind(profile_cast_failure); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + __ lea(rscratch2, counter_addr); + __ ldr(rscratch1, Address(rscratch2)); + __ sub(rscratch1, rscratch1, DataLayout::counter_increment); + __ str(rscratch1, Address(rscratch2)); + __ b(*stub->entry()); + } + + __ bind(done); + } else if (code == lir_checkcast) { + Register obj = op->object()->as_register(); + Register dst = op->result_opr()->as_register(); + Label success; + emit_typecheck_helper(op, &success, op->stub()->entry(), &success); + __ bind(success); + if (dst != obj) { + __ mov(dst, obj); + } + } else if (code == lir_instanceof) { + Register obj = op->object()->as_register(); + Register dst = op->result_opr()->as_register(); + Label success, failure, done; + 
emit_typecheck_helper(op, &success, &failure, &failure); + __ bind(failure); + __ mov(dst, 0); + __ b(done); + __ bind(success); + __ mov(dst, 1); + __ bind(done); + } else { + ShouldNotReachHere(); + } +} + +// TODO: reuse masm cmpxchgw +void LIR_Assembler::casw(Register addr, Register newval, Register cmpval, Register result) { + assert(newval != cmpval, "must be different"); + Label retry_load, nope; + // flush and load exclusive from the memory location + // and fail if it is not what we expect + __ bind(retry_load); + __ ldrex(result, addr); + __ cmp(result, cmpval); + __ mov(result, 1, Assembler::NE); + __ b(nope, Assembler::NE); + // if the store-exclusive succeeds (no intervening write), result will be zero + __ strex(result, newval, addr); + // retry if the store-exclusive fails, so we only ever return after the compare fails + // or the store succeeds; this ensures we don't return a stale value after a failed write. + __ cbnz(result, retry_load); + __ membar(__ AnyAny); + __ bind(nope); +} + +void LIR_Assembler::casl(Register addr, Register newval_lo, Register newval_hi, Register cmpval_lo, Register cmpval_hi, Register tmp_lo, Register tmp_hi, Register result) { + assert(newval_lo->successor() == newval_hi, "must be contiguous"); + assert(tmp_lo->successor() == tmp_hi, "must be contiguous"); + assert(tmp_lo->encoding_nocheck() % 2 == 0, "Must be an even register"); + assert_different_registers(newval_lo, newval_hi, cmpval_lo, cmpval_hi, tmp_lo, tmp_hi); + + Label retry_load, nope; + // flush and load exclusive from the memory location + // and fail if it is not what we expect + __ bind(retry_load); + __ mov(result, 1); + __ ldrexd(tmp_lo, addr); + __ cmp(tmp_lo, cmpval_lo); + __ b(nope, Assembler::NE); + __ cmp(tmp_hi, cmpval_hi); + __ b(nope, Assembler::NE); + // if the store-exclusive succeeds (no intervening write), result will be zero + __ strexd(result, newval_lo, addr); + // retry if the store-exclusive fails, so we only ever return after the compare fails + // or the store succeeds; this ensures we don't return a stale value after a failed write. 
+ __ cbnz(result, retry_load); + __ membar(__ AnyAny); + __ bind(nope); +} + + +void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { + Register addr; + if (op->addr()->is_register()) { + addr = as_reg(op->addr()); + } else { + assert(op->addr()->is_address(), "what else?"); + LIR_Address* addr_ptr = op->addr()->as_address_ptr(); + assert(addr_ptr->disp() == 0, "need 0 disp"); + assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); + addr = as_reg(addr_ptr->base()); + } + Register result = as_reg(op->result_opr()); + if (op->code() == lir_cas_obj || op->code() == lir_cas_int) { + Register newval = as_reg(op->new_value()); + Register cmpval = as_reg(op->cmp_value()); + casw(addr, newval, cmpval, result); + } else if (op->code() == lir_cas_long){ + Register newval_lo = op->new_value()->as_register_lo(); + Register newval_hi = op->new_value()->as_register_hi(); + Register cmpval_lo = op->cmp_value()->as_register_lo(); + Register cmpval_hi = op->cmp_value()->as_register_hi(); + Register tmp_lo = op->tmp1()->as_register_lo(); + Register tmp_hi = op->tmp1()->as_register_hi(); + casl(addr, newval_lo, newval_hi, cmpval_lo, cmpval_hi, tmp_lo, tmp_hi, result); + } else { + ShouldNotReachHere(); + } +} + +static void patch_condition(address start_insn, address end_insn, Assembler::Condition cond) { + for (uint32_t* insn_p = (uint32_t*) start_insn; (address) insn_p < end_insn; ++insn_p) { + uint32_t insn = *insn_p; + assert((insn >> 28) == Assembler::AL, "instructions in patch" + " should allow conditional form and be in ALWAYS condition"); + *insn_p = (insn & 0x0fffffff) | (cond << 28); + } +} + +void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { + + Assembler::Condition acond, ncond; + switch (condition) { + case lir_cond_equal: acond = Assembler::EQ; ncond = Assembler::NE; break; + case lir_cond_notEqual: acond = Assembler::NE; ncond = Assembler::EQ; break; + case lir_cond_less: acond = Assembler::LT; ncond = Assembler::GE; break; + case lir_cond_greaterEqual: acond = Assembler::GE; ncond = Assembler::LT; break; + case lir_cond_lessEqual: acond = Assembler::LE; ncond = Assembler::GT; break; + case lir_cond_greater: acond = Assembler::GT; ncond = Assembler::LE; break; + case lir_cond_belowEqual: Unimplemented(); return; + case lir_cond_aboveEqual: Unimplemented(); return; + default: ShouldNotReachHere(); return; + } + if (type == T_LONG) { + // for the lessEqual and greater conditions the jlong operands are swapped + // during comparison and hence should use mirror condition in conditional + // instruction. 
see comp_op()) + switch (condition) { + case lir_cond_lessEqual: acond = Assembler::GE; ncond = Assembler::LT; break; + case lir_cond_greater: acond = Assembler::LT; ncond = Assembler::GE; break; + } + } + + address true_instrs = __ pc(); + if (opr1->is_cpu_register()) { + reg2reg(opr1, result); + } else if (opr1->is_stack()) { + stack2reg(opr1, result, result->type()); + } else if (opr1->is_constant()) { + const2reg(opr1, result, lir_patch_none, NULL); + } else { + ShouldNotReachHere(); + } + patch_condition(true_instrs, __ pc(), acond); + + address false_instrs = __ pc(); + if (opr2->is_cpu_register()) { + reg2reg(opr2, result); + } else if (opr2->is_stack()) { + stack2reg(opr2, result, result->type()); + } else if (opr2->is_constant()) { + const2reg(opr2, result, lir_patch_none, NULL); + } else { + ShouldNotReachHere(); + } + patch_condition(false_instrs, __ pc(), ncond); +} + +void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); + + if (left->is_single_cpu()) { + assert(left->type() != T_FLOAT, "expect integer type"); + assert(right->type() != T_FLOAT, "expect integer type"); + assert(dest->type() != T_FLOAT, "expect integer type"); + + Register lreg = left->as_register(); + Register dreg = as_reg(dest); + + if (right->is_single_cpu()) { + // cpu register - cpu register + + assert((left->type() == T_INT || left->type() == T_OBJECT) + && right->type() == T_INT + && dest->type() == T_INT, + "should be"); + Register rreg = right->as_register(); + switch (code) { + case lir_add: __ add (dest->as_register(), lreg, rreg); break; + case lir_sub: __ sub (dest->as_register(), lreg, rreg); break; + case lir_mul: __ mul (dest->as_register(), lreg, rreg); break; + default: ShouldNotReachHere(); + } + + } else if (right->is_double_cpu()) { + ShouldNotReachHere(); // for obj+long op the generator casts long to int before invoking add + } else if (right->is_constant()) { + // cpu register - constant + jint c = right->as_constant_ptr()->as_jint(); + + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + if (c == 0 && dreg == lreg) { + COMMENT("effective nop elided"); + return; + } + + if (Assembler::operand_valid_for_add_sub_immediate(c)) { + switch (code) { + case lir_add: __ add(dreg, lreg, c); break; + case lir_sub: __ sub(dreg, lreg, c); break; + default: ShouldNotReachHere(); + } + } else { + __ mov(rscratch1, c); + switch (code) { + case lir_add: __ add(dreg, lreg, rscratch1); break; + case lir_sub: __ sub(dreg, lreg, rscratch1); break; + default: ShouldNotReachHere(); + } + } + } else { + ShouldNotReachHere(); + } + + } else if (left->is_double_cpu()) { + assert(left->type() != T_DOUBLE, "expect integer type"); + assert(right->type() != T_DOUBLE, "expect integer type"); + assert(dest->type() != T_DOUBLE, "expect integer type"); + + Register lreg_lo = left->as_register_lo(); + Register lreg_hi = left->as_register_hi(); + + if (right->is_double_cpu()) { + // cpu register - cpu register + Register rreg_lo = right->as_register_lo(); + Register rreg_hi = right->as_register_hi(); + Register dreg_lo = dest->as_register_lo(); + Register dreg_hi = dest->as_register_hi(); + if (code == lir_add || code == lir_sub) { + check_register_collision(dreg_lo, &lreg_hi, &rreg_hi); + } + switch (code) { + case lir_add: __ adds (dreg_lo, lreg_lo, rreg_lo); + __ adc (dreg_hi, lreg_hi, rreg_hi); break; + case lir_sub: __ 
subs (dreg_lo, lreg_lo, rreg_lo); + __ sbc (dreg_hi, lreg_hi, rreg_hi); break; + case lir_mul: __ mult_long (dreg_lo, dreg_hi, + lreg_lo, lreg_hi, rreg_lo, rreg_hi); break; + default: + ShouldNotReachHere(); + } + + } else if (right->is_constant()) { + const jint c_lo = right->as_constant_ptr()->as_jint_lo_bits(); + const jint c_hi = right->as_constant_ptr()->as_jint_hi_bits(); + const Register dreg_lo = dest->as_register_lo(); + const Register dreg_hi = dest->as_register_hi(); + assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); + if (c_lo == 0 && c_hi == 0 && dreg_lo == lreg_lo && dreg_hi == lreg_hi) { + COMMENT("effective nop elided"); + return; + } + check_register_collision(dreg_lo, &lreg_hi, NULL, rscratch2); + switch (code) { + case lir_add: + if (Assembler::operand_valid_for_add_sub_immediate(c_lo)) + __ adds(dreg_lo, lreg_lo, c_lo); + else { + __ mov(rscratch1, c_lo); + __ adds(dreg_lo, lreg_lo, rscratch1); + } + if (Assembler::operand_valid_for_add_sub_immediate(c_hi)) + __ adc(dreg_hi, lreg_hi, c_hi); + else { + __ mov(rscratch1, c_hi); + __ adc(dreg_lo, lreg_hi, rscratch1); + } + break; + case lir_sub: + if (Assembler::operand_valid_for_add_sub_immediate(c_lo)) + __ subs(dreg_lo, lreg_lo, c_lo); + else { + __ mov(rscratch1, c_lo); + __ subs(dreg_lo, lreg_lo, rscratch1); + } + if (Assembler::operand_valid_for_add_sub_immediate(c_hi)) + __ sbc(dreg_hi, lreg_hi, c_hi); + else { + __ mov(rscratch1, c_hi); + __ sbc(dreg_hi, lreg_hi, rscratch1); + } + break; + default: + ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } + } else if (left->is_single_fpu()) { + assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register"); + switch (code) { + case lir_add: __ vadd_f32 (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_sub: __ vsub_f32 (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_mul: __ vmul_f32 (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + case lir_div: __ vdiv_f32 (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break; + default: + ShouldNotReachHere(); + } + } else if (left->is_double_fpu()) { + if (right->is_double_fpu()) { + // cpu register - cpu register + switch (code) { + case lir_add: __ vadd_f64 (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_sub: __ vsub_f64 (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_mul: __ vmul_f64 (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + case lir_div: __ vdiv_f64 (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break; + default: + ShouldNotReachHere(); + } + } else { + if (right->is_constant()) { + ShouldNotReachHere(); + } + ShouldNotReachHere(); + } + } else if (left->is_single_stack() || left->is_address()) { + assert(left == dest, "left and dest must be equal"); + ShouldNotReachHere(); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { + switch(code) { + case lir_abs : __ vabs_f64(dest->as_double_reg(), value->as_double_reg()); break; + case lir_sqrt: __ vsqrt_f64(dest->as_double_reg(), value->as_double_reg()); break; + default : ShouldNotReachHere(); + } +} + +void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { + + assert(left->is_single_cpu() || left->is_double_cpu(), 
"expect single or double register"); + Register Rleft = left->is_single_cpu() ? left->as_register() : + left->as_register_lo(); + if (dst->is_single_cpu()) { + Register Rdst = dst->as_register(); + if (right->is_constant()) { + switch (code) { + case lir_logic_and: __ andr (Rdst, Rleft, right->as_jint()); break; + case lir_logic_or: __ orr (Rdst, Rleft, right->as_jint()); break; + case lir_logic_xor: __ eor (Rdst, Rleft, right->as_jint()); break; + default: ShouldNotReachHere(); break; + } + } else { + Register Rright = right->is_single_cpu() ? right->as_register() : + right->as_register_lo(); + switch (code) { + case lir_logic_and: __ andr (Rdst, Rleft, Rright); break; + case lir_logic_or: __ orr (Rdst, Rleft, Rright); break; + case lir_logic_xor: __ eor (Rdst, Rleft, Rright); break; + default: ShouldNotReachHere(); break; + } + } + } else { + assert(dst->is_double_cpu(), "mismatched logic op operand size"); + const Register Rdst_lo = dst->as_register_lo(); + const Register Rdst_hi = dst->as_register_hi(); + Register Rleft_hi = left->as_register_hi(); + if (right->is_constant()) { + // LIR generator enforces jlong constants to be valid_immediate12 + // so we know they fit into 32-bit int + switch (code) { + case lir_logic_and: __ andr (Rdst_lo, Rleft, (int)right->as_jlong()); break; + case lir_logic_or: __ orr (Rdst_lo, Rleft, (int)right->as_jlong()); break; + case lir_logic_xor: __ eor (Rdst_lo, Rleft, (int)right->as_jlong()); break; + default: ShouldNotReachHere(); break; + } + } else { + assert(right->is_double_cpu(), "mismatched logic op operand size"); + Register Rright_lo = right->as_register_lo(); + Register Rright_hi = right->as_register_hi(); + check_register_collision(Rdst_lo, &Rleft_hi, &Rright_hi); + switch (code) { + case lir_logic_and: __ andr (Rdst_lo, Rleft, Rright_lo); + __ andr (Rdst_hi, Rleft_hi, Rright_hi); break; + case lir_logic_or: __ orr (Rdst_lo, Rleft, Rright_lo); + __ orr (Rdst_hi, Rleft_hi, Rright_hi); break; + case lir_logic_xor: __ eor (Rdst_lo, Rleft, Rright_lo); + __ eor (Rdst_hi, Rleft_hi, Rright_hi); break; + default: ShouldNotReachHere(); break; + } + } + } +} + + + +void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) { Unimplemented(); } + +void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { + if (opr1->is_single_cpu()) { + + assert(opr1->type() != T_FLOAT, "expect integer type");// softfp guard + assert(opr2->type() != T_FLOAT, "expect integer type"); + + Register reg1 = as_reg(opr1); + if (opr2->is_single_cpu()) { + // cpu register - cpu register + Register reg2 = opr2->as_register(); + if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) { + __ cmpoop(reg1, reg2); + } else { + assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?"); + __ cmp(reg1, reg2); + } + } else if (opr2->is_constant()) { + LIR_Const* c = opr2->as_constant_ptr(); + if (c->type() == T_INT) { + __ cmp(reg1, c->as_jint(), rscratch1, Assembler::C_DFLT); + } else if (c->type() == T_OBJECT || c->type() == T_ARRAY) { + jobject o = c->as_jobject(); + if (o == NULL) { + __ cmp(reg1, (int32_t)NULL_WORD); + } else { + jobject2reg(o, rscratch1); + __ cmpoop(reg1, rscratch1); + } + } else { + fatal("unexpected type: %s", basictype_to_str(c->type())); + } + } else if (opr2->is_address()) { + __ ldr(rscratch2, as_Address(opr2->as_address_ptr(), rscratch1, Address::IDT_INT)); + __ cmp(reg1, rscratch2); + } else { + ShouldNotReachHere(); + } + + } 
else if (opr1->is_double_cpu()) { + assert(opr1->type() == T_LONG, "expect jlong type"); + assert(opr2->type() == T_LONG, "expect jlong type"); + Register xlo = opr1->as_register_lo(); + Register xhi = opr1->as_register_hi(); + if (opr2->is_double_cpu()) { + // cpu register - cpu register + Register ylo = opr2->as_register_lo(); + Register yhi = opr2->as_register_hi(); + switch (condition) { + case lir_cond_equal: + case lir_cond_notEqual: + case lir_cond_belowEqual: + case lir_cond_aboveEqual: + // these need APSR.ZC. the ops below set them correctly (but not APSR.V) + __ cmp(xhi, yhi); + __ cmp(xlo, ylo, Assembler::EQ); + break; + case lir_cond_less: + case lir_cond_greaterEqual: + __ cmp(xlo, ylo); + __ sbcs(rscratch1, xhi, yhi); + break; + case lir_cond_lessEqual: + case lir_cond_greater: + // here goes a trick: the below operations do not produce the valid + // value for the APSR.Z flag and there is no easy way to set it. so + // we exchange the order of arguments in the comparison and use the + // opposite condition in the conditional statement that follows. + // GE should be used instead of LE and LT in place of GT. + // the comp_op() could only be followed by: emit_opBranch(), cmove() and + // emit_assert(). these are patched to be aware of this trick + __ cmp(ylo, xlo); + __ sbcs(rscratch1, yhi, xhi); + break; + } + } else if (opr2->is_constant()) { + jlong y = opr2->as_jlong(); + assert(Assembler::operand_valid_for_add_sub_immediate(y), "immediate overflow"); + switch (condition) { + case lir_cond_equal: + case lir_cond_notEqual: + case lir_cond_belowEqual: + case lir_cond_aboveEqual: + __ cmp(xhi, (int)(y >> 32)); + __ cmp(xlo, (int)y, Assembler::EQ); + break; + case lir_cond_less: + case lir_cond_greaterEqual: + __ cmp(xlo, (int)y); + __ sbcs(rscratch1, xhi, (int)(y >> 32)); + break; + case lir_cond_lessEqual: + case lir_cond_greater: + __ rsbs(rscratch1, xlo, (int)y); + __ rscs(rscratch1, xhi, (int)(y >> 32)); + break; + } + } else { + ShouldNotReachHere(); + } + } else if (opr1->is_single_fpu()) { + FloatRegister reg1 = opr1->as_float_reg(); + assert(opr2->is_single_fpu(), "expect single float register"); + FloatRegister reg2 = opr2->as_float_reg(); + __ vcmp_f32(reg1, reg2); + __ get_fpsr(); + } else if (opr1->is_double_fpu()) { + FloatRegister reg1 = opr1->as_double_reg(); + assert(opr2->is_double_fpu(), "expect double float register"); + FloatRegister reg2 = opr2->as_double_reg(); + __ vcmp_f64(reg1, reg2); + __ get_fpsr(); + } else { + ShouldNotReachHere(); + } +} + +void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ + if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { + bool is_unordered_less = (code == lir_ucmp_fd2i); + if (left->is_single_fpu()) { + __ float_cmp(true, is_unordered_less ? -1 : 1, left->as_float_reg(), right->as_float_reg(), dst->as_register()); + } else if (left->is_double_fpu()) { + __ float_cmp(false, is_unordered_less ? 
-1 : 1, left->as_double_reg(), right->as_double_reg(), dst->as_register()); + } else { + ShouldNotReachHere(); + } + } else if (code == lir_cmp_l2i) { + __ mov(dst->as_register(), 1); + __ subs(rscratch1, left->as_register_lo(), right->as_register_lo()); + __ sbc(rscratch2, left->as_register_hi(), right->as_register_hi()); + __ orrs(rscratch1, rscratch1, rscratch2); + __ mov(dst->as_register(), -1, Assembler::MI); + __ mov(dst->as_register(), 0, Assembler::EQ); + } else { + ShouldNotReachHere(); + } +} + + +void LIR_Assembler::align_call(LIR_Code code) { } + + +void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { + __ trampoline_call(Address(op->addr(), rtype)); + add_call_info(code_offset(), op->info()); +} + + +void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { + __ ic_call(op->addr()); + add_call_info(code_offset(), op->info()); +} + + +/* Currently, vtable-dispatch is only enabled for sparc platforms */ +void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { + ShouldNotReachHere(); +} + + +void LIR_Assembler::emit_static_call_stub() { + address call_pc = __ pc(); + address stub = __ start_a_stub(call_stub_size()); + if (stub == NULL) { + bailout("static call stub overflow"); + return; + } + + int start = __ offset(); + + __ relocate(static_stub_Relocation::spec(call_pc)); + __ mov_metadata(rmethod, (Metadata*)NULL); + __ movptr(rscratch1, 0); + __ b(rscratch1); + + assert(__ offset() - start <= call_stub_size(), "stub too big"); + __ end_a_stub(); +} + + +void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { + assert(exceptionOop->as_register() == r0, "must match"); + assert(exceptionPC->as_register() == r3, "must match"); + + // exception object is not added to oop map by LinearScan + // (LinearScan assumes that no oops are in fixed registers) + info->add_register_oop(exceptionOop); + Runtime1::StubID unwind_id; + + // get current pc information + // pc is only needed if the method has an exception handler, the unwind code does not need it. + int pc_for_athrow_offset = __ offset(); + __ add(exceptionPC->as_register(), r15_pc, -8); + add_call_info(pc_for_athrow_offset, info); // for exception handler + + __ verify_not_null_oop(r0); + // search an exception handler (r0: exception oop, r3: throwing pc) + if (compilation()->has_fpu_code()) { + unwind_id = Runtime1::handle_exception_id; + } else { + unwind_id = Runtime1::handle_exception_nofpu_id; + } + __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id))); + + // FIXME: enough room for two byte trap ???? + __ nop(); +} + + +void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { + assert(exceptionOop->as_register() == r0, "must match"); + + __ b(_unwind_handler_entry); +} + + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { + Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); + Register dreg = dest->is_single_cpu() ? 
dest->as_register() : dest->as_register_lo(); + + switch (left->type()) { + case T_INT: + case T_ADDRESS: + case T_OBJECT: + __ andr(rscratch1, count->as_register(), 0x1f); + switch (code) { + case lir_shl: __ lsl(dreg, lreg, rscratch1); break; + case lir_shr: __ asr(dreg, lreg, rscratch1); break; + case lir_ushr: __ lsr(dreg, lreg, rscratch1); break; + default: + ShouldNotReachHere(); + break; + } + break; + case T_LONG: + { + Register lreg_hi = left->as_register_hi(); + Register dreg_hi = dest->as_register_hi(); + const int word_bits = 8 * wordSize; + + if (code == lir_shl || code == lir_ushr) { + check_register_collision(dreg, &lreg, &lreg_hi, rscratch1); + check_register_collision(dreg_hi, &lreg, &lreg_hi, rscratch2); + } + + switch (code) { + case lir_shl: + __ andr(dreg, count->as_register(), 0x3f); + __ sub(dreg_hi, dreg, word_bits); + __ lsl(lreg_hi, lreg_hi, dreg); + __ orr(lreg_hi, lreg_hi, lreg, lsl(dreg_hi)); + __ rsb(dreg_hi, dreg, word_bits); + __ orr(dreg_hi, lreg_hi, lreg, lsr(dreg_hi)); + __ lsl(dreg, lreg, dreg); + break; + case lir_shr: { + __ mov(rscratch2, lreg_hi); + __ andr(rscratch1, count->as_register(), 0x3f); + __ lsr(dreg, lreg, rscratch1); + __ rsb(dreg_hi, rscratch1, word_bits); + __ orr(dreg, dreg, rscratch2, lsl(dreg_hi)); + __ asr(dreg_hi, rscratch2, rscratch1); + __ subs(rscratch1, rscratch1, word_bits); + __ mov(dreg, rscratch2, asr(rscratch1), Assembler::GT); + } + break; + case lir_ushr: + __ andr(dreg, count->as_register(), 0x3f); + __ lsr(lreg, lreg, dreg); + __ rsb(dreg_hi, dreg, word_bits); + __ orr(lreg, lreg, lreg_hi, lsl(dreg_hi)); + __ lsr(dreg_hi, lreg_hi, dreg); + __ sub(dreg, dreg, word_bits); + __ orr(dreg, lreg, lreg_hi, lsr(dreg)); + break; + default: + ShouldNotReachHere(); + break; + } + } + break; + default: + ShouldNotReachHere(); + break; + } +} + + +void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { + Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); + Register lreg = left->is_single_cpu() ? 
left->as_register() : left->as_register_lo(); + + if (!count) { + reg2reg(left, dest); + return; + } + + switch (left->type()) { + case T_INT: + case T_ADDRESS: + case T_OBJECT: + switch (code) { + case lir_shl: __ lsl(dreg, lreg, count); break; + case lir_shr: __ asr(dreg, lreg, count); break; + case lir_ushr: __ lsr(dreg, lreg, count); break; + default: + ShouldNotReachHere(); + break; + } + break; + case T_LONG: { + Register lreg_hi = left->as_register_hi(); + Register dreg_hi = dest->as_register_hi(); + const int word_bits = 8 * wordSize; + + switch (code) { + case lir_shl: + if (count >= word_bits) { + __ lsl(dreg_hi, lreg, count - word_bits); + __ mov(dreg, 0); + } else { + check_register_collision(dreg_hi, &lreg); + __ lsl(dreg_hi, lreg_hi, count); + __ orr(dreg_hi, dreg_hi, lreg, lsr(word_bits - count)); + __ lsl(dreg, lreg, count); + } + break; + case lir_shr: + if (count >= word_bits) { + __ asr(dreg, lreg_hi, count - word_bits); + __ asr(dreg_hi, lreg_hi, word_bits); + } else { + check_register_collision(dreg, &lreg_hi); + __ lsr(dreg, lreg, count); + __ orr(dreg, dreg, lreg_hi, lsl(word_bits - count)); + __ asr(dreg_hi, lreg_hi, count); + } + break; + case lir_ushr: + if (count >= word_bits) { + __ lsr(dreg, lreg_hi, count - word_bits); + __ mov(dreg_hi, 0); + } else { + check_register_collision(dreg, &lreg_hi); + __ lsr(dreg, lreg, count); + __ orr(dreg, dreg, lreg_hi, lsl(word_bits - count)); + __ lsr(dreg_hi, lreg_hi, count); + } + break; + default: + ShouldNotReachHere(); + break; + } + } + break; + default: + ShouldNotReachHere(); + break; + } +} + + +void LIR_Assembler::store_parameter(Register r, int offset_from_sp_in_words) { + assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); + int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; + assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ str (r, Address(sp, offset_from_sp_in_bytes)); +} + + +void LIR_Assembler::store_parameter(jint c, int offset_from_sp_in_words) { + assert(offset_from_sp_in_words >= 0, "invalid offset from sp"); + int offset_from_sp_in_bytes = offset_from_sp_in_words * BytesPerWord; + assert(offset_from_sp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); + __ mov (rscratch1, c); + __ str (rscratch1, Address(sp, offset_from_sp_in_bytes)); +} + +// This code replaces a call to arraycopy; no exception may +// be thrown in this code, they must be thrown in the System.arraycopy +// activation frame; we could save some checks if this would not be the case +void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { + ciArrayKlass* default_type = op->expected_type(); + Register src = op->src()->as_register(); + Register dst = op->dst()->as_register(); + Register src_pos = op->src_pos()->as_register(); + Register dst_pos = op->dst_pos()->as_register(); + Register length = op->length()->as_register(); + Register tmp = op->tmp()->as_register(); + // due to limited number of registers available and in order to simplify + // the code we fix the registers used by the arguments to this intrinsic. 
+ // see the comment in LIRGenerator::do_ArrayCopy + assert(src == j_rarg0, "assumed by implementation"); + assert(src_pos == j_rarg1, "assumed by implementation"); + assert(dst == j_rarg2, "assumed by implementation"); + assert(dst_pos == j_rarg3, "assumed by implementation"); + assert(length == r4, "assumed by implementation"); + assert(tmp == r5, "assumed by implementation"); + const int dst_spill_offset = 2*BytesPerWord; + const int dst_pos_spill_offset = 1*BytesPerWord; + const int length_spill_offset = 0*BytesPerWord; + const int src_pos_spill_offset = 3*BytesPerWord; + const int src_spill_offset = 4*BytesPerWord; + + CodeStub* stub = op->stub(); + int flags = op->flags(); + BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; + if (basic_type == T_ARRAY) basic_type = T_OBJECT; + + // if we don't know anything, just go through the generic arraycopy + if (default_type == NULL // || basic_type == T_OBJECT + ) { + Label done; + assert(src == r0 && src_pos == r1, "mismatch in calling convention"); + + // Save the arguments in case the generic arraycopy fails and we + // have to fall back to the JNI stub. spill all but length since it's + // in the callee save register + __ str(dst, Address(sp, dst_spill_offset)); + __ str(dst_pos, Address(sp, dst_pos_spill_offset)); + __ str(src_pos, Address(sp, src_pos_spill_offset)); + __ str(src, Address(sp, src_spill_offset)); + + address copyfunc_addr = StubRoutines::generic_arraycopy(); + assert(copyfunc_addr != NULL, "generic arraycopy stub required"); + + // The arguments are in java calling convention so we shift them + // to C convention + assert(c_rarg0 == j_rarg0, "assumed in the code below"); + // the below C function follows C calling convention, + // so should put 5th arg to stack + assert(length_spill_offset == 0, "assumed in the code below"); + __ str(length, Address(sp)); + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ increment(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); + } +#endif + __ far_call(RuntimeAddress(copyfunc_addr)); + + __ cbz(r0, *stub->continuation()); + + // r0 is -1^K where K == partial copied count + __ inv(rscratch1, r0); + + // Reload values from the stack so they are where the stub + // expects them. 
don't reload length since it's in the callee-save register + // and the value on the stack might have been modified by the C function + __ ldr(dst, Address(sp, dst_spill_offset)); + __ ldr(dst_pos, Address(sp, dst_pos_spill_offset)); + __ ldr(src_pos, Address(sp, src_pos_spill_offset)); + __ ldr(src, Address(sp, src_spill_offset)); + + // adjust length down and src/end pos up by partial copied count + __ sub(length, length, rscratch1); + __ add(src_pos, src_pos, rscratch1); + __ add(dst_pos, dst_pos, rscratch1); + + __ b(*stub->entry()); + + __ bind(*stub->continuation()); + return; + } + + assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point"); + + int elem_size = type2aelembytes(basic_type); + int scale = exact_log2(elem_size); + + Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); + Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); + Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); + Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); + + // test for NULL + if (flags & LIR_OpArrayCopy::src_null_check) { + __ cbz(src, *stub->entry()); + } + if (flags & LIR_OpArrayCopy::dst_null_check) { + __ cbz(dst, *stub->entry()); + } + + // If the compiler was not able to prove that exact type of the source or the destination + // of the arraycopy is an array type, check at runtime if the source or the destination is + // an instance type. + if (flags & LIR_OpArrayCopy::type_check) { + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + __ ldr(rscratch1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ cmp(rscratch1, Klass::_lh_neutral_value); + __ b(*stub->entry(), Assembler::GE); + } + + if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + __ ldr(rscratch1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); + __ cmp(rscratch1, Klass::_lh_neutral_value); + __ b(*stub->entry(), Assembler::GE); + } + } + + + // check if negative + if (flags & LIR_OpArrayCopy::src_pos_positive_check) { + __ cmp(src_pos, 0); + __ b(*stub->entry(), Assembler::LT); + } + if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { + __ cmp(dst_pos, 0); + __ b(*stub->entry(), Assembler::LT); + } + + if (flags & LIR_OpArrayCopy::length_positive_check) { + __ cmp(length, 0); + __ b(*stub->entry(), Assembler::LT); + } + + if (flags & LIR_OpArrayCopy::src_range_check) { + __ add(tmp, src_pos, length); + __ ldr(rscratch1, src_length_addr); + __ cmp(tmp, rscratch1); + __ b(*stub->entry(), Assembler::HI); + } + if (flags & LIR_OpArrayCopy::dst_range_check) { + __ add(tmp, dst_pos, length); + __ ldr(rscratch1, dst_length_addr); + __ cmp(tmp, rscratch1); + __ b(*stub->entry(), Assembler::HI); + } + + // FIXME: The logic in LIRGenerator::arraycopy_helper clears + // length_positive_check if the source of our length operand is an + // arraylength. However, that arraylength might be zero, and the + // stub that we're about to call contains an assertion that count != + // 0 . So we make this check purely in order not to trigger an + // assertion failure. 
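+ // A zero-length copy is a no-op, so branch straight to the stub continuation (the end of this arraycopy).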
+ __ cbz(length, *stub->continuation()); + + if (flags & LIR_OpArrayCopy::type_check) { + // We don't know the array types are compatible + if (basic_type != T_OBJECT) { + // Simple test for basic type arrays + __ ldr(tmp, src_klass_addr); + __ ldr(rscratch1, dst_klass_addr); + __ cmp(tmp, rscratch1); + __ b(*stub->entry(), Assembler::NE); + } else { + // For object arrays, if src is a sub class of dst then we can + // safely do the copy. + Label cont, slow; + + __ push(RegSet::of(src, dst), sp); + + __ load_klass(src, src); + __ load_klass(dst, dst); + + __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); + + __ push(src); // sub + __ push(dst); // super + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + // result on TOS + __ pop(src); // result + __ pop(dst); + + __ cbnz(src, cont); + + __ bind(slow); + __ pop(RegSet::of(src, dst), sp); + + address copyfunc_addr = StubRoutines::checkcast_arraycopy(); + if (copyfunc_addr != NULL) { // use stub if available + // src is not a sub class of dst so we have to do a + // per-element check. + + int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; + if ((flags & mask) != mask) { + // Check that both of them are object arrays. + assert(flags & mask, "one of the two should be known to be an object array"); + + if (!(flags & LIR_OpArrayCopy::src_objarray)) { + __ load_klass(tmp, src); + } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { + __ load_klass(tmp, dst); + } + int lh_offset = in_bytes(Klass::layout_helper_offset()); + Address klass_lh_addr(tmp, lh_offset); + jint objArray_lh = Klass::array_layout_helper(T_OBJECT); + __ ldr(rscratch1, klass_lh_addr); + __ mov(rscratch2, objArray_lh); + __ eor(rscratch1, rscratch1, rscratch2); + __ cbnz(rscratch1, *stub->entry()); + } + + // Spill because stub destroys r0-r3. 
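+ // (length itself stays in r4, outside the r0-r3 registers the stub destroys, so it is not spilled here; see the asserts below)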
+ assert(length_spill_offset == 0, "assumed in the code below"); + assert(length == r4, "shall not be in r0-r3"); + __ str(dst_pos, Address(sp, dst_pos_spill_offset)); + __ str(dst, Address(sp, dst_spill_offset)); + __ str(src_pos, Address(sp, src_pos_spill_offset)); + __ str(src, Address(sp, src_spill_offset)); + + __ lea(c_rarg0, Address(src, src_pos, lsl(scale))); + __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg0, dst, dst_pos, length); + __ lea(c_rarg1, Address(dst, dst_pos, lsl(scale))); + __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg1, dst, dst_pos, length); + + __ load_klass(c_rarg2, dst); + __ ldr(c_rarg2, Address(c_rarg2, ObjArrayKlass::element_klass_offset())); + __ ldr(c_rarg3, Address(c_rarg2, Klass::super_check_offset_offset())); + __ str(c_rarg2, Address(sp)); + __ mov(c_rarg2, length); + __ far_call(RuntimeAddress(copyfunc_addr)); + +#ifndef PRODUCT + if (PrintC1Statistics) { + Label failed; + __ cbnz(r0, failed); + __ increment(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt)); + __ bind(failed); + } +#endif + + __ cbz(r0, *stub->continuation()); + +#ifndef PRODUCT + if (PrintC1Statistics) { + __ increment(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt)); + } +#endif + assert_different_registers(dst, dst_pos, length, src_pos, src, rscratch1); + // return value is -1^K where K is partial copied count + __ mvn(rscratch1, r0); + + // Restore previously spilled arguments + __ ldr(dst_pos, Address(sp, dst_pos_spill_offset)); + __ ldr(dst, Address(sp, dst_spill_offset)); + __ ldr(src_pos, Address(sp, src_pos_spill_offset)); + __ ldr(src, Address(sp, src_spill_offset)); + + // adjust length down and src/end pos up by partial copied count + __ sub(length, length, rscratch1); + __ add(src_pos, src_pos, rscratch1); + __ add(dst_pos, dst_pos, rscratch1); + } + + __ b(*stub->entry()); + + __ bind(cont); + __ pop(RegSet::of(src, dst), sp); + } + } + +#ifdef ASSERT + if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { + // Sanity check the known type with the incoming class. For the + // primitive case the types must match exactly with src.klass and + // dst.klass each exactly matching the default type. For the + // object array case, if no type check is needed then either the + // dst type is exactly the expected type and the src type is a + // subtype which we can't check or src is the same array as dst + // but not necessarily exactly of type default_type. 
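+ // The assembly below mirrors this reasoning: it only falls into 'halt' when the observed klasses are inconsistent with default_type.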
+ Label known_ok, halt; + __ mov_metadata(tmp, default_type->constant_encoding()); + + if (basic_type != T_OBJECT) { + + __ ldr(rscratch1, dst_klass_addr); + __ cmp(tmp, rscratch1); + __ b(halt, Assembler::NE); + __ ldr(rscratch1, src_klass_addr); + __ cmp(tmp, rscratch1); + __ b(known_ok, Assembler::EQ); + } else { + __ ldr(rscratch1, dst_klass_addr); + __ cmp(tmp, rscratch1); + __ b(known_ok, Assembler::EQ); + __ cmp(src, dst); + __ b(known_ok, Assembler::EQ); + } + __ bind(halt); + __ stop("incorrect type information in arraycopy"); + __ bind(known_ok); + } +#endif + + __ lea(c_rarg0, Address(src, src_pos, lsl(scale))); + __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg0, dst, dst_pos, length); + __ lea(c_rarg1, Address(dst, dst_pos, lsl(scale))); + __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); + assert_different_registers(c_rarg1, dst, dst_pos, length); + __ mov(c_rarg2, length); + + bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; + bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; + const char *name; + address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); + + CodeBlob *cb = CodeCache::find_blob(entry); + if (cb) { + __ far_call(RuntimeAddress(entry)); + } else { + __ call_VM_leaf(entry, 3); + } + + __ bind(*stub->continuation()); +} + +void LIR_Assembler::emit_lock(LIR_OpLock* op) { + Register obj = op->obj_opr()->as_register(); // may not be an oop + Register hdr = op->hdr_opr()->as_register(); + Register lock = op->lock_opr()->as_register(); + if (!UseFastLocking) { + __ b(*op->stub()->entry()); + } else if (op->code() == lir_lock) { + Register scratch = noreg; + if (UseBiasedLocking) { + scratch = op->scratch_opr()->as_register(); + } + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + // add debug info for NullPointerException only if one is possible + int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); + if (op->info() != NULL) { + add_debug_info_for_null_check(null_check_offset, op->info()); + } + // done + } else if (op->code() == lir_unlock) { + assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + __ unlock_object(hdr, obj, lock, *op->stub()->entry()); + } else { + Unimplemented(); + } + __ bind(*op->stub()->continuation()); +} + + +void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { + ciMethod* method = op->profiled_method(); + int bci = op->profiled_bci(); + ciMethod* callee = op->profiled_callee(); + + // Update counter for all call types + ciMethodData* md = method->method_data_or_null(); + assert(md != NULL, "Sanity"); + ciProfileData* data = md->bci_to_data(bci); + assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); + assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); + Register mdo = op->mdo()->as_register(); + __ mov_metadata(mdo, md->constant_encoding()); + Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); + // Perform additional virtual call profiling for invokevirtual and + // invokeinterface bytecodes + if (op->should_profile_receiver_type()) { + assert(op->recv()->is_single_cpu(), "recv must be allocated"); + Register recv = op->recv()->as_register(); + assert_different_registers(mdo, recv); + assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); + ciKlass* 
known_klass = op->known_holder(); + if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { + // We know the type that will be seen at this call site; we can + // statically update the MethodData* rather than needing to do + // dynamic tests on the receiver type + + // NOTE: we should probably put a lock around this search to + // avoid collisions by concurrent compilations + ciVirtualCallData* vc_data = (ciVirtualCallData*) data; + uint i; + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (known_klass->equals(receiver)) { + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment); + return; + } + } + + // Receiver type not found in profile data; select an empty slot + + // Note that this is less efficient than it should be because it + // always does a write to the receiver part of the + // VirtualCallData rather than just the first time + for (i = 0; i < VirtualCallData::row_limit(); i++) { + ciKlass* receiver = vc_data->receiver(i); + if (receiver == NULL) { + Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); + __ mov_metadata(rscratch1, known_klass->constant_encoding()); + __ lea(rscratch2, recv_addr); + __ str(rscratch1, Address(rscratch2)); + Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); + __ addptr(data_addr, DataLayout::counter_increment); + return; + } + } + } else { + __ load_klass(recv, recv); + Label update_done; + type_profile_helper(mdo, md, data, recv, &update_done); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. 
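+ // (counter_addr was set up above from CounterData::count_offset(); it counts every call seen at this bci)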
+ __ addptr(counter_addr, DataLayout::counter_increment); + + __ bind(update_done); + } + } else { + // Static call + __ addptr(counter_addr, DataLayout::counter_increment); + } +} + + +void LIR_Assembler::emit_delay(LIR_OpDelay*) { + Unimplemented(); +} + + +void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { + __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); +} + +void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { + assert(op->crc()->is_single_cpu(), "crc must be register"); + assert(op->val()->is_single_cpu(), "byte value must be register"); + assert(op->result_opr()->is_single_cpu(), "result must be register"); + Register crc = op->crc()->as_register(); + Register val = op->val()->as_register(); + Register res = op->result_opr()->as_register(); + + assert_different_registers(val, crc, res); + __ lea(res, ExternalAddress(StubRoutines::crc_table_addr())); + + __ inv(crc, crc); + __ update_byte_crc32(crc, val, res); + __ inv(res, crc); +} + +void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { + COMMENT("emit_profile_type {"); + Register obj = op->obj()->as_register(); + Register tmp = op->tmp()->as_pointer_register(); + Address mdo_addr = as_Address(op->mdp()->as_address_ptr(), noreg, Address::IDT_INT); + ciKlass* exact_klass = op->exact_klass(); + intptr_t current_klass = op->current_klass(); + bool not_null = op->not_null(); + bool no_conflict = op->no_conflict(); + + Label update, next, none; + + bool do_null = !not_null; + bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; + bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; + + assert(do_null || do_update, "why are we here?"); + assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); + assert(mdo_addr.base() != rscratch1, "wrong register"); + + __ verify_oop(obj); + + if (tmp != obj) { + __ mov(tmp, obj); + } + if (do_null) { + __ cbnz(tmp, update); + if (!TypeEntries::was_null_seen(current_klass)) { + __ ldr(rscratch2, mdo_addr); + __ orr(rscratch2, rscratch2, TypeEntries::null_seen); + __ str(rscratch2, mdo_addr); + } + if (do_update) { +#ifndef ASSERT + __ b(next); + } +#else + __ b(next); + } + } else { + __ cbnz(tmp, update); + __ stop("unexpected null obj"); +#endif + } + + __ bind(update); + + if (do_update) { +#ifdef ASSERT + if (exact_klass != NULL) { + Label ok; + __ load_klass(tmp, tmp); + __ mov_metadata(rscratch1, exact_klass->constant_encoding()); + __ eor(rscratch1, tmp, rscratch1); + __ cbz(rscratch1, ok); + __ stop("exact klass and actual klass differ"); + __ bind(ok); + } +#endif + if (!no_conflict) { + if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { + if (exact_klass != NULL) { + __ mov_metadata(tmp, exact_klass->constant_encoding()); + } else { + __ load_klass(tmp, tmp); + } + + __ ldr(rscratch2, mdo_addr); + __ eor(tmp, tmp, rscratch2); + __ bics(rscratch1, tmp, ~TypeEntries::type_klass_mask); + // klass seen before, nothing to do. The unknown bit may have been + // set already but no need to check. + __ b(next, Assembler::EQ); + + __ ands(rscratch1, tmp, TypeEntries::type_unknown); + __ b(next, Assembler::NE); // already unknown. Nothing to do anymore. 
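+ // The loaded klass differs from the recorded entry and the unknown bit is clear: either record it as the first type seen here (at 'none' below) or downgrade the entry to type_unknown.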
+ + if (TypeEntries::is_type_none(current_klass)) { + __ cbz(rscratch2, none); + __ cmp(rscratch2, TypeEntries::null_seen); + __ b(none, Assembler::EQ); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + __ dmb(Assembler::ISH); + __ ldr(rscratch2, mdo_addr); + __ eor(tmp, tmp, rscratch2); + __ bics(rscratch1, tmp, ~TypeEntries::type_klass_mask); + __ b(next, Assembler::EQ); + } + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); + + __ ldr(tmp, mdo_addr); + __ ands(rscratch1, tmp, TypeEntries::type_unknown); + __ b(next, Assembler::NE); // already unknown. Nothing to do anymore. + } + + // different than before. Cannot keep accurate profile. + __ ldr(rscratch2, mdo_addr); + __ orr(rscratch2, rscratch2, TypeEntries::type_unknown); + __ str(rscratch2, mdo_addr); + + if (TypeEntries::is_type_none(current_klass)) { + __ b(next); + + __ bind(none); + // first time here. Set profile type. + __ str(tmp, mdo_addr); + } + } else { + // There's a single possible klass at this profile point + assert(exact_klass != NULL, "should be"); + if (TypeEntries::is_type_none(current_klass)) { + __ mov_metadata(tmp, exact_klass->constant_encoding()); + __ ldr(rscratch2, mdo_addr); + __ eor(tmp, tmp, rscratch2); + __ bics(rscratch1, tmp, ~TypeEntries::type_klass_mask); + __ b(next, Assembler::EQ); +#ifdef ASSERT + { + Label ok; + __ ldr(rscratch1, mdo_addr); + __ cbz(rscratch1, ok); + __ cmp(rscratch1, TypeEntries::null_seen); + __ b(ok, Assembler::EQ); + // may have been set by another thread + __ dmb(Assembler::ISH); + __ mov_metadata(rscratch1, exact_klass->constant_encoding()); + __ ldr(rscratch2, mdo_addr); + __ eor(rscratch2, rscratch1, rscratch2); + __ bics(rscratch2, rscratch2, ~TypeEntries::type_mask); + __ b(ok, Assembler::EQ); + + __ stop("unexpected profiling mismatch"); + __ bind(ok); + } +#endif + // first time here. Set profile type. + __ ldr(tmp, mdo_addr); + } else { + assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && + ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); + + __ ldr(tmp, mdo_addr); + __ ands(rscratch1, tmp, TypeEntries::type_unknown); + __ b(next, Assembler::NE); // already unknown. Nothing to do anymore. + + __ orr(tmp, tmp, TypeEntries::type_unknown); + __ str(tmp, mdo_addr); + // FIXME: Write barrier needed here? 
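(For orientation: the eor/bics sequences above compare a freshly loaded klass against the profile word while ignoring the low tag bits. A minimal standalone sketch of that tagged-word scheme, using illustrative bit values rather than the actual TypeEntries constants:)

    #include <cassert>
    #include <cstdint>

    static const uintptr_t null_seen       = 1;        // illustrative tag bits, not the
    static const uintptr_t type_unknown    = 2;        // real TypeEntries values
    static const uintptr_t tag_bits        = null_seen | type_unknown;
    static const uintptr_t type_klass_mask = ~tag_bits; // upper bits hold the klass pointer

    // Mirrors the eor + bics pattern: any difference outside the tag bits
    // means a different klass was recorded in the profile word.
    static bool same_klass(uintptr_t profile_word, uintptr_t klass) {
      return ((profile_word ^ klass) & type_klass_mask) == 0;
    }

    int main() {
      alignas(8) static int dummy_klass;               // stands in for a Klass*
      uintptr_t k = reinterpret_cast<uintptr_t>(&dummy_klass);
      uintptr_t word = k | null_seen;                  // klass recorded, a null was also seen
      assert(same_klass(word, k));
      assert((word & type_unknown) == 0);              // profile still has a precise type
      word |= type_unknown;                            // conflict: give up on the exact type
      assert(same_klass(word, k) && (word & type_unknown) != 0); // klass bits untouched
      return 0;
    }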
+ } + } + + __ bind(next); + } + COMMENT("} emit_profile_type"); +} + + +void LIR_Assembler::align_backward_branch_target() { +} + + +void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) { + if (left->is_single_cpu()) { + assert(left->type() != T_FLOAT, "expect integer type"); + assert(dest->type() != T_FLOAT, "expect integer type"); + assert(dest->is_single_cpu(), "expect single result reg"); + __ neg(dest->as_register(), left->as_register()); + } else if (left->is_double_cpu()) { + assert(left->type() != T_DOUBLE, "expect integer type"); + assert(dest->type() != T_DOUBLE, "expect integer type"); + assert(dest->is_double_cpu(), "expect double result reg"); + const Register l_lo = left->as_register_lo(); + Register l_hi = left->as_register_hi(); + check_register_collision(dest->as_register_lo(), &l_hi); + __ rsbs(dest->as_register_lo(), l_lo, 0); + __ rsc(dest->as_register_hi(), l_hi, 0); + } else if (left->is_single_fpu()) { + assert(dest->is_single_fpu(), "expect single float result reg"); + __ vneg_f32(dest->as_float_reg(), left->as_float_reg()); + } else if (left->is_double_fpu()) { + assert(left->is_double_fpu(), "expect double float operand reg"); + assert(dest->is_double_fpu(), "expect double float result reg"); + __ vneg_f64(dest->as_double_reg(), left->as_double_reg()); + } else { + ShouldNotReachHere(); + } +} + + +void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { + assert(patch_code == lir_patch_none, "Patch code not supported"); + __ lea(dest->as_register(), as_Address(addr->as_address_ptr(), noreg, Address::IDT_LEA)); +} + + +void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) { + assert(!tmp->is_valid(), "don't need temporary"); + CodeBlob *cb = CodeCache::find_blob(dest); + if (cb) { + __ far_call(RuntimeAddress(dest)); + } else { + __ lea(rscratch1, RuntimeAddress(dest)); + __ bl(rscratch1); + } + if (info != NULL) { + add_call_info_here(info); + } + __ maybe_isb(); +} + +void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { + if (type == T_LONG || type == T_DOUBLE) { + const LIR_Opr long_val = FrameMap::long0_opr; + + int null_check_offset = -1; + + if (src->is_register() && dest->is_address()) { + // long1 reserved as temp by LinearScan::pd_add_temps + const LIR_Opr long_tmp = FrameMap::long1_opr; + __ lea(rscratch1, as_Address_lo(dest->as_address_ptr(), Address::IDT_LEA)); + + + if (src->is_double_fpu()) { + assert(type == T_DOUBLE, "invalid register allocation"); + // long0 reserved as temp by LinearScan::pd_add_temps + __ vmov_f64(long_val->as_register_lo(), long_val->as_register_hi(), src->as_double_reg()); + } else { + assert(type == T_LONG && src->is_same_register(long_val), "T_LONG src should be in long0 (by LIRGenerator)"); + } + + null_check_offset = __ offset(); + __ atomic_strd(long_val->as_register_lo(), long_val->as_register_hi(), rscratch1, + long_tmp->as_register_lo(), long_tmp->as_register_hi()); + + } else if (src->is_address() && dest->is_register()) { + __ lea(rscratch1, as_Address_lo(src->as_address_ptr(), Address::IDT_LEA)); + + null_check_offset = __ offset(); + __ atomic_ldrd(long_val->as_register_lo(), long_val->as_register_hi(), rscratch1); + + if (dest->is_double_fpu()) { + __ vmov_f64(dest->as_double_reg(), long_val->as_register_lo(), long_val->as_register_hi()); + } else { + assert(type != T_LONG || dest->is_same_register(long_val), "T_LONG dest should be in long0 (by 
LIRGenerator)"); + } + } else { + Unimplemented(); + } + + if (info != NULL) { + add_debug_info_for_null_check(null_check_offset, info); + } + + } else { + move_op(src, dest, type, lir_patch_none, info, + /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); + } +} + +#ifdef ASSERT +// emit run-time assertion +void LIR_Assembler::emit_assert(LIR_OpAssert* op) { + assert(op->code() == lir_assert, "must be"); + + if (op->in_opr1()->is_valid()) { + assert(op->in_opr2()->is_valid(), "both operands must be valid"); + comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op); + } else { + assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); + assert(op->condition() == lir_cond_always, "no other conditions allowed"); + } + + Label ok; + if (op->condition() != lir_cond_always) { + Assembler::Condition acond = Assembler::AL; + switch (op->condition()) { + case lir_cond_equal: acond = Assembler::EQ; break; + case lir_cond_notEqual: acond = Assembler::NE; break; + case lir_cond_less: acond = Assembler::LT; break; + case lir_cond_greaterEqual: acond = Assembler::GE; break; + case lir_cond_lessEqual: acond = Assembler::LE; break; + case lir_cond_greater: acond = Assembler::GT; break; + case lir_cond_belowEqual: acond = Assembler::LS; break; + case lir_cond_aboveEqual: acond = Assembler::HS; break; + default: ShouldNotReachHere(); + } + if (op->in_opr1()->type() == T_LONG) { + // a special trick here to be able to effectively compare jlongs + // for the lessEqual and greater conditions the jlong operands are swapped + // during comparison and hence should use mirror condition in conditional + // instruction + // see LIR_Assembler::comp_op and LIR_Assembler::cmove + switch (op->condition()) { + case lir_cond_lessEqual: acond = Assembler::GE; break; + case lir_cond_greater: acond = Assembler::LT; break; + } + } + __ b(ok, acond); + } + if (op->halt()) { + const char* str = __ code_string(op->msg()); + __ stop(str); + } else { + breakpoint(); + } + __ bind(ok); +} +#endif + +#ifndef PRODUCT +#define COMMENT(x) do { __ block_comment(x); } while (0) +#else +#define COMMENT(x) +#endif + +void LIR_Assembler::membar() { + COMMENT("membar"); + __ membar(MacroAssembler::AnyAny); +} + +void LIR_Assembler::membar_acquire() { + __ membar(Assembler::LoadLoad|Assembler::LoadStore); +} + +void LIR_Assembler::membar_release() { + __ membar(Assembler::LoadStore|Assembler::StoreStore); +} + +void LIR_Assembler::membar_loadload() { + __ membar(Assembler::LoadLoad); +} + +void LIR_Assembler::membar_storestore() { + __ membar(MacroAssembler::StoreStore); +} + +void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); } + +void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); } + +void LIR_Assembler::on_spin_wait() { + Unimplemented(); +} + +void LIR_Assembler::get_thread(LIR_Opr result_reg) { + __ mov(result_reg->as_register(), rthread); +} + + +void LIR_Assembler::peephole(LIR_List *lir) { +#if 0 + if (tableswitch_count >= max_tableswitches) + return; + + /* + This finite-state automaton recognizes sequences of compare-and- + branch instructions. We will turn them into a tableswitch. You + could argue that C1 really shouldn't be doing this sort of + optimization, but without it the code is really horrible. 
+ */ + + enum { start_s, cmp1_s, beq_s, cmp_s } state; + int first_key, last_key = -2147483648; + int next_key = 0; + int start_insn = -1; + int last_insn = -1; + Register reg = noreg; + LIR_Opr reg_opr; + state = start_s; + + LIR_OpList* inst = lir->instructions_list(); + for (int i = 0; i < inst->length(); i++) { + LIR_Op* op = inst->at(i); + switch (state) { + case start_s: + first_key = -1; + start_insn = i; + switch (op->code()) { + case lir_cmp: + LIR_Opr opr1 = op->as_Op2()->in_opr1(); + LIR_Opr opr2 = op->as_Op2()->in_opr2(); + if (opr1->is_cpu_register() && opr1->is_single_cpu() + && opr2->is_constant() + && opr2->type() == T_INT) { + reg_opr = opr1; + reg = opr1->as_register(); + first_key = opr2->as_constant_ptr()->as_jint(); + next_key = first_key + 1; + state = cmp_s; + goto next_state; + } + break; + } + break; + case cmp_s: + switch (op->code()) { + case lir_branch: + if (op->as_OpBranch()->cond() == lir_cond_equal) { + state = beq_s; + last_insn = i; + goto next_state; + } + } + state = start_s; + break; + case beq_s: + switch (op->code()) { + case lir_cmp: { + LIR_Opr opr1 = op->as_Op2()->in_opr1(); + LIR_Opr opr2 = op->as_Op2()->in_opr2(); + if (opr1->is_cpu_register() && opr1->is_single_cpu() + && opr1->as_register() == reg + && opr2->is_constant() + && opr2->type() == T_INT + && opr2->as_constant_ptr()->as_jint() == next_key) { + last_key = next_key; + next_key++; + state = cmp_s; + goto next_state; + } + } + } + last_key = next_key; + state = start_s; + break; + default: + assert(false, "impossible state"); + } + if (state == start_s) { + if (first_key < last_key - 5L && reg != noreg) { + { + // printf("found run register %d starting at insn %d low value %d high value %d\n", + // reg->encoding(), + // start_insn, first_key, last_key); + // for (int i = 0; i < inst->length(); i++) { + // inst->at(i)->print(); + // tty->print("\n"); + // } + // tty->print("\n"); + } + + struct tableswitch *sw = &switches[tableswitch_count]; + sw->_insn_index = start_insn, sw->_first_key = first_key, + sw->_last_key = last_key, sw->_reg = reg; + inst->insert_before(last_insn + 1, new LIR_OpLabel(&sw->_after)); + { + // Insert the new table of branches + int offset = last_insn; + for (int n = first_key; n < last_key; n++) { + inst->insert_before + (last_insn + 1, + new LIR_OpBranch(lir_cond_always, T_ILLEGAL, + inst->at(offset)->as_OpBranch()->label())); + offset -= 2, i++; + } + } + // Delete all the old compare-and-branch instructions + for (int n = first_key; n < last_key; n++) { + inst->remove_at(start_insn); + inst->remove_at(start_insn); + } + // Insert the tableswitch instruction + inst->insert_before(start_insn, + new LIR_Op2(lir_cmp, lir_cond_always, + LIR_OprFact::intConst(tableswitch_count), + reg_opr)); + inst->insert_before(start_insn + 1, new LIR_OpLabel(&sw->_branches)); + tableswitch_count++; + } + reg = noreg; + last_key = -2147483648; + } + next_state: + ; + } +#endif +} + +void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) { + BasicType type = src->type(); + Address addr = as_Address(src->as_address_ptr(), Address::toInsnDataType(type)); + + bool is_long = false; + + switch(type) { + case T_INT: + case T_OBJECT: + case T_ARRAY: + break; + case T_LONG: + is_long = true; + break; + default: + ShouldNotReachHere(); + } + + switch (code) { + case lir_xadd: + { + Register tmp = tmp_op->as_register(); + Register dst = as_reg(dest); + Label again; + __ lea(tmp, addr); + __ bind(again); + if(is_long) { + 
assert(dest->as_register_lo()->successor() == dest->as_register_hi(), "must be contiguous"); + assert((dest->as_register_lo()->encoding() & 1) == 0, "must be even"); + _masm->ldrexd(dst, tmp); + } else { + _masm->ldrex(dst, tmp); + } + arith_op(lir_add, dest, data, dest, NULL, false); + if (is_long) { + _masm->strexd(rscratch1, dst, tmp); + } else { + _masm->strex(rscratch1, dst, tmp); + } + __ cbnz(rscratch1, again); + arith_op(lir_sub, dest, data, dest, NULL, false); + break; + } + case lir_xchg: + { + Register tmp = tmp_op->as_register(); + Register obj = as_reg(data); + Register dst = as_reg(dest); + assert_different_registers(obj, addr.base(), tmp, rscratch1, dst); + Label again; + __ lea(tmp, addr); + __ bind(again); + if(is_long) { + assert(dest->as_register_lo()->successor() == dest->as_register_hi(), "must be contiguous"); + assert((dest->as_register_lo()->encoding() & 1) == 0, "must be even"); + + assert(data->is_double_cpu(), "should be double register"); + assert(data->as_register_lo()->successor() == data->as_register_hi(), "must be contiguous"); + assert((data->as_register_lo()->encoding() & 1) == 0, "must be even"); + + _masm->ldrexd(dst, tmp); + _masm->strexd(rscratch1, obj, tmp); + } else { + _masm->ldrex(dst, tmp); + _masm->strex(rscratch1, obj, tmp); + } + __ cbnz(rscratch1, again); + } + break; + default: + ShouldNotReachHere(); + } + __ membar(__ AnyAny); +} + +void LIR_Assembler::check_register_collision(Register d, Register *s1, Register *s2, Register tmp) { + // use a temp if any of the registers used as a source of operation + // collide with result register of the prerequisite operation + if (d == *s1) { + __ mov(tmp, d); + *s1 = tmp; + } else if (s2 && d == *s2) { + __ mov(tmp, d); + *s2 = tmp; + } +} + +#undef __ --- /dev/null 2018-09-25 19:24:24.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_LIRAssembler_aarch32.hpp 2018-09-25 19:24:24.000000000 +0300 @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_C1_LIRASSEMBLER_AARCH32_HPP +#define CPU_AARCH32_VM_C1_LIRASSEMBLER_AARCH32_HPP + +#include "assembler_aarch32.hpp" + +// ArrayCopyStub needs access to bailout +friend class ArrayCopyStub; + + private: + + int array_element_size(BasicType type) const; + + // helper functions which checks for overflow and sets bailout if it + // occurs. 
Always returns a valid embeddable pointer but in the + // bailout case the pointer won't be to unique storage. + address float_constant(float f); + address double_constant(double d); + + Address as_Address(LIR_Address* addr, Register tmp, Address::InsnDataType type); + Address as_Address_hi(LIR_Address* addr, Address::InsnDataType type); + Address as_Address_lo(LIR_Address* addr, Address::InsnDataType type); + + Address as_Address(LIR_Address* addr, Address::InsnDataType type) { + return as_Address(addr, rscratch1, type); + } + + + // Record the type of the receiver in ReceiverTypeData + void type_profile_helper(Register mdo, + ciMethodData *md, ciProfileData *data, + Register recv, Label* update_done); + void add_debug_info_for_branch(address adr, CodeEmitInfo* info); + + void casw(Register addr, Register newval, Register cmpval, Register result); + void casl(Register addr, Register newval_lo, Register newval_hi, + Register cmpval_lo, Register cmpval_hi, + Register tmp_lo, Register tmp_hi, Register result); + + FloatRegister as_float_reg(LIR_Opr doubleReg); + + static const int max_tableswitches = 20; + struct tableswitch switches[max_tableswitches]; + int tableswitch_count; + + void init() { tableswitch_count = 0; } + + void deoptimize_trap(CodeEmitInfo *info); + + enum { + _call_stub_size = 12 * NativeInstruction::arm_insn_sz, + _call_aot_stub_size = 0, + _exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), + _deopt_handler_size = 7 * NativeInstruction::arm_insn_sz + }; + + // remap input register (*s1 or *s2) to a temp one if it is at the same time + // used a result register (d) of a preceeding operation (so otherwise its + // contents gets effectively corrupt) + void check_register_collision(Register d, Register *s1, Register *s2 = NULL, Register tmp = rscratch1); + +public: + + void store_parameter(Register r, int offset_from_sp_in_words); + void store_parameter(jint c, int offset_from_sp_in_words); + void store_parameter(jobject c, int offset_from_sp_in_words); + +#endif // CPU_AARCH32_VM_C1_LIRASSEMBLER_AARCH32_HPP --- /dev/null 2018-09-25 19:24:25.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_LIRGenerator_aarch32.cpp 2018-09-25 19:24:25.000000000 +0300 @@ -0,0 +1,1740 @@ +/* + * Copyright (c) 2005, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_Compilation.hpp" +#include "c1/c1_FrameMap.hpp" +#include "c1/c1_Instruction.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_LIRGenerator.hpp" +#include "c1/c1_Runtime1.hpp" +#include "c1/c1_ValueStack.hpp" +#include "ci/ciArray.hpp" +#include "ci/ciObjArrayKlass.hpp" +#include "ci/ciTypeArrayKlass.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_aarch32.inline.hpp" +#include "vm_version_aarch32.hpp" + +#ifdef ASSERT +#define __ gen()->lir(__FILE__, __LINE__)-> +#else +#define __ gen()->lir()-> +#endif + +// Item will be loaded into a byte register; Intel only +void LIRItem::load_byte_item() { + load_item(); +} + + +void LIRItem::load_nonconstant() { + LIR_Opr r = value()->operand(); + if (r->is_constant()) { + _result = r; + } else { + load_item(); + } +} + +//-------------------------------------------------------------- +// LIRGenerator +//-------------------------------------------------------------- + + +LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::r0_oop_opr; } +LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::r3_opr; } +LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } +LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } +LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::r0_opr; } +LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } + + +LIR_Opr LIRGenerator::java_result_register_for(ValueType* type, bool callee) { + LIR_Opr opr; + switch (type->tag()) { + case floatTag: + if(hasFPU()) { + opr = FrameMap::fpu0_float_opr; break;; + } + case doubleTag: + if(hasFPU()) { + opr = FrameMap::fpu0_double_opr; break; + } + default: opr = result_register_for(type, callee); + } + return opr; +} +LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { + LIR_Opr opr; + switch (type->tag()) { + case floatTag: +#ifdef HARD_FLOAT_CC + opr = FrameMap::fpu0_float_opr; break; +#endif + case intTag: opr = FrameMap::r0_opr; break; + case objectTag: opr = FrameMap::r0_oop_opr; break; + case doubleTag: +#ifdef HARD_FLOAT_CC + opr = FrameMap::fpu0_double_opr; break; +#endif + case longTag: opr = FrameMap::long0_opr; break; + + case addressTag: + default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; + } +#ifndef HARD_FLOAT_CC + assert(type->is_float_kind() || opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); +#else + assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); +#endif + return opr; +} + + +LIR_Opr LIRGenerator::rlock_byte(BasicType type) { + LIR_Opr reg = new_register(T_INT); + set_vreg_flag(reg, LIRGenerator::byte_reg); + return reg; +} + + +//--------- loading items into registers -------------------------------- + + +bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { + if (v->type()->as_IntConstant() != NULL) { + return v->type()->as_IntConstant()->value() == 0L; + } else if (v->type()->as_LongConstant() != NULL) { + return v->type()->as_LongConstant()->value() == 0L; + } else if (v->type()->as_ObjectConstant() != NULL) { + return 
v->type()->as_ObjectConstant()->value()->is_null_object(); + } else { + return false; + } +} + +bool LIRGenerator::can_inline_as_constant(Value v) const { + if (v->type()->as_IntConstant() != NULL) { + return Assembler::operand_valid_for_add_sub_immediate(v->type()->as_IntConstant()->value()); + } else if (v->type()->as_LongConstant() != NULL) { + return Assembler::operand_valid_for_add_sub_immediate(v->type()->as_LongConstant()->value()); + } else if (v->type()->as_ObjectConstant() != NULL) { + return v->type()->as_ObjectConstant()->value()->is_null_object(); + } else { + return false; + } +} + + +bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { + switch (c->type()) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + return Assembler::operand_valid_for_add_sub_immediate(c->as_jint()); + case T_LONG: + return Assembler::operand_valid_for_add_sub_immediate(c->as_jlong()); + case T_OBJECT: + return c->as_jobject() == (jobject) NULL; + case T_METADATA: + return c->as_metadata() == (Metadata*) NULL; + case T_FLOAT: + if( hasFPU()) { + return Assembler::operand_valid_for_float_immediate(c->as_jfloat()); + } else { + return Assembler::operand_valid_for_add_sub_immediate(c->as_jint()); + } + case T_DOUBLE: + if( hasFPU()) { + return Assembler::operand_valid_for_float_immediate(c->as_jdouble()); + } else { + return Assembler::operand_valid_for_add_sub_immediate(c->as_jlong()); + } + } + return false; +} + +LIR_Opr LIRGenerator::safepoint_poll_register() { + return LIR_OprFact::illegalOpr; +} + +LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, + int shift, int disp, BasicType type) { + const Address::InsnDataType insn_type = Address::toInsnDataType(type); + assert(base->is_register(), "must be"); + + // accumulate fixed displacements + if (index->is_constant()) { + assert(index->as_constant_ptr()->type() == T_INT, "assumed"); + disp += index->as_constant_ptr()->as_jint() << shift; + index = LIR_OprFact::illegalOpr; + shift = 0; + } + + // aarch32 cannot handle natively both index and offset at the same time + // need to calculate effective value + if (index->is_register()) { + if ((disp != 0) && + Address::shift_ok_for_index(lsl(shift), insn_type) && + Assembler::operand_valid_for_add_sub_immediate(disp)) { + // add tmp, base, disp + // ldr r, [tmp, index, LSL #shift ] + LIR_Opr tmp = new_pointer_register(); + __ add(base, LIR_OprFact::intptrConst(disp), tmp); + base = tmp; + disp = 0; + } else { + assert(shift <= (int) LIR_Address::times_8, "no large shift could be here"); + // add tmp, base, index, LSL #shift + // ... + // ldr r, [tmp, ...] 
+      LIR_Opr tmp = new_pointer_register();
+      __ leal(LIR_OprFact::address(new LIR_Address(base, index, (LIR_Address::Scale) shift, 0, type)), tmp);
+      base = tmp;
+      index = LIR_OprFact::illegalOpr;
+      shift = 0;
+    }
+  }
+
+  assert(!index->is_register() || (disp == 0), "should be");
+
+  if (!Address::offset_ok_for_immed(disp, insn_type)) {
+    assert(!index->is_valid(), "should be");
+    // here index should be illegal so we can replace it with the displacement
+    // loaded into a register
+    // mov tmp, disp
+    // ldr r, [base, tmp]
+    index = new_pointer_register();
+    __ move(LIR_OprFact::intptrConst(disp), index);
+    disp = 0;
+  }
+
+  assert(Address::offset_ok_for_immed(disp, Address::toInsnDataType(type)), "must be");
+  return new LIR_Address(base, index, (LIR_Address::Scale) shift, disp, type);
+}
+
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr,
+                                              BasicType type) {
+  int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type);
+  int elem_size = type2aelembytes(type);
+  int shift = exact_log2(elem_size);
+
+  LIR_Address* addr = generate_address(array_opr, index_opr, shift, offset_in_bytes, type);
+
+  return addr;
+}
+
+LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
+  LIR_Opr r;
+  if (type == T_LONG) {
+    r = LIR_OprFact::longConst(x);
+    if (!Assembler::operand_valid_for_logical_immediate(false, x)) {
+      LIR_Opr tmp = new_register(type);
+      __ move(r, tmp);
+      return tmp;
+    }
+  } else if (type == T_INT) {
+    r = LIR_OprFact::intConst(x);
+    if (!Assembler::operand_valid_for_logical_immediate(true, x)) {
+      // This is all rather nasty. We don't know whether our constant
+      // is required for a logical or an arithmetic operation, so we
+      // don't know what the range of valid values is!!
+      LIR_Opr tmp = new_register(type);
+      __ move(r, tmp);
+      return tmp;
+    }
+  } else {
+    ShouldNotReachHere();
+    r = LIR_OprFact::illegalOpr; // unreachable
+  }
+  return r;
+}
+
+
+void LIRGenerator::increment_counter(address counter, BasicType type, int step) {
+  LIR_Opr pointer = new_pointer_register();
+  __ move(LIR_OprFact::intptrConst(counter), pointer);
+  LIR_Address* addr = new LIR_Address(pointer, type);
+  increment_counter(addr, step);
+}
+
+void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
+  LIR_Opr imm = NULL;
+  switch(addr->type()) {
+  case T_INT:
+    imm = LIR_OprFact::intConst(step);
+    break;
+  case T_LONG:
+    imm = LIR_OprFact::longConst(step);
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  LIR_Opr reg = new_register(addr->type());
+  __ load(addr, reg);
+  __ add(reg, imm, reg);
+  __ store(reg, addr);
+}
+
+void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
+  LIR_Opr reg = new_register(T_INT);
+  __ load(generate_address(base, disp, T_INT), reg, info);
+  __ cmp(condition, reg, LIR_OprFact::intConst(c));
+}
+
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) {
+  LIR_Opr reg1 = new_register(T_INT);
+  __ load(generate_address(base, disp, type), reg1, info);
+  __ cmp(condition, reg, reg1);
+}
+
+bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) {
+
+  if (is_power_of_2(c - 1)) {
+    __ shift_left(left, exact_log2(c - 1), tmp);
+    __ add(tmp, left, result);
+    return true;
+  } else if (is_power_of_2(c + 1)) {
+    __ shift_left(left, exact_log2(c + 1), tmp);
+    __ sub(tmp, left, result);
+    return true;
+  } else {
+    return false;
+  }
+}
+
+void
LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) { + BasicType type = item->type(); + __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type)); +} + +void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info, ciMethod* profiled_method, int profiled_bci) { + LIR_Opr tmp1 = new_register(objectType); + LIR_Opr tmp2 = new_register(objectType); + LIR_Opr tmp3 = new_register(objectType); + __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci); +} + +//---------------------------------------------------------------------- +// visitor functions +//---------------------------------------------------------------------- + +void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { + assert(x->is_pinned(),""); + LIRItem obj(x->obj(), this); + obj.load_item(); + + set_no_result(x); + + // "lock" stores the address of the monitor stack slot, so this is not an oop + LIR_Opr lock = new_register(T_INT); + // Need a scratch register for biased locking + LIR_Opr scratch = LIR_OprFact::illegalOpr; + if (UseBiasedLocking) { + scratch = new_register(T_INT); + } + + CodeEmitInfo* info_for_exception = NULL; + if (x->needs_null_check()) { + info_for_exception = state_for(x); + } + // this CodeEmitInfo must not have the xhandlers because here the + // object is already locked (xhandlers expect object to be unlocked) + CodeEmitInfo* info = state_for(x, x->state(), true); + monitor_enter(obj.result(), lock, syncTempOpr(), scratch, + x->monitor_no(), info_for_exception, info); +} + + +void LIRGenerator::do_MonitorExit(MonitorExit* x) { + assert(x->is_pinned(),""); + + LIRItem obj(x->obj(), this); + obj.dont_load_item(); + + LIR_Opr lock = new_register(T_INT); + LIR_Opr obj_temp = new_register(T_INT); + set_no_result(x); + monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); +} + + +void LIRGenerator::do_NegateOp(NegateOp* x) { +#ifdef __SOFTFP__ + if(x->x()->type()->is_float_kind() && !(hasFPU())) { + address entry; + if (x->x()->type()->is_float()) { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::fneg); + } else { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::dneg); + } + LIR_Opr result = call_runtime(x->x(), entry, x->type(), NULL); + set_result(x, result); + } else +#endif + { + LIRItem from(x->x(), this); + from.load_item(); + LIR_Opr result = rlock_result(x); + __ negate (from.result(), result); + } +} + +// for _fadd, _fmul, _fsub, _fdiv, _frem +// _dadd, _dmul, _dsub, _ddiv, _drem +void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { + + if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { + address entry; + if (x->op() == Bytecodes::_frem) { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); + } else { + entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); + } + LIR_Opr result = call_runtime(x->x(), x->y(), entry, x->type(), NULL); + set_result(x, result); + + return; + } + + if(hasFPU()) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + + // Always load right hand side. 
+ right.load_item(); + + if (!left.is_register()) + left.load_item(); + + LIR_Opr reg = rlock(x); + LIR_Opr tmp = LIR_OprFact::illegalOpr; + if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { + tmp = new_register(T_DOUBLE); + } + + arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), NULL); + + set_result(x, round_item(reg)); + } else { +#ifdef __SOFTFP__ + address entry; + + switch (x->op()) { + case Bytecodes::_fmul: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::fmul); + break; + case Bytecodes::_dmul: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::dmul); + break; + case Bytecodes::_fdiv: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::fdiv); + break; + case Bytecodes::_ddiv: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::ddiv); + break; + case Bytecodes::_fadd: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::fadd); + break; + case Bytecodes::_dadd: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::dadd); + break; + case Bytecodes::_fsub: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::fsub); + break; + case Bytecodes::_dsub: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsub); + break; + default: + ShouldNotReachHere(); + } + LIR_Opr result = call_runtime(x->x(), x->y(), entry, x->type(), NULL); + set_result(x, result); +#else + ShouldNotReachHere();// check your compiler settings +#endif + } +} + +// for _ladd, _lmul, _lsub, _ldiv, _lrem +void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { + + // missing test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { + + BasicTypeList signature(2); + signature.append(T_LONG); + signature.append(T_LONG); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + // check for division by zero (destroys registers of right operand!) + CodeEmitInfo* info = state_for(x); + + right.load_item(); + + __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); + __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); + + const LIR_Opr result_reg = result_register_for(x->type()); + left.load_item_force(cc->at(1)); + __ move(right.result(), cc->at(0)); + + address entry; + switch (x->op()) { + case Bytecodes::_lrem: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::lrem); + break; // check if dividend is 0 is done elsewhere + case Bytecodes::_ldiv: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::ldiv); + break; // check if dividend is 0 is done elsewhere + default: + ShouldNotReachHere(); return; // unreachable + } + + LIR_Opr result = rlock_result(x); + __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + } else { + assert (x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, + "expect lmul, ladd or lsub"); + // add, sub, mul + left.load_item(); + if (! right.is_register()) { + if (x->op() == Bytecodes::_lmul + || ! right.is_constant() + || ! 
Assembler::operand_valid_for_add_sub_immediate(right.get_jlong_constant())) { + right.load_item(); + } else { // add, sub + assert (x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); + // don't load constants to save register + right.load_nonconstant(); + } + } + rlock_result(x); + arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); + } +} + +// for: _iadd, _imul, _isub, _idiv, _irem +void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { + + // Test if instr is commutative and if we should swap + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + LIRItem* left_arg = &left; + LIRItem* right_arg = &right; + if (x->is_commutative() && left.is_stack() && right.is_register()) { + // swap them if left is real stack (or cached) and right is real register(not cached) + left_arg = &right; + right_arg = &left; + } + + left_arg->load_item(); + + // do not need to load right, as we can handle stack and constants + if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { + + right_arg->load_item(); + rlock_result(x); + + if (!(VM_Version::features() & FT_HW_DIVIDE)) { + // MacroAssembler::divide32 destroys both operand registers + left_arg->set_destroys_register(); + right_arg->set_destroys_register(); + } + + CodeEmitInfo* info = state_for(x); + LIR_Opr tmp = new_register(T_INT); + __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::intConst(0)); + __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); + info = state_for(x); + + if (x->op() == Bytecodes::_irem) { + __ irem(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL); + } else if (x->op() == Bytecodes::_idiv) { + __ idiv(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL); + } + + } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { + if (right.is_constant() + && Assembler::operand_valid_for_add_sub_immediate(right.get_jint_constant())) { + right.load_nonconstant(); + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); + } else { + assert (x->op() == Bytecodes::_imul, "expect imul"); + if (right.is_constant()) { + jint c = right.get_jint_constant(); + if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { + right_arg->dont_load_item(); + } else { + // Cannot use constant op. 
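(For orientation: the power-of-two test above admits exactly the constants that strength_reduce_multiply earlier in this file can lower to a shift plus an add or a subtract. A minimal standalone check of that identity:)

    #include <cassert>
    #include <cstdint>

    // x * (2^n + 1)  ->  (x << n) + x     (chosen when c - 1 is a power of two)
    static int32_t mul_pow2_plus_1(int32_t x, int n)  { return (int32_t)(((uint32_t)x << n) + (uint32_t)x); }
    // x * (2^n - 1)  ->  (x << n) - x     (chosen when c + 1 is a power of two)
    static int32_t mul_pow2_minus_1(int32_t x, int n) { return (int32_t)(((uint32_t)x << n) - (uint32_t)x); }

    int main() {
      assert(mul_pow2_plus_1(7, 3)  == 7 * 9);  // c = 9, c - 1 = 8 is a power of two
      assert(mul_pow2_minus_1(7, 3) == 7 * 7);  // c = 7, c + 1 = 8 is a power of two
      assert((7 << 3) == 7 * 8);                // c = 8 itself: a plain shift
      return 0;
    }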
+ right_arg->load_item(); + } + } else { + right.load_item(); + } + rlock_result(x); + arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); + } +} + +void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { + // when an operand with use count 1 is the left operand, then it is + // likely that no move for 2-operand-LIR-form is necessary + if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { + x->swap_operands(); + } + + ValueTag tag = x->type()->tag(); + assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); + switch (tag) { + case floatTag: + case doubleTag: do_ArithmeticOp_FPU(x); return; + case longTag: do_ArithmeticOp_Long(x); return; + case intTag: do_ArithmeticOp_Int(x); return; + } + ShouldNotReachHere(); +} + +// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr +void LIRGenerator::do_ShiftOp(ShiftOp* x) { + + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + + rlock_result(x); + if (right.is_constant()) { + right.dont_load_item(); + + switch (x->op()) { + case Bytecodes::_ishl: { + int c = right.get_jint_constant() & 0x1f; + __ shift_left(left.result(), c, x->operand()); + break; + } + case Bytecodes::_ishr: { + int c = right.get_jint_constant() & 0x1f; + __ shift_right(left.result(), c, x->operand()); + break; + } + case Bytecodes::_iushr: { + int c = right.get_jint_constant() & 0x1f; + __ unsigned_shift_right(left.result(), c, x->operand()); + break; + } + case Bytecodes::_lshl: { + int c = right.get_jint_constant() & 0x3f; + __ shift_left(left.result(), c, x->operand()); + break; + } + case Bytecodes::_lshr: { + int c = right.get_jint_constant() & 0x3f; + __ shift_right(left.result(), c, x->operand()); + break; + } + case Bytecodes::_lushr: { + int c = right.get_jint_constant() & 0x3f; + __ unsigned_shift_right(left.result(), c, x->operand()); + break; + } + default: + ShouldNotReachHere(); + } + } else { + right.load_item(); + LIR_Opr tmp = LIR_OprFact::illegalOpr; + if (left.result()->type() == T_LONG) + left.set_destroys_register(); + switch (x->op()) { + case Bytecodes::_ishl: { + __ shift_left(left.result(), right.result(), x->operand(), tmp); + break; + } + case Bytecodes::_ishr: { + __ shift_right(left.result(), right.result(), x->operand(), tmp); + break; + } + case Bytecodes::_iushr: { + __ unsigned_shift_right(left.result(), right.result(), x->operand(), tmp); + break; + } + case Bytecodes::_lshl: { + __ shift_left(left.result(), right.result(), x->operand(), tmp); + break; + } + case Bytecodes::_lshr: { + __ shift_right(left.result(), right.result(), x->operand(), tmp); + break; + } + case Bytecodes::_lushr: { + __ unsigned_shift_right(left.result(), right.result(), x->operand(), tmp); + break; + } + default: + ShouldNotReachHere(); + } + } +} + +// _iand, _land, _ior, _lor, _ixor, _lxor +void LIRGenerator::do_LogicOp(LogicOp* x) { + + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + + left.load_item(); + + rlock_result(x); + if (right.is_constant() + && ((right.type()->tag() == intTag + && Assembler::operand_valid_for_logical_immediate(true, right.get_jint_constant())) + || (right.type()->tag() == longTag + && Assembler::operand_valid_for_logical_immediate(false, right.get_jlong_constant())))) { + right.dont_load_item(); + } else { + right.load_item(); + } + switch (x->op()) { + case Bytecodes::_iand: + case Bytecodes::_land: + __ logical_and(left.result(), right.result(), x->operand()); break; + 
case Bytecodes::_ior: + case Bytecodes::_lor: + __ logical_or (left.result(), right.result(), x->operand()); break; + case Bytecodes::_ixor: + case Bytecodes::_lxor: + __ logical_xor(left.result(), right.result(), x->operand()); break; + default: Unimplemented(); + } +} + +// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg +void LIRGenerator::do_CompareOp(CompareOp* x) { + LIRItem left(x->x(), this); + LIRItem right(x->y(), this); + ValueTag tag = x->x()->type()->tag(); + left.load_item(); + right.load_item(); + + if (x->x()->type()->is_float_kind()) { + Bytecodes::Code code = x->op(); + if(hasFPU()) { + LIR_Opr reg = rlock_result(x); + __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); + } else { +#ifdef __SOFTFP__ + address entry; + switch (code) { + case Bytecodes::_fcmpl: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::fcmpl); + break; + case Bytecodes::_fcmpg: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::fcmpg); + break; + case Bytecodes::_dcmpl: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcmpl); + break; + case Bytecodes::_dcmpg: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcmpg); + break; + default: + ShouldNotReachHere(); + } + + LIR_Opr result = call_runtime(x->x(), x->y(), entry, x->type(), NULL); + set_result(x, result); +#else + ShouldNotReachHere(); // check your compiler settings +#endif + } + } else if (x->x()->type()->tag() == longTag) { + LIR_Opr reg = rlock_result(x); + __ lcmp2int(left.result(), right.result(), reg); + } else { + Unimplemented(); + } +} + +LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, LIRItem& cmp_value, LIRItem& new_value) { + LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience + new_value.load_item(); + cmp_value.load_item(); + LIR_Opr result = new_register(T_INT); + if (type == T_OBJECT || type == T_ARRAY) { + __ cas_obj(addr, cmp_value.result(), new_value.result(), ill, ill, result); + } else if (type == T_INT) { + __ cas_int(addr, cmp_value.result(), new_value.result(), ill, ill, result); + } else if (type == T_LONG) { + __ cas_long(addr, cmp_value.result(), new_value.result(), FrameMap::long1_opr, ill, result); + } else { + ShouldNotReachHere(); + } + __ logical_xor(result, LIR_OprFact::intConst(1), result); + return result; +} + +LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { + bool is_oop = type == T_OBJECT || type == T_ARRAY; + LIR_Opr result = new_register(type); + value.load_item(); + assert(type == T_INT || is_oop, "unexpected type"); + LIR_Opr tmp = new_register(T_INT); + __ xchg(addr, value.result(), result, tmp); + return result; +} + +LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { + LIR_Opr result = new_register(type); + value.load_item(); + assert(type == T_INT, "unexpected type"); + LIR_Opr tmp = new_register(T_INT); + __ xadd(addr, value.result(), result, tmp); + return result; +} + +void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + switch (x->id()) { + default: + ShouldNotReachHere(); + break; + case vmIntrinsics::_dabs: + case vmIntrinsics::_dsqrt: + if(hasFPU()) { + assert(x->number_of_arguments() == 1, "wrong type"); + LIRItem value(x->argument_at(0), this); + value.load_item(); + LIR_Opr dst = rlock_result(x); + + switch (x->id()) { + case vmIntrinsics::_dsqrt: { + __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + case vmIntrinsics::_dabs: { + __ abs(value.result(), dst, LIR_OprFact::illegalOpr); + break; + } + } + break; + }// fall through for 
FPU less cores + case vmIntrinsics::_dlog10: // fall through + case vmIntrinsics::_dlog: // fall through + case vmIntrinsics::_dsin: // fall through + case vmIntrinsics::_dtan: // fall through + case vmIntrinsics::_dcos: // fall through + case vmIntrinsics::_dexp: { + assert(x->number_of_arguments() == 1, "wrong type"); + + address runtime_entry = NULL; + switch (x->id()) { +#ifdef __SOFTFP__ + case vmIntrinsics::_dabs: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dabs); + break; + case vmIntrinsics::_dsqrt: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); + break; +#endif + case vmIntrinsics::_dsin: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); + break; + case vmIntrinsics::_dcos: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); + break; + case vmIntrinsics::_dtan: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); + break; + case vmIntrinsics::_dlog: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); + break; + case vmIntrinsics::_dlog10: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); + break; + case vmIntrinsics::_dexp: + runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); + break; + default: + ShouldNotReachHere(); + } + LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL); + set_result(x, result); + break; + } + case vmIntrinsics::_dpow: { + assert(x->number_of_arguments() == 2, "wrong type"); + address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); + LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL); + set_result(x, result); + break; + } + } +} + + +void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + assert(x->number_of_arguments() == 5, "wrong type"); + + // Make all state_for calls early since they can emit code + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem src(x->argument_at(0), this); + LIRItem src_pos(x->argument_at(1), this); + LIRItem dst(x->argument_at(2), this); + LIRItem dst_pos(x->argument_at(3), this); + LIRItem length(x->argument_at(4), this); + + // operands for arraycopy must use fixed registers, otherwise + // LinearScan will fail allocation (because arraycopy always needs a + // call) + + // The java calling convention does not give us enough registers + // so we occupy two more: r4 and r5. The fast path code will be able to + // make use of these registers for performance purpose. If going into + // slow path we'll spill extra data to the stack as necessary + + src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); + src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); + dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); + dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); + + length.load_item_force (FrameMap::as_opr(r4)); + LIR_Opr tmp = FrameMap::as_opr(r5); + + set_no_result(x); + + int flags; + ciArrayKlass* expected_type; + arraycopy_helper(x, &flags, &expected_type); + + __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp, expected_type, flags, info); // does add_safepoint +} + +void LIRGenerator::do_update_CRC32_inner(Intrinsic* x, int is_crc32c) { + assert(!is_crc32c ? 
UseCRC32Intrinsics : UseCRC32CIntrinsics, "why are we here?"); + // Make all state_for calls early since they can emit code + LIR_Opr result = rlock_result(x); + switch (x->id()) { + case vmIntrinsics::_updateCRC32: { + LIRItem crc(x->argument_at(0), this); + LIRItem val(x->argument_at(1), this); + // val is destroyed by update_crc32 + val.set_destroys_register(); + crc.load_item(); + val.load_item(); + __ update_crc32(crc.result(), val.result(), result); + break; + } + case vmIntrinsics::_updateBytesCRC32: + case vmIntrinsics::_updateByteBufferCRC32: + assert(!is_crc32c, "why are we here?"); + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: + { + if (is_crc32c) { + assert(x->id() == vmIntrinsics::_updateBytesCRC32C || + x->id() == vmIntrinsics::_updateDirectByteBufferCRC32C, "why are we here?"); + } + bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32) || + (x->id() == vmIntrinsics::_updateBytesCRC32C); + + LIRItem crc(x->argument_at(0), this); + LIRItem buf(x->argument_at(1), this); + LIRItem off(x->argument_at(2), this); + LIRItem len(x->argument_at(3), this); // length, or end in case of crc32c + buf.load_item(); + off.load_nonconstant(); + + LIR_Opr index = off.result(); + int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; + if(off.result()->is_constant()) { + index = LIR_OprFact::illegalOpr; + offset += off.result()->as_jint(); + } + LIR_Opr base_op = buf.result(); + + if (!is_updateBytes) { // long b raw address + base_op = new_register(T_INT); + __ convert(Bytecodes::_l2i, buf.result(), base_op); + } + + if (offset) { + LIR_Opr tmp = new_pointer_register(); + __ add(base_op, LIR_OprFact::intConst(offset), tmp); + base_op = tmp; + offset = 0; + } + + LIR_Address* a = new LIR_Address(base_op, + index, + offset, + T_BYTE); + BasicTypeList signature(3); + signature.append(T_INT); + signature.append(T_ADDRESS); + signature.append(T_INT); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + const LIR_Opr result_reg = result_register_for(x->type()); + + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr); + + crc.load_item_force(cc->at(0)); + __ move(addr, cc->at(1)); + + if (!is_crc32c) { + len.load_item_force(cc->at(2)); + } else { + __ sub(len.result(), off.result(), cc->at(2)); + } + + __ call_runtime_leaf( + !is_crc32c ? 
+ StubRoutines::updateBytesCRC32() : + StubRoutines::updateBytesCRC32C(), + getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + + break; + } + default: { + ShouldNotReachHere(); + } + } +} + +void LIRGenerator::do_update_CRC32(Intrinsic* x) { + do_update_CRC32_inner(x, false); +} + +void LIRGenerator::do_update_CRC32C(Intrinsic* x) { + do_update_CRC32_inner(x, true); +} + +void LIRGenerator::do_aescrypt_block(Intrinsic* x) { + assert(UseAESIntrinsics, "why are we here?"); + + // first argument is object itself + LIRItem obj(x->argument_at(0), this); + LIRItem from(x->argument_at(1), this); + LIRItem foff(x->argument_at(2), this); + LIRItem to(x->argument_at(3), this); + LIRItem toff(x->argument_at(4), this); + LIR_Opr addr = new_pointer_register(); + + BasicTypeList signature(3); + signature.append(T_ADDRESS); + signature.append(T_ADDRESS); + signature.append(T_ADDRESS); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + // From buffer + LIR_Address* a; + if (foff.result()->is_constant()) { + jint c = foff.result()->as_jint(); + a = new LIR_Address(from.result(), + c, + T_BYTE); + } else { + a = new LIR_Address(from.result(), + foff.result(), + LIR_Address::times_1, + 0, + T_BYTE); + } + __ leal(LIR_OprFact::address(a), addr); + __ add(addr, LIR_OprFact::intConst(arrayOopDesc::base_offset_in_bytes(T_BYTE)), cc->at(0)); + + // To buffer + if (toff.result()->is_constant()) { + jint c = toff.result()->as_jint(); + a = new LIR_Address(to.result(), + c, + T_BYTE); + } else { + a = new LIR_Address(to.result(), + toff.result(), + LIR_Address::times_1, + 0, + T_BYTE); + } + __ leal(LIR_OprFact::address(a), addr); + __ add(addr, LIR_OprFact::intConst(arrayOopDesc::base_offset_in_bytes(T_BYTE)), cc->at(1)); + + // Key + LIR_Address* k = new LIR_Address(obj.result(), + com_sun_crypto_provider_AESCrypt::K_offset(), + T_OBJECT); + + __ load(k, addr); + __ add(addr, LIR_OprFact::intConst(arrayOopDesc::base_offset_in_bytes(T_BYTE)), cc->at(2)); + + set_no_result(x); + + switch (x->id()) { + case vmIntrinsics::_aescrypt_encryptBlock: + { + __ call_runtime_leaf(StubRoutines::aescrypt_encryptBlock(), getThreadTemp(), LIR_OprFact::illegalOpr, cc->args()); + break; + } + case vmIntrinsics::_aescrypt_decryptBlock: + { + __ call_runtime_leaf(StubRoutines::aescrypt_decryptBlock(), getThreadTemp(), LIR_OprFact::illegalOpr, cc->args()); + break; + } + default: + { + ShouldNotReachHere(); + } + } +} + +// This method is called in the C1 Xcom mode +void LIRGenerator::do_aescrypt_cbc(Intrinsic* x) { + assert(UseAESIntrinsics && UseNeon, "why are we here?"); + + LIRItem obj(x->argument_at(0), this); + LIRItem from(x->argument_at(1), this); + LIRItem foff(x->argument_at(2), this); + LIRItem flen(x->argument_at(3), this); + LIRItem to(x->argument_at(4), this); + LIRItem toff(x->argument_at(5), this); + LIR_Opr addr = new_pointer_register(); + + // force to load len into r4 + flen.load_item_force (FrameMap::as_opr(r4)); + + BasicTypeList signature(5); + signature.append(T_ADDRESS); //from + signature.append(T_ADDRESS); //to + signature.append(T_ADDRESS); //key + signature.append(T_ADDRESS); //rvec + signature.append(T_INT); //len + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + // From buffer + LIR_Address* a; + a = new LIR_Address(from.result(),T_OBJECT); + __ leal(LIR_OprFact::address(a), addr); + __ add(addr, LIR_OprFact::intConst(arrayOopDesc::base_offset_in_bytes(T_BYTE)), addr); + __ add(addr, foff.result(), cc->at(0)); + + // To buffer + 
a = new LIR_Address(to.result(),T_OBJECT); + __ leal(LIR_OprFact::address(a), addr); + __ add(addr, LIR_OprFact::intConst(arrayOopDesc::base_offset_in_bytes(T_BYTE)), addr); + __ add(addr, toff.result(), cc->at(1)); + // key + a = new LIR_Address(obj.result(), + com_sun_crypto_provider_FeedbackCipher::embeddedCipher_offset(), + T_OBJECT); + __ load(a, addr); + __ add(addr, LIR_OprFact::intConst(com_sun_crypto_provider_AESCrypt::K_offset()), addr); + a = new LIR_Address(addr, T_OBJECT); + __ load(a, addr); + __ add(addr, LIR_OprFact::intConst(arrayOopDesc::base_offset_in_bytes(T_BYTE)), cc->at(2)); + + // rvec + a = new LIR_Address(obj.result(), + com_sun_crypto_provider_CipherBlockChaining::r_offset(), + T_OBJECT); + __ load(a, addr); + __ add(addr, LIR_OprFact::intConst(arrayOopDesc::base_offset_in_bytes(T_BYTE)), cc->at(3)); + + //input len + __ move(flen.result(), cc->at(4)); + + LIR_Opr result = rlock_result(x); + const LIR_Opr result_reg = result_register_for(x->type()); + + switch (x->id()) { + case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: + { + __ call_runtime_leaf(StubRoutines::cipherBlockChaining_encryptAESCrypt_special(), getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + break; + } + case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: + { + __ call_runtime_leaf(StubRoutines::cipherBlockChaining_decryptAESCrypt_special(), getThreadTemp(), result_reg, cc->args()); + __ move(result_reg, result); + break; + } + default: + { + ShouldNotReachHere(); + } + + } +} + +// This method is called in the C1 Xcom mode +void LIRGenerator::do_sha(Intrinsic* x) { + assert(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics, "why are we here?"); + + // first argument is object itself + LIRItem obj(x->argument_at(0), this); + LIRItem from(x->argument_at(1), this); + LIRItem foff(x->argument_at(2), this); + + BasicTypeList signature(2); + signature.append(T_ADDRESS); + signature.append(T_ADDRESS); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + // From buffer + LIR_Address* a; + if (foff.result()->is_constant()) { + jint c = foff.result()->as_jint(); + a = new LIR_Address(from.result(), + c, + T_BYTE); + } else { + a = new LIR_Address(from.result(), + foff.result(), + LIR_Address::times_1, + 0, + T_BYTE); + } + LIR_Opr addr_from = new_pointer_register(); + __ leal(LIR_OprFact::address(a), addr_from); + __ add(addr_from, LIR_OprFact::intConst(arrayOopDesc::base_offset_in_bytes(T_BYTE)), cc->at(0)); + + + // State + int state_offset; + int state_data_offset; + address stub_addr; + switch (x->id()) { + case vmIntrinsics::_sha_implCompress: + state_offset = sun_security_provider_SHA2::state_offset(); + state_data_offset = arrayOopDesc::base_offset_in_bytes(T_INT); + stub_addr = StubRoutines::sha1_implCompress(); + break; + case vmIntrinsics::_sha2_implCompress: + state_offset = sun_security_provider_SHA2::state_offset(); + state_data_offset = arrayOopDesc::base_offset_in_bytes(T_INT); + stub_addr = StubRoutines::sha256_implCompress(); + break; + case vmIntrinsics::_sha5_implCompress: + state_offset = sun_security_provider_SHA5::state_offset(); + state_data_offset = arrayOopDesc::base_offset_in_bytes(T_LONG); + stub_addr = StubRoutines::sha512_implCompress(); + break; + default: + ShouldNotReachHere(); + return; // unreachable + } + + LIR_Address* state = new LIR_Address(obj.result(), state_offset, T_OBJECT); + + LIR_Opr addr_state = new_pointer_register(); + __ load(state, addr_state); + __ add(addr_state, 
LIR_OprFact::intConst(state_data_offset), cc->at(1)); + + set_no_result(x); + + __ call_runtime_leaf(stub_addr, getThreadTemp(), LIR_OprFact::illegalOpr, cc->args()); + +} + +void LIRGenerator::do_montgomery_intrinsic(Intrinsic* x) { + bool squaring = x->id() == vmIntrinsics::_montgomerySquare; + int n_arg_idx = squaring ? 1 : 2; + assert(squaring ? UseMontgomerySquareIntrinsic : UseMontgomeryMultiplyIntrinsic, "why are we here?"); + + LIRItem a(x->argument_at(0), this); + LIRItem n(x->argument_at(n_arg_idx), this); + LIRItem len(x->argument_at(n_arg_idx+1), this); + LIRItem inv(x->argument_at(n_arg_idx+2), this); + LIRItem product(x->argument_at(n_arg_idx+3), this); + + BasicTypeList signature(squaring ? 5 : 6); + signature.append(T_ADDRESS); + if (!squaring) + signature.append(T_ADDRESS); + signature.append(T_ADDRESS); + signature.append(T_INT); + signature.append(T_LONG); + signature.append(T_ADDRESS); + CallingConvention* cc = frame_map()->c_calling_convention(&signature); + + // A array, c_rarg0 + __ leal(LIR_OprFact::address(emit_array_address(a.result(), LIR_OprFact::intConst(0), T_INT)), cc->at(0)); + if (!squaring) { + LIRItem b(x->argument_at(1), this); + // B array, c_rarg1 + __ leal(LIR_OprFact::address(emit_array_address(b.result(), LIR_OprFact::intConst(0), T_INT)), cc->at(1)); + } + // N array, c_rarg2(1) + __ leal(LIR_OprFact::address(emit_array_address(n.result(), LIR_OprFact::intConst(0), T_INT)), cc->at(n_arg_idx)); + // len, c_rarg3(2) + assert(cc->at(n_arg_idx+1)->is_cpu_register(), "assumed"); + __ move(len.result(), cc->at(n_arg_idx+1)); + // inv, stack slot + assert(cc->at(n_arg_idx+2)->is_address(), "assumed"); + __ move(inv.result(), cc->at(n_arg_idx+2)); + // M array, stack slot + LIR_Opr addr = new_pointer_register(); + __ leal(LIR_OprFact::address(emit_array_address(product.result(), LIR_OprFact::intConst(0), T_INT)), addr); + __ move(addr, cc->at(n_arg_idx+3)); + + set_result(x, product.result()); + + switch (x->id()) { + case vmIntrinsics::_montgomeryMultiply: + { + __ call_runtime_leaf(StubRoutines::montgomeryMultiply(), getThreadTemp(), LIR_OprFact::illegalOpr, cc->args()); + break; + } + case vmIntrinsics::_montgomerySquare: + { + __ call_runtime_leaf(StubRoutines::montgomerySquare(), getThreadTemp(), LIR_OprFact::illegalOpr, cc->args()); + break; + } + default: + { + ShouldNotReachHere(); + } + } +} + +void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { + Unimplemented(); +} + +void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { + fatal("vectorizedMismatch intrinsic is not implemented on this platform"); +} + +// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f +// _i2b, _i2c, _i2s +void LIRGenerator::do_Convert(Convert* x) { + address entry = NULL; + switch (x->op()) { + case Bytecodes::_d2i: + case Bytecodes::_f2i: + case Bytecodes::_i2f: + case Bytecodes::_i2d: + case Bytecodes::_f2d: + case Bytecodes::_d2f: + if(hasFPU()) { + break; + }// fall through for FPU-less cores + case Bytecodes::_d2l: + case Bytecodes::_f2l: + case Bytecodes::_l2d: + case Bytecodes::_l2f: { + + switch (x->op()) { +#ifdef __SOFTFP__ + case Bytecodes::_i2f: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::i2f); + break; + case Bytecodes::_i2d: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::i2d); + break; + case Bytecodes::_f2d: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::f2d); + break; + case Bytecodes::_d2f: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::d2f); + break; + case Bytecodes::_d2i: + entry = CAST_FROM_FN_PTR(address, 
SharedRuntime::d2i); + break; + case Bytecodes::_f2i: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::f2i); + break; +#endif + case Bytecodes::_d2l: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::d2l); + break; + case Bytecodes::_f2l: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::f2l); + break; + case Bytecodes::_l2d: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::l2d); + break; + case Bytecodes::_l2f: + entry = CAST_FROM_FN_PTR(address, SharedRuntime::l2f); + break; + default: + ShouldNotReachHere(); + } + LIR_Opr result = call_runtime(x->value(), entry, x->type(), NULL); + set_result(x, result); + } + break; + + default: + break; +} + if(NULL == entry) { + LIRItem value(x->value(), this); + value.load_item(); + + if (x->op() == Bytecodes::_f2i || x->op() == Bytecodes::_d2i) { + value.set_destroys_register(); + } + + LIR_Opr input = value.result(); + LIR_Opr result = rlock(x); + + __ convert(x->op(), input, result); + + assert(result->is_virtual(), "result must be virtual register"); + set_result(x, result); + } +} + +void LIRGenerator::do_NewInstance(NewInstance* x) { +#ifndef PRODUCT + if (PrintNotLoaded && !x->klass()->is_loaded()) { + tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); + } +#endif + CodeEmitInfo* info = state_for(x, x->state()); + LIR_Opr reg = result_register_for(x->type()); + new_instance(reg, x->klass(), x->is_unresolved(), + FrameMap::r2_oop_opr, + FrameMap::r5_oop_opr, + FrameMap::r4_oop_opr, + LIR_OprFact::illegalOpr, + FrameMap::r3_metadata_opr, info); + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { + CodeEmitInfo* info = state_for(x, x->state()); + + LIRItem length(x->length(), this); + length.load_item_force(FrameMap::r6_opr); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::r2_oop_opr; + LIR_Opr tmp2 = FrameMap::r4_oop_opr; + LIR_Opr tmp3 = FrameMap::r5_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::r3_metadata_opr; + LIR_Opr len = length.result(); + BasicType elem_type = x->elt_type(); + + __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); + + CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); + __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { + LIRItem length(x->length(), this); + // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction + // and therefore provide the state before the parameters have been consumed + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + } + + CodeEmitInfo* info = state_for(x, x->state()); + + LIR_Opr reg = result_register_for(x->type()); + LIR_Opr tmp1 = FrameMap::r2_oop_opr; + LIR_Opr tmp2 = FrameMap::r4_oop_opr; + LIR_Opr tmp3 = FrameMap::r5_oop_opr; + LIR_Opr tmp4 = reg; + LIR_Opr klass_reg = FrameMap::r3_metadata_opr; + + length.load_item_force(FrameMap::r6_opr); + LIR_Opr len = length.result(); + + CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); + ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); + if (obj == ciEnv::unloaded_ciobjarrayklass()) { + BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); + } + klass2reg_with_patching(klass_reg, obj, patching_info); + __ 
allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + + +void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { + Values* dims = x->dims(); + int i = dims->length(); + LIRItemList* items = new LIRItemList(i, i, NULL); + while (i-- > 0) { + LIRItem* size = new LIRItem(dims->at(i), this); + items->at_put(i, size); + } + + // Evaluate state_for early since it may emit code. + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || PatchALot) { + patching_info = state_for(x, x->state_before()); + + // Cannot re-use same xhandlers for multiple CodeEmitInfos, so + // clone all handlers (NOTE: Usually this is handled transparently + // by the CodeEmitInfo cloning logic in CodeStub constructors but + // is done explicitly here because a stub isn't being used). + x->set_exception_handlers(new XHandlers(x->exception_handlers())); + } + CodeEmitInfo* info = state_for(x, x->state()); + + i = dims->length(); + while (i-- > 0) { + LIRItem* size = items->at(i); + size->load_item(); + + store_stack_parameter(size->result(), in_ByteSize(i*4)); + } + + LIR_Opr klass_reg = FrameMap::r1_metadata_opr; + klass2reg_with_patching(klass_reg, x->klass(), patching_info); + + LIR_Opr rank = FrameMap::r2_opr; + __ move(LIR_OprFact::intConst(x->rank()), rank); + LIR_Opr varargs = FrameMap::r3_opr; + __ move(FrameMap::sp_opr, varargs); + LIR_OprList* args = new LIR_OprList(3); + args->append(klass_reg); + args->append(rank); + args->append(varargs); + LIR_Opr reg = result_register_for(x->type()); + __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), + LIR_OprFact::illegalOpr, + reg, args, info); + + LIR_Opr result = rlock_result(x); + __ move(reg, result); +} + +void LIRGenerator::do_BlockBegin(BlockBegin* x) { + // nothing to do for now +} + +void LIRGenerator::do_CheckCast(CheckCast* x) { + LIRItem obj(x->obj(), this); + + CodeEmitInfo* patching_info = NULL; + if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check() && !x->is_invokespecial_receiver_check())) { + // must do this before locking the destination register as an oop register, + // and before the obj is loaded (the latter is for deoptimization) + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + + // info for exceptions + CodeEmitInfo* info_for_exception = + (x->needs_exception_state() ? 
state_for(x) : + state_for(x, x->state_before(), true /*ignore_xhandler*/)); + + CodeStub* stub; + if (x->is_incompatible_class_change_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception); + } else if (x->is_invokespecial_receiver_check()) { + assert(patching_info == NULL, "can't patch this"); + stub = new DeoptimizeStub(info_for_exception, + Deoptimization::Reason_class_check, + Deoptimization::Action_none); + } else { + stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception); + } + LIR_Opr reg = rlock_result(x); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded()) { + tmp3 = new_register(objectType); + } + __ checkcast(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), info_for_exception, patching_info, stub, + x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_InstanceOf(InstanceOf* x) { + LIRItem obj(x->obj(), this); + + // result and test object may not be in same register + LIR_Opr reg = rlock_result(x); + CodeEmitInfo* patching_info = NULL; + if ((!x->klass()->is_loaded() || PatchALot)) { + // must do this before locking the destination register as an oop register + patching_info = state_for(x, x->state_before()); + } + obj.load_item(); + LIR_Opr tmp3 = LIR_OprFact::illegalOpr; + if (!x->klass()->is_loaded()) { + tmp3 = new_register(objectType); + } + __ instanceof(reg, obj.result(), x->klass(), + new_register(objectType), new_register(objectType), tmp3, + x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); +} + +void LIRGenerator::do_If(If* x) { + assert(x->number_of_sux() == 2, "inconsistency"); + ValueTag tag = x->x()->type()->tag(); + + If::Condition cond = x->cond(); + + LIRItem xitem(x->x(), this); + LIRItem yitem(x->y(), this); + LIRItem* xin = &xitem; + LIRItem* yin = &yitem; + + xin->load_item(); + + if (yin->is_constant()) { + if (tag == longTag + && Assembler::operand_valid_for_add_sub_immediate(yin->get_jlong_constant())) { + yin->dont_load_item(); + } else if (tag == intTag + && Assembler::operand_valid_for_add_sub_immediate(yin->get_jint_constant())) { + yin->dont_load_item(); + } else if (tag == addressTag + && Assembler::operand_valid_for_add_sub_immediate(yin->get_address_constant())) { + yin->dont_load_item(); + } else if (tag == objectTag && yin->get_jobject_constant()->is_null_object()) { + yin->dont_load_item(); + } else { + yin->load_item(); + } + } else { + yin->load_item(); + } + + set_no_result(x); + + LIR_Opr left = xin->result(); + LIR_Opr right = yin->result(); + LIR_Condition lir_c = lir_cond(cond); + + // add safepoint before generating condition code so it can be recomputed + if (x->is_safepoint()) { + // increment backedge counter if needed + increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()), + x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci()); + __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); + } + + +#ifdef __SOFTFP__ + if(x->x()->type()->is_float_kind() && !(hasFPU())) {// FPU-less cores + address entry; + bool unordered_flag = x->unordered_is_true() != (lir_c == lir_cond_greater || lir_c == lir_cond_lessEqual); + if (x->x()->type()->is_float()) { + entry = CAST_FROM_FN_PTR(address, unordered_flag ? 
SharedRuntime::fcmpg : SharedRuntime::fcmpl); + } else if (x->x()->type()->is_double()) { + entry = CAST_FROM_FN_PTR(address, unordered_flag ? SharedRuntime::dcmpg : SharedRuntime::dcmpl); + } else { + ShouldNotReachHere(); + } + + LIR_Opr fcmp_res = call_runtime(x->x(), x->y(), entry, intType, NULL); + LIR_Opr zero = LIR_OprFact::intConst(0); + __ cmp(lir_c, fcmp_res, zero); + } else +#endif + { + __ cmp(lir_c, left, right); + } + + // Generate branch profiling. Profiling code doesn't kill flags. + profile_branch(x, cond); + move_to_phi(x->state()); + + if (x->x()->type()->is_float_kind()) { + if(hasFPU()) { + __ branch(lir_c, right->type(), x->tsux(), x->usux()); + } else { + __ branch(lir_c, T_INT, x->tsux()); + } + } else + { + __ branch(lir_c, right->type(), x->tsux()); + } + assert(x->default_sux() == x->fsux(), "wrong destination above"); + __ jump(x->default_sux()); +} + +LIR_Opr LIRGenerator::getThreadPointer() { + return FrameMap::as_pointer_opr(rthread); +} + +void LIRGenerator::trace_block_entry(BlockBegin* block) { + __ move(LIR_OprFact::intConst(block->block_id()), FrameMap::r0_opr); + LIR_OprList* args = new LIR_OprList(1); + args->append(FrameMap::r0_opr); + address func = CAST_FROM_FN_PTR(address, Runtime1::trace_block_entry); + __ call_runtime_leaf(func, LIR_OprFact::illegalOpr, LIR_OprFact::illegalOpr, args); +} + +void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, + CodeEmitInfo* info) { + if (value->is_double_cpu()) { + __ move(value, FrameMap::long0_opr); + __ volatile_store_mem_reg(FrameMap::long0_opr, address, info); + } else { + __ volatile_store_mem_reg(value, address, info); + } +} + +void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, + CodeEmitInfo* info) { + if (result->is_double_cpu()) { + __ volatile_load_mem_reg(address, FrameMap::long0_opr, info); + __ move(FrameMap::long0_opr, result); + } else { + __ volatile_load_mem_reg(address, result, info); + } +} --- /dev/null 2018-09-25 19:24:26.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_LIR_aarch32.cpp 2018-09-25 19:24:26.000000000 +0300 @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/register.hpp" +#include "c1/c1_LIR.hpp" + +FloatRegister LIR_OprDesc::as_float_reg() const { + return as_FloatRegister(fpu_regnr()); +} + +FloatRegister LIR_OprDesc::as_double_reg() const { + return as_FloatRegister(fpu_regnrLo()); +} + +// Reg2 unused. 
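The "Reg2 unused" note above appears to be carried over from a port where that holds; in the factory below both halves of the pair are encoded, as its assert spells out. A minimal usage sketch, with illustrative register numbers:

    // Sketch: a C1 double occupies a consecutive pair of single-precision VFP registers.
    LIR_Opr d = LIR_OprFact::double_fpu(0 /* low half, s0 */, 1 /* high half, s1 */);
    FloatRegister lo = d->as_double_reg();   // reports the low register of the pair (see above)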
+LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { + assert(as_FloatRegister(reg2) != fnoreg, "aarch32 holds double in two regs."); + return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) | + (reg2 << LIR_OprDesc::reg2_shift) | + LIR_OprDesc::double_type | + LIR_OprDesc::fpu_register | + LIR_OprDesc::double_size); +} + +#ifndef PRODUCT +void LIR_Address::verify() const { + assert(base()->is_cpu_register(), "wrong base operand"); + assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand"); + assert(base()->type() == T_OBJECT || base()->type() == T_INT || base()->type() == T_METADATA, + "wrong type for addresses"); +} +#endif // PRODUCT + --- /dev/null 2018-09-25 19:24:27.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_LinearScan_aarch32.cpp 2018-09-25 19:24:27.000000000 +0300 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_LinearScan.hpp" + +void LinearScan::allocate_fpu_stack() { + // No FPU stack on AArch32 +} --- /dev/null 2018-09-25 19:24:29.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_LinearScan_aarch32.hpp 2018-09-25 19:24:28.000000000 +0300 @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH32_VM_C1_LINEARSCAN_AARCH32_HPP +#define CPU_AARCH32_VM_C1_LINEARSCAN_AARCH32_HPP + +inline bool LinearScan::is_processed_reg_num(int reg_num) { + return reg_num <= pd_last_cpu_reg || reg_num >= pd_nof_cpu_regs_frame_map; +} + +inline int LinearScan::num_physical_regs(BasicType type) { + if (type == T_LONG || type == T_DOUBLE) { + return 2; + } + return 1; +} + +inline bool LinearScan::requires_adjacent_regs(BasicType type) { + return type == T_DOUBLE; +} + +inline bool LinearScan::is_caller_save(int assigned_reg) { + assert(assigned_reg >= 0 && assigned_reg < nof_regs, + "should call this only for registers"); + // TODO: Remove the following line when support for callee-saved registers + // is added + return true; + if (assigned_reg < pd_first_callee_saved_cpu_reg) { + return true; + } + if (assigned_reg > pd_last_callee_saved_cpu_reg && + assigned_reg < pd_first_callee_saved_fpu_reg) { + return true; + } + if (assigned_reg > pd_last_callee_saved_fpu_reg && + assigned_reg <= pd_last_fpu_reg) { + return true; + } + return false; +} + +// If there are special cases when some particular LIR operations kill some +// specific registers, this behavior should be described here. An example +// can be found in x86 port. +inline void LinearScan::pd_add_temps(LIR_Op* op) { + if (op->code() == lir_move) { + LIR_Op1* move_op = op->as_Op1(); + if (move_op->move_kind() == lir_move_volatile) { + bool is_long = move_op->type() == T_LONG; + bool is_double = move_op->type() == T_DOUBLE; + bool is_store = move_op->in_opr()->is_register(); + if (is_double) { + add_temp(reg_num(FrameMap::long0_opr), op->id(), noUse, T_ILLEGAL); + add_temp(reg_numHi(FrameMap::long0_opr), op->id(), noUse, T_ILLEGAL); + } + if (is_store && (is_long || is_double)) { + add_temp(reg_num(FrameMap::long1_opr), op->id(), noUse, T_ILLEGAL); + add_temp(reg_numHi(FrameMap::long1_opr), op->id(), noUse, T_ILLEGAL); + } + } + } +} + +inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { +#ifndef HARD_FLOAT_CC + BasicType type = cur->type(); + if(!hasFPU()) { + if (type == T_FLOAT || type == T_DOUBLE) { + _first_reg = pd_first_cpu_reg; + _last_reg = FrameMap::last_cpu_reg();; + return true; + } + } +#endif + return false; +} + +#endif // CPU_AARCH32_VM_C1_LINEARSCAN_AARCH32_HPP --- /dev/null 2018-09-25 19:24:30.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_MacroAssembler_aarch32.cpp 2018-09-25 19:24:29.000000000 +0300 @@ -0,0 +1,434 @@ +/* + * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "classfile/systemDictionary.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interpreter/interpreter.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" + +void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, + FloatRegister f0, FloatRegister f1, + Register result) +{ + Label done; + if (is_float) { + vcmp_f32(f0, f1); + } else { + vcmp_f64(f0, f1); + } + + get_fpsr(); + + mov(result, 0); + if (unordered_result < 0) { + // we want -1 for unordered or less than, 0 for equal and 1 for + // greater than. + mov(result, 1, NE); // Not equal or unordered + neg(result, result, LT); // Less than or unordered + } else { + // we want -1 for less than, 0 for equal and 1 for unordered or + // greater than. + mov(result, 1, NE); // Not equal or unordered + neg(result, result, LO); // Less than + } +} + +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { + const int aligned_mask = BytesPerWord -1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done, fail; + int null_check_offset = -1; + + verify_oop(obj); + + // save object being locked into the BasicObjectLock + str(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + + if (UseBiasedLocking) { + assert(scratch != noreg, "should have scratch register at this point"); + null_check_offset = biased_locking_enter(obj, hdr, scratch, rscratch1, false, done, &slow_case); + } else { + null_check_offset = offset(); + } + + // Load object header + ldr(hdr, Address(obj, hdr_offset)); + // and mark it as unlocked + orr(hdr, hdr, markOopDesc::unlocked_value); + // save unlocked object header into the displaced header location on the stack + str(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. 
unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + lea(rscratch2, Address(obj, hdr_offset)); + cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/NULL); + // if the object header was the same, we're done + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) sp <= hdr + // 3) hdr <= sp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr - sp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + mov(rscratch1, sp); + sub(hdr, hdr, rscratch1); + mov(rscratch2, aligned_mask - os::vm_page_size()); + ands(hdr, hdr, rscratch2); + // for recursive locking, the result is zero => save it in the displaced header + // location (NULL in the displaced hdr location indicates recursive locking) + str(hdr, Address(disp_hdr, 0)); + // otherwise we don't care about the result and handle locking via runtime call + cbnz(hdr, slow_case); + // done + bind(done); + if (PrintBiasedLockingStatistics) { + lea(rscratch2, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); + addmw(Address(rscratch2, 0), 1, rscratch1); + } + return null_check_offset; +} + + +void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { + const int aligned_mask = BytesPerWord -1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); + Label done; + + if (UseBiasedLocking) { + // load object + ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + biased_locking_exit(obj, hdr, done); + } + + // load displaced header + ldr(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is NULL we had recursive locking + // if we had recursive locking, we are done + cbz(hdr, done); + if (!UseBiasedLocking) { + // load object + ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); + } + verify_oop(obj); + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + if (hdr_offset) { + lea(rscratch1, Address(obj, hdr_offset)); + cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case); + } else { + cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case); + } + // done + bind(done); +} + + +// Defines obj, preserves var_size_in_bytes +void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, Label& slow_case) { + if (UseTLAB) { + tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); + } else { + eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); + } +} + +void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { + assert_different_registers(obj, klass, len); + if (UseBiasedLocking && !len->is_valid()) { + assert_different_registers(obj, klass, len, t1, t2); + ldr(t1, Address(klass, 
Klass::prototype_header_offset())); + } else { + // This assumes that all prototype bits fit in an int32_t + mov(t1, (int32_t)(intptr_t)markOopDesc::prototype()); + } + str(t1, Address(obj, oopDesc::mark_offset_in_bytes())); + str(klass, Address(obj, oopDesc::klass_offset_in_bytes())); + + if (len->is_valid()) { + str(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); + } +} + +// preserves obj, destroys len_in_bytes +void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) { + assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); + Label done; + + // len_in_bytes is positive and ptr sized + subs(len_in_bytes, len_in_bytes, hdr_size_in_bytes); + b(done, Assembler::EQ); + + // Preserve obj + if (hdr_size_in_bytes) + add(obj, obj, hdr_size_in_bytes); + zero_memory(obj, len_in_bytes, t1); + if (hdr_size_in_bytes) + sub(obj, obj, hdr_size_in_bytes); + + bind(done); +} + + +void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case) { + assert_different_registers(obj, t1, t2); // XXX really? + assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); + + try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); + + initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); +} + +// This method clobbers t1, t2, and rscratch1 registers. +void C1_MacroAssembler::initialize_object(Register obj, Register klass, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, Register t2, + bool is_tlab_allocated) { + assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, + "con_size_in_bytes is not multiple of alignment"); + + const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; + + initialize_header(obj, klass, noreg, t1, t2); + + if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { + // Null out rest of allocated space + const Register index = t2; + const int threshold = 8 * BytesPerWord; + if (var_size_in_bytes != noreg) { + mov(index, var_size_in_bytes); + initialize_body(obj, index, hdr_size_in_bytes, t1); + } else if (con_size_in_bytes <= threshold) { + // Emit required number of str instructions (unroll loop completely) + mov(t1, 0); + for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord) { + str(t1, Address(obj, i)); + } + } else if (con_size_in_bytes > hdr_size_in_bytes) { + block_comment("zero memory"); + // Use loop to null out fields + int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord; + mov(t1, 0); + + const int unroll = 4; // Number of str instructions we'll unroll + mov(index, words / unroll); + int remainder = words % unroll; + lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord)); + + Label entry_point, loop; + b(entry_point); + bind(loop); + sub(index, index, 1); + for (int i = -unroll; i < 0; i++) { + if (-i == remainder) { + bind(entry_point); + } + str(t1, Address(rscratch1, i * BytesPerWord)); + } + if (remainder == 0) { + bind(entry_point); + } + add(rscratch1, rscratch1, unroll * BytesPerWord); + cbnz(index, loop); + } + } + + membar(StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == r0, "must be"); + far_call(RuntimeAddress(Runtime1::entry_for( + Runtime1::dtrace_object_alloc_id))); + } + + verify_oop(obj); +} + +void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int header_size, int f, Register klass, 
Label& slow_case) { + assert_different_registers(obj, len, t1, t2, klass); + + // determine alignment mask + assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); + + // check for negative or excessive length + mov(rscratch1, (int32_t)max_array_allocation_length); + cmp(len, rscratch1); + b(slow_case, Assembler::HS); + + const Register arr_size = t2; // okay to be the same + // align object end + mov(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); + add(arr_size, arr_size, len, Assembler::lsl(f)); + mov(t1, ~MinObjAlignmentInBytesMask); + andr(arr_size, arr_size, t1); + + try_allocate(obj, arr_size, 0, t1, t2, slow_case); + + initialize_header(obj, klass, len, t1, t2); + + // clear rest of allocated space + const Register len_zero = len; + initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero); + + membar(StoreStore); + + if (CURRENT_ENV->dtrace_alloc_probes()) { + assert(obj == r0, "must be"); + far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); + } + + verify_oop(obj); +} + + +void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) { + verify_oop(receiver); + // explicit NULL check not needed since load from [klass_offset] causes a trap + // check against inline cache + assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check"); + + cmp_klass(receiver, iCache, rscratch1); +} + +void C1_MacroAssembler::build_frame(int frame_size_in_bytes, + int bang_size_in_bytes) { + assert(bang_size_in_bytes >= frame_size_in_bytes, + "stack bang size incorrect"); + + // If we have to make this method not-entrant, we'll overwrite its first + // instruction with a jump. For this action to be legal we must ensure that + // this first instruction is a B, BL, NOP, BKPT, or SVC. Make it a NOP + nop(); + + // Make sure there is enough stack space for this method's activation + generate_stack_overflow_check(bang_size_in_bytes); + + // Push lr, rfp, and optionally update rfp. rfp points to the first stack + // word used by the new frame. + + if (FrameAPCS) { + mov(rscratch2, sp); + stmdb(sp, RegSet::of(rfp, rscratch2, lr, r15_pc).bits()); + add(rfp, sp, 3 * wordSize); + } else { + stmdb(sp, RegSet::of(rfp, lr).bits()); + if (PreserveFramePointer) { + add(rfp, sp, BytesPerWord); + } + } + + // Create frame. frame_size_in_bytes always comes from + // LIR_Assembler::initial_frame_size_in_bytes() method, and it already + // takes into account two stack words spent on saving lr and rfp. + decrement(sp, frame_size_in_bytes); +} + +void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { + if (FrameAPCS) { + ldmea(rfp, RegSet::of(rfp, sp, lr).bits(), false/*wb*/); + } else { + // Remove frame. frame_size_in_bytes always comes from + // LIR_Assembler::initial_frame_size_in_bytes() method, and it already + // takes into account two stack words spent on saving lr and rfp. + increment(sp, frame_size_in_bytes); + // Pop rfp and lr + ldmia(sp, RegSet::of(rfp, lr).bits()); + } +} + +void C1_MacroAssembler::verified_entry() { +} + +void C1_MacroAssembler::patchable_load(Register reg, address addr) { + nop(); + membar(Assembler::LoadLoad); + far_load(reg, addr); +} + +void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { + //// Not APCS + // - 1: link + // fp 0: return address + // + 1: argument with offset 0 + // + 2: argument with offset 1 + // + 3: ... 
+ //// APCS + // - 3: link + // - 2: sp + // - 1: return address + // fp 0: pc + // + 1: argument with offset 0 + // + 2: argument with offset 1 + // + 3: ... + + ldr(reg, Address(rfp, (offset_in_words + 1) * BytesPerWord)); +} + +#ifndef PRODUCT + +void C1_MacroAssembler::verify_stack_oop(int stack_offset) { + if (!VerifyOops) return; + verify_oop_addr(Address(sp, stack_offset), "oop"); +} + +void C1_MacroAssembler::verify_not_null_oop(Register r) { + if (!VerifyOops) return; + Label not_null; + cbnz(r, not_null); + stop("non-null oop required"); + bind(not_null); + verify_oop(r); +} + +void C1_MacroAssembler::invalidate_registers(bool inv_r0, bool inv_r2, bool inv_r3) { +#ifdef ASSERT + static int nn; + if (inv_r0) mov(r0, 0xDEAD); + if (inv_r2) mov(r2, nn++); + if (inv_r3) mov(r3, 0xDEAD); +#endif +} +#endif // ifndef PRODUCT --- /dev/null 2018-09-25 19:24:31.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_MacroAssembler_aarch32.hpp 2018-09-25 19:24:30.000000000 +0300 @@ -0,0 +1,116 @@ +/* + * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH32_VM_C1_MACROASSEMBLER_AARCH32_HPP +#define CPU_AARCH32_VM_C1_MACROASSEMBLER_AARCH32_HPP + +//TODO: XXX: merge +//using MacroAssembler::build_frame; +using MacroAssembler::null_check; + +// C1_MacroAssembler contains high-level macros for C1 + + private: + int _rsp_offset; // track rsp changes + // initialization + void pd_init() { _rsp_offset = 0; } + + public: + void try_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); + void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1); + + void float_cmp(bool is_float, int unordered_result, + FloatRegister f0, FloatRegister f1, + Register result); + + // locking + // hdr : must be r0, contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must point to the displaced header location, contents preserved + // scratch : scratch register, contents destroyed + // returns code offset at which to add null check debug information + int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); + + // unlocking + // hdr : contents destroyed + // obj : must point to the object to lock, contents preserved + // disp_hdr: must be r0 & must point to the displaced header location, contents destroyed + void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); + + void initialize_object( + Register obj, // result: pointer to object after successful allocation + Register klass, // object klass + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB + ); + + // allocation of fixed-size objects + // (can also be used to allocate fixed-size arrays, by setting + // hdr_size correctly and storing the array length afterwards) + // obj : will contain pointer to allocated object + // t1, t2 : scratch registers - contents destroyed + // header_size: size of object header in words + // object_size: total size of object in words + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case); + + enum { + max_array_allocation_length = 0x00FFFFFF + }; + + // allocation of arrays + // obj : will contain pointer to allocated object + // len : array length in number of elements + // t : scratch register - contents destroyed + // header_size: size of object header in words + // f : element scale factor + // slow_case : exit to slow case implementation if fast allocation fails + void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, int f, Register klass, Label& slow_case); + + int rsp_offset() const { return _rsp_offset; } + void set_rsp_offset(int n) { _rsp_offset = n; } + + void invalidate_registers(bool inv_r0, bool 
inv_r2, bool inv_r3) PRODUCT_RETURN; + + void patchable_load(Register reg, address addr); + // This platform only uses signal-based null checks. The Label is not needed. + void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } + + void load_parameter(int offset_in_words, Register reg); + +#endif // CPU_AARCH32_VM_C1_MACROASSEMBLER_AARCH32_HPP --- /dev/null 2018-09-25 19:24:32.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_Runtime1_aarch32.cpp 2018-09-25 19:24:31.000000000 +0300 @@ -0,0 +1,1154 @@ +/* + * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "c1/c1_CodeStubs.hpp" +#include "c1/c1_Defs.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "c1/c1_Runtime1.hpp" +#include "compiler/disassembler.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_aarch32.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "register_aarch32.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/signature.hpp" +#include "runtime/vframe.hpp" +#include "runtime/vframeArray.hpp" +#include "vmreg_aarch32.inline.hpp" + +// Implementation of StubAssembler + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { + // setup registers + assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, "registers must be different"); + assert(oop_result1 != rthread && metadata_result != rthread, "registers must be different"); + assert(args_size >= 0, "illegal args_size"); + + mov(c_rarg0, rthread); + set_num_rt_args(0); // Nothing on stack + + Label retaddr; + set_last_Java_frame(sp, rfp, retaddr, rscratch1); + + // do the call + lea(rscratch1, RuntimeAddress(entry)); + bl(rscratch1); + bind(retaddr); + int call_offset = offset(); + // verify callee-saved register +#ifdef ASSERT + push(r0, sp); + { Label L; + get_thread(r0); + cmp(rthread, r0); + b(L, Assembler::EQ); + stop("StubAssembler::call_RT: rthread not callee saved?"); + bind(L); + } + pop(r0, sp); +#endif + reset_last_Java_frame(true); + maybe_isb(); + + // check for pending exceptions + { Label L; + // check for pending exceptions (java_thread is set upon return) + 
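The check that follows is the standard post-call pattern; in C-level terms it behaves roughly like the sketch below, where thread stands for the current JavaThread (illustrative only, the real work stays in the assembly):

    if (thread->has_pending_exception()) {
      if (oop_result1->is_valid())      thread->set_vm_result(NULL);     // drop a partial oop result
      if (metadata_result->is_valid())  thread->set_vm_result_2(NULL);   // drop a partial metadata result
      // then either remove the activation or re-enter via the forward_exception stub
    }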
ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + cbz(rscratch1, L); + mov(rscratch1, 0); + // exception pending => remove activation and forward to exception handler + // make sure that the vm_results are cleared + if (oop_result1->is_valid()) { + str(rscratch1, Address(rthread, JavaThread::vm_result_offset())); + } + if (metadata_result->is_valid()) { + str(rscratch1, Address(rthread, JavaThread::vm_result_2_offset())); + } + if (frame_size() == no_frame_size) { + leave(); + far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + } else if (_stub_id == Runtime1::forward_exception_id) { + should_not_reach_here(); + } else { + far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); + } + bind(L); + } + // get oop results if there are any and reset the values in the thread + if (oop_result1->is_valid()) { + get_vm_result(oop_result1, rthread); + } + if (metadata_result->is_valid()) { + get_vm_result_2(metadata_result, rthread); + } + return call_offset; +} + + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { + mov(c_rarg1, arg1); + return call_RT(oop_result1, metadata_result, entry, 1); +} + + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { + if (c_rarg1 == arg2) { + if (c_rarg2 == arg1) { + mov(rscratch1, arg1); + mov(arg1, arg2); + mov(arg2, rscratch1); + } else { + mov(c_rarg2, arg2); + mov(c_rarg1, arg1); + } + } else { + mov(c_rarg1, arg1); + mov(c_rarg2, arg2); + } + return call_RT(oop_result1, metadata_result, entry, 2); +} + + +int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { + // if there is any conflict use the stack + if (arg1 == c_rarg2 || arg1 == c_rarg3 || + arg2 == c_rarg1 || arg2 == c_rarg3 || + arg3 == c_rarg1 || arg3 == c_rarg2) { + push(arg2); + push(arg3); + push(arg1); + pop(c_rarg1); + pop(c_rarg3); + pop(c_rarg2); + } else { + mov(c_rarg1, arg1); + mov(c_rarg2, arg2); + mov(c_rarg3, arg3); + } + return call_RT(oop_result1, metadata_result, entry, 3); +} + +// Implementation of StubFrame + +class StubFrame: public StackObj { + private: + StubAssembler* _sasm; + + public: + StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments); + void load_argument(int offset_in_words, Register reg); + + ~StubFrame(); +};; + +void StubAssembler::prologue(const char* name, bool must_gc_arguments) { + set_info(name, must_gc_arguments); + enter(); +} + +void StubAssembler::epilogue() { + leave(); + ret(lr); +} + +#define __ _sasm-> + +StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) { + _sasm = sasm; + __ prologue(name, must_gc_arguments); +} + +// load parameters that were stored with LIR_Assembler::store_parameter +// Note: offsets for store_parameter and load_argument must match +void StubFrame::load_argument(int offset_in_words, Register reg) { + __ load_parameter(offset_in_words, reg); +} + + +StubFrame::~StubFrame() { + __ epilogue(); +} + +#undef __ + + +// Implementation of Runtime1 + +#define __ sasm-> + + +// Stack layout for saving/restoring all the registers needed during a runtime +// call (this includes deoptimization) +// Note: note that users of this frame may well have arguments to some runtime +// while these values are on the stack. 
These positions neglect those arguments +// but the code in save_live_registers will take the argument count into +// account. +// + +enum reg_save_layout { + reg_save_s0, + reg_save_s31 = reg_save_s0 + FrameMap::nof_fpu_regs - 1, + reg_save_pad, // to align to doubleword to simplify conformance to APCS + reg_save_r0, + reg_save_r1, + reg_save_r2, + reg_save_r3, + reg_save_r4, + reg_save_r5, + reg_save_r6, + reg_save_r7, + reg_save_r8, + reg_save_r9, + reg_save_r10, + reg_save_r11, + reg_save_r12, + reg_save_frame_size + // remaining words pushed by enter +}; + +// Save off registers which might be killed by calls into the runtime. +// Tries to smart of about FP registers. In particular we separate +// saving and describing the FPU registers for deoptimization since we +// have to save the FPU registers twice if we describe them. The +// deopt blob is the only thing which needs to describe FPU registers. +// In all other cases it should be sufficient to simply save their +// current value. + +static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; +static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; +static int reg_save_size_in_words; +static int frame_size_in_bytes = -1; + +static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { + int frame_size_in_bytes = (reg_save_frame_size + frame::get_frame_size()) * BytesPerWord; + sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); + int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r0), r0->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r1), r1->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r2), r2->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r3), r3->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r4), r4->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r5), r5->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r6), r6->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r7), r7->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r8), r8->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r9), r9->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r10), r10->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r11), r11->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_r12), r12->as_VMReg()); + if (hasFPU()) { + for (int i = 0; i < FrameMap::nof_fpu_regs; ++i) { + oop_map->set_callee_saved(VMRegImpl::stack2reg(reg_save_s0 + i), as_FloatRegister(i)->as_VMReg()); + } + } + + return oop_map; +} + +static OopMap* save_live_registers(StubAssembler* sasm, + bool save_fpu_registers = true) { + __ block_comment("save_live_registers"); + + __ push(RegSet::range(r0, r12), sp); // integer registers except lr & sp + __ sub(sp, sp, 4); // align to 8 bytes + + if (save_fpu_registers && hasFPU()) { + __ vstmdb_f64(sp, (1 << FrameMap::nof_fpu_regs / 2) - 1); + } else { + __ sub(sp, sp, FrameMap::nof_fpu_regs * 4); + } + + return generate_oop_map(sasm, save_fpu_registers); +} + +static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { + + if (restore_fpu_registers && hasFPU()) { + __ vldmia_f64(sp, (1 << FrameMap::nof_fpu_regs / 2) - 1); + } else { + __ add(sp, sp, FrameMap::nof_fpu_regs * 4); + } + + __ 
add(sp, sp, 4); + __ pop(RegSet::range(r0, r12), sp); +} + +static void restore_live_registers_except_r0(StubAssembler* sasm, bool restore_fpu_registers = true) { + + if (restore_fpu_registers && hasFPU()) { + __ vldmia_f64(sp, (1 << FrameMap::nof_fpu_regs / 2) - 1); + } else { + __ add(sp, sp, FrameMap::nof_fpu_regs * 4); + } + + __ add(sp, sp, 8); + __ pop(RegSet::range(r1, r12), sp); +} + +void Runtime1::initialize_pd() { +} + +// target: the entry point of the method that creates and posts the exception oop +// has_argument: true if the exception needs arguments (passed in rscratch1 and rscratch2) + +OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) { + // make a frame and preserve the caller's caller-save registers + OopMap* oop_map = save_live_registers(sasm); + int call_offset; + if (!has_argument) { + call_offset = __ call_RT(noreg, noreg, target); + } else { + call_offset = __ call_RT(noreg, noreg, target, rscratch1, rscratch2); + } + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + __ should_not_reach_here(); + return oop_maps; +} + + +OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { + __ block_comment("generate_handle_exception"); + + // incoming parameters + const Register exception_oop = r0; + const Register exception_pc = r3; + // other registers used in this stub + + // Save registers, if required. + OopMapSet* oop_maps = new OopMapSet(); + OopMap* oop_map = NULL; + switch (id) { + case forward_exception_id: + // We're handling an exception in the context of a compiled frame. + // The registers have been saved in the standard places. Perform + // an exception lookup in the caller and dispatch to the handler + // if found. Otherwise unwind and dispatch to the callers + // exception handler. + oop_map = generate_oop_map(sasm, 1 /*thread*/); + __ mov(rscratch1, 0); + + // load and clear pending exception oop into r0 + __ ldr(exception_oop, Address(rthread, Thread::pending_exception_offset())); + __ str(rscratch1, Address(rthread, Thread::pending_exception_offset())); + + // load issuing PC (the return address for this stub) into r3 + __ ldr(exception_pc, Address(rfp, wordSize * frame::get_return_addr_offset())); + + // make sure that the vm_results are cleared (may be unnecessary) + __ str(rscratch1, Address(rthread, JavaThread::vm_result_offset())); + __ str(rscratch1, Address(rthread, JavaThread::vm_result_2_offset())); + break; + case handle_exception_nofpu_id: + case handle_exception_id: + // At this point all registers MAY be live. + oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: { + // At this point all registers except exception oop (r0) and + // exception pc (lr) are dead. 
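For orientation, the register and state contract of the three flavors handled here can be summarized as follows (a restatement of the cases above and below, not new behavior):

    // forward_exception_id:            exception still sits in Thread::pending_exception;
    //                                  registers were saved in the standard places.
    // handle_exception[_nofpu]_id:     r0 = exception oop, r3 = throwing pc;
    //                                  all registers may be live, so they are saved here.
    // handle_exception_from_callee_id: r0 = exception oop, lr = throwing pc; everything
    //                                  else is dead, so only a minimal frame is described.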
+ const int frame_size = frame::get_frame_size() /*fp, return address, ...*/; + assert(frame_size*wordSize % StackAlignmentInBytes == 0, "must be"); + oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); + sasm->set_frame_size(frame_size); + break; + } + default: + __ should_not_reach_here(); + break; + } + + // verify that only r0 and r3 are valid at this time + __ invalidate_registers(false, true, false); + // verify that r0 contains a valid exception + __ verify_not_null_oop(exception_oop); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are + // empty before writing to them + Label oop_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_oop_offset())); + __ cbz(rscratch1, oop_empty); + __ stop("exception oop already set"); + __ bind(oop_empty); + + Label pc_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); + __ cbz(rscratch1, pc_empty); + __ stop("exception pc already set"); + __ bind(pc_empty); +#endif + + // save exception oop and issuing pc into JavaThread + // (exception handler will load it from here) + __ str(exception_oop, Address(rthread, JavaThread::exception_oop_offset())); + __ str(exception_pc, Address(rthread, JavaThread::exception_pc_offset())); + + // patch throwing pc into return address (has bci & oop map) + __ str(exception_pc, Address(rfp, wordSize * frame::get_return_addr_offset())); + + // compute the exception handler. + // the exception oop and the throwing pc are read from the fields in JavaThread + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); + oop_maps->add_gc_map(call_offset, oop_map); + + // r0: handler address + // will be the deopt blob if nmethod was deoptimized while we looked up + // handler regardless of whether handler existed in the nmethod. + + // only r0 is valid at this time, all other registers have been destroyed by the runtime call + __ invalidate_registers(false, true, true); + + // patch the return address, this stub will directly return to the exception handler + __ str(r0, Address(rfp, wordSize * frame::get_return_addr_offset())); + + switch (id) { + case forward_exception_id: + case handle_exception_nofpu_id: + case handle_exception_id: + // Restore the registers that were saved at the beginning. + restore_live_registers(sasm, id != handle_exception_nofpu_id); + break; + case handle_exception_from_callee_id: + // Pop the return address. + __ leave(); + __ ret(lr); // jump to exception handler + break; + default: ShouldNotReachHere(); + } + + return oop_maps; +} + + +void Runtime1::generate_unwind_exception(StubAssembler *sasm) { + // incoming parameters + const Register exception_oop = r0; + // other registers used in this stub + const Register exception_pc = r3; + const Register handler_addr = r1; + + // verify that only r0, is valid at this time + __ invalidate_registers(false, true, true); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_oop_offset())); + __ cbz(rscratch1, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); + __ cbz(rscratch1, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // Save our return address because + // exception_handler_for_return_address will destroy it. 
We also + // save exception_oop + __ push(exception_oop); + __ push(lr); + + // search the exception handler address of the caller (using the return address) + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), rthread, lr); + // r0: exception handler address of the caller + + // Only R0 is valid at this time; all other registers have been + // destroyed by the call. + __ invalidate_registers(false, true, true); + + // move result of call into correct register + __ mov(handler_addr, r0); + + // get throwing pc (= return address). + // lr has been destroyed by the call + __ pop(lr); + __ pop(exception_oop); + __ mov(r3, lr); + + __ verify_not_null_oop(exception_oop); + + // continue at exception handler (return address removed) + // note: do *not* remove arguments when unwinding the + // activation since the caller assumes having + // all arguments on the stack when entering the + // runtime to determine the exception handler + // (GC happens at call site with arguments!) + // r0: exception oop + // r3: throwing pc + // r1: exception handler + __ b(handler_addr); +} + + + +OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { + // use the maximum number of runtime-arguments here because it is difficult to + // distinguish each RT-Call. + // Note: This number affects also the RT-Call in generate_handle_exception because + // the oop-map is shared for all calls. + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + OopMap* oop_map = save_live_registers(sasm); + + __ mov(c_rarg0, rthread); + Label retaddr; + __ set_last_Java_frame(sp, rfp, retaddr, rscratch1); + // do the call + __ lea(rscratch1, RuntimeAddress(target)); + __ bl(rscratch1); + __ bind(retaddr); + OopMapSet* oop_maps = new OopMapSet(); + oop_maps->add_gc_map(__ offset(), oop_map); + // verify callee-saved register +#ifdef ASSERT + { Label L; + __ get_thread(rscratch1); + __ cmp(rthread, rscratch1); + __ b(L, Assembler::EQ); + __ stop("StubAssembler::call_RT: rthread not callee saved?"); + __ bind(L); + } +#endif + __ reset_last_Java_frame(true); + __ maybe_isb(); + + // check for pending exceptions + { Label L; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, L); + // exception pending => remove activation and forward to exception handler + + { Label L1; + __ cbnz(r0, L1); // have we deoptimized? + __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); + __ bind(L1); + } + + // the deopt blob expects exceptions in the special fields of + // JavaThread, so copy and clear pending exception. 
+ + // load and clear pending exception + __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); + __ mov(rscratch1, 0); + __ str(rscratch1, Address(rthread, Thread::pending_exception_offset())); + + // check that there is really a valid exception + __ verify_not_null_oop(r0); + + // load throwing pc: this is the return address of the stub + __ ldr(r3, Address(rfp, wordSize * frame::get_return_addr_offset())); + +#ifdef ASSERT + // check that fields in JavaThread for exception oop and issuing pc are empty + Label oop_empty; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, oop_empty); + __ stop("exception oop must be empty"); + __ bind(oop_empty); + + Label pc_empty; + __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); + __ cbz(rscratch1, pc_empty); + __ stop("exception pc must be empty"); + __ bind(pc_empty); +#endif + + // store exception oop and throwing pc to JavaThread + __ str(r0, Address(rthread, JavaThread::exception_oop_offset())); + __ str(r3, Address(rthread, JavaThread::exception_pc_offset())); + + restore_live_registers(sasm); + + __ leave(); + + // Forward the exception directly to deopt blob. We can blow no + // registers and must leave throwing pc on the stack. A patch may + // have values live in registers so the entry point with the + // exception in tls. + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); + + __ bind(L); + } + + + // Runtime will return true if the nmethod has been deoptimized during + // the patching process. In that case we must do a deopt reexecute instead. + + Label reexecuteEntry, cont; + + __ cbz(r0, cont); // have we deoptimized? + + // Will reexecute. Proper return address is already on the stack we just restore + // registers, pop all of our frame but the return address and jump to the deopt blob + restore_live_registers(sasm); + __ leave(); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + + __ bind(cont); + restore_live_registers(sasm); + __ leave(); + __ ret(lr); + + return oop_maps; +} + + +OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { + + const Register exception_oop = r0; + const Register exception_pc = r3; + + // for better readability + const bool must_gc_arguments = true; + const bool dont_gc_arguments = false; + + // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu + bool save_fpu_registers = true; + + // stub code & info for the different stubs + OopMapSet* oop_maps = NULL; + OopMap* oop_map = NULL; + switch (id) { + { + case forward_exception_id: + { + oop_maps = generate_handle_exception(id, sasm); + __ leave(); + __ ret(lr); + } + break; + + case throw_div0_exception_id: + { StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); + } + break; + + case throw_null_pointer_exception_id: + { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); + } + break; + + case new_instance_id: + case fast_new_instance_id: + case fast_new_instance_init_check_id: + { + Register klass = r3; // Incoming + Register obj = r0; // Result + + if (id == new_instance_id) { + __ set_info("new_instance", dont_gc_arguments); + } else if (id == fast_new_instance_id) { + __ set_info("fast new_instance", dont_gc_arguments); + } else { 
+ assert(id == fast_new_instance_init_check_id, "bad StubID"); + __ set_info("fast new_instance init check", dont_gc_arguments); + } + + // If TLAB is disabled, see if there is support for inlining contiguous + // allocations. + // Otherwise, just go to the slow path. + if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && + !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { + Label slow_path; + Register obj_size = r2; + Register t1 = r5; + Register t2 = r4; + assert_different_registers(klass, obj, obj_size, t1, t2); + + __ push(t1); + __ push(t2); + if (id == fast_new_instance_init_check_id) { + // make sure the klass is initialized + __ ldrb(rscratch1, Address(klass, InstanceKlass::init_state_offset())); + __ cmp(rscratch1, InstanceKlass::fully_initialized); + __ b(slow_path, Assembler::NE); + } + +#ifdef ASSERT + // assert object can be fast path allocated + { + Label ok, not_ok; + __ ldr(obj_size, Address(klass, Klass::layout_helper_offset())); + __ cmp(obj_size, 0u); + __ b(not_ok, Assembler::LE); // Make sure it's an instance (layout helper is positive) + __ tst(obj_size, Klass::_lh_instance_slow_path_bit); + __ b(ok, Assembler::EQ); + __ bind(not_ok); + __ stop("assert(can be fast path allocated)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // get the instance size + __ ldr(obj_size, Address(klass, Klass::layout_helper_offset())); + + __ eden_allocate(obj, obj_size, 0, t1, slow_path); + + __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); + __ verify_oop(obj); + __ pop(t2); + __ pop(t1); + __ ret(lr); + + __ bind(slow_path); + __ pop(t2); + __ pop(t1); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + __ verify_oop(obj); + __ leave(); + __ ret(lr); + + // r0,: new instance + } + + break; + + case counter_overflow_id: + { + Register bci = r0, method = r1; + __ enter(); + OopMap* map = save_live_registers(sasm); + // Retrieve bci + __ ldr(bci, Address(rfp, 1*BytesPerWord)); + // And a pointer to the Method* + __ ldr(method, Address(rfp, 2*BytesPerWord)); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + __ ret(lr); + } + break; + + case new_type_array_id: + case new_object_array_id: + { + Register length = r6; // Incoming + Register klass = r3; // Incoming + Register obj = r0; // Result + + if (id == new_type_array_id) { + __ set_info("new_type_array", dont_gc_arguments); + } else { + __ set_info("new_object_array", dont_gc_arguments); + } + +#ifdef ASSERT + // assert object type is really an array of the proper kind + { + Label ok; + Register t0 = obj; + __ ldr(t0, Address(klass, Klass::layout_helper_offset())); + __ asr(t0, t0, Klass::_lh_array_tag_shift); + int tag = ((id == new_type_array_id) + ? Klass::_lh_array_tag_type_value + : Klass::_lh_array_tag_obj_value); + __ mov(rscratch1, tag); + __ cmp(t0, rscratch1); + __ b(ok, Assembler::EQ); + __ stop("assert(is an array klass)"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif // ASSERT + + // If TLAB is disabled, see if there is support for inlining contiguous + // allocations. + // Otherwise, just go to the slow path. 
+ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { + Register arr_size = r4; + Register t1 = r2; + Register t2 = r5; + Label slow_path; + assert_different_registers(length, klass, obj, arr_size, t1, t2); + + // check that array length is small enough for fast path. + __ mov(rscratch1, C1_MacroAssembler::max_array_allocation_length); + __ cmp(length, rscratch1); + __ b(slow_path, Assembler::HI); + + // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) + __ ldr(t1, Address(klass, Klass::layout_helper_offset())); + __ andr(rscratch1, t1, 0x1f); + __ lsl(arr_size, length, rscratch1); + __ extract_bits(t1, t1, Klass::_lh_header_size_shift, + exact_log2(Klass::_lh_header_size_mask + 1)); + __ add(arr_size, arr_size, t1); + __ add(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up + __ mov(rscratch1, ~MinObjAlignmentInBytesMask); + __ andr(arr_size, arr_size, rscratch1); + + __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size + + __ initialize_header(obj, klass, length, t1, t2); + // Assume Little-Endian + __ ldrb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); + assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); + assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); + __ andr(t1, t1, Klass::_lh_header_size_mask); + __ sub(arr_size, arr_size, t1); // body length + __ add(t1, t1, obj); // body start + __ initialize_body(t1, arr_size, 0, t2); + __ verify_oop(obj); + + __ ret(lr); + + __ bind(slow_path); + } + + __ enter(); + OopMap* map = save_live_registers(sasm); + int call_offset; + if (id == new_type_array_id) { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); + } else { + call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); + } + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + + __ verify_oop(obj); + __ leave(); + __ ret(lr); + + // r0: new array + } + break; + + case new_multi_array_id: + { StubFrame f(sasm, "new_multi_array", dont_gc_arguments); + // r1: klass + // r2: rank + // r3: address of 1st dimension + OopMap* map = save_live_registers(sasm); + int call_offset = __ call_RT(r0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), r1, r2, r3); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers_except_r0(sasm); + + // r0,: new multi array + __ verify_oop(r0); + } + break; + + case register_finalizer_id: + { + __ set_info("register_finalizer", dont_gc_arguments); + + // This is called via call_runtime so the arguments + // will be place in C abi locations + + __ verify_oop(c_rarg0); + + // load the klass and check the has finalizer flag + Label register_finalizer; + Register t = r5; + __ load_klass(t, r0); + __ ldr(t, Address(t, Klass::access_flags_offset())); + __ tst(t, JVM_ACC_HAS_FINALIZER); + __ b(register_finalizer, Assembler::NE); + __ ret(lr); + + __ bind(register_finalizer); + __ enter(); + OopMap* oop_map = save_live_registers(sasm); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), r0); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + + // Now restore all the live registers + restore_live_registers(sasm); + + __ leave(); + __ ret(lr); + } + break; + + case throw_class_cast_exception_id: + { StubFrame f(sasm, 
"throw_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); + } + break; + + case throw_incompatible_class_change_error_id: + { StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); + } + break; + + case slow_subtype_check_id: + { + // Typical calling sequence: + // __ push(klass_RInfo); // object klass or other subclass + // __ push(sup_k_RInfo); // array element klass or other superclass + // __ bl(slow_subtype_check); + // Note that the subclass is pushed first, and is therefore deepest. + enum layout { + r0_off, + r2_off, + r4_off, + r5_off, + sup_k_off, + klass_off, + framesize, + result_off = sup_k_off + }; + + __ set_info("slow_subtype_check", dont_gc_arguments); + __ push(RegSet::of(r0, r2, r4, r5), sp); + + // This is called by pushing args and not with C abi + __ ldr(r4, Address(sp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass + __ ldr(r0, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass + + + Label miss; + __ check_klass_subtype_slow_path(r4, r0, r2, r5, NULL, &miss); + + // fallthrough on success: + __ mov(rscratch1, 1); + __ str(rscratch1, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop(RegSet::of(r0, r2, r4, r5), sp); + __ ret(lr); + + __ bind(miss); + __ mov(rscratch1, 0); + __ str(rscratch1, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result + __ pop(RegSet::of(r0, r2, r4, r5), sp); + __ ret(lr); + } + break; + + case monitorenter_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorenter_id: + { + StubFrame f(sasm, "monitorenter", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + + // Called with store_parameter and not C abi + + f.load_argument(1, r0); // r0,: object + f.load_argument(0, r1); // r1,: lock address + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), r0, r1); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case monitorexit_nofpu_id: + save_fpu_registers = false; + // fall through + case monitorexit_id: + { + StubFrame f(sasm, "monitorexit", dont_gc_arguments); + OopMap* map = save_live_registers(sasm, save_fpu_registers); + + // Called with store_parameter and not C abi + + f.load_argument(0, r0); // r0,: lock address + + // note: really a leaf routine but must setup last java sp + // => use call_RT for now (speed can be improved by + // doing last java sp setup manually) + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), r0); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm, save_fpu_registers); + } + break; + + case deoptimize_id: + { + StubFrame f(sasm, "deoptimize", dont_gc_arguments); + OopMap* oop_map = save_live_registers(sasm); + f.load_argument(0, c_rarg1); + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), c_rarg1); + + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, oop_map); + restore_live_registers(sasm); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + __ leave(); + __ 
far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + } + break; + + case throw_range_check_failed_id: + { StubFrame f(sasm, "range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); + } + break; + + case unwind_exception_id: + { __ set_info("unwind_exception", dont_gc_arguments); + // note: no stubframe since we are about to leave the current + // activation and we are calling a leaf VM function only. + generate_unwind_exception(sasm); + } + break; + + case access_field_patching_id: + { StubFrame f(sasm, "access_field_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); + } + break; + + case load_klass_patching_id: + { StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); + } + break; + + case load_mirror_patching_id: + { StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); + } + break; + + case load_appendix_patching_id: + { StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); + // we should set up register map + oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); + } + break; + + case handle_exception_nofpu_id: + case handle_exception_id: + { StubFrame f(sasm, "handle_exception", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case handle_exception_from_callee_id: + { StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); + oop_maps = generate_handle_exception(id, sasm); + } + break; + + case throw_index_exception_id: + { StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); + } + break; + + case throw_array_store_exception_id: + { StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); + // tos + 0: link + // + 1: return address + oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); + } + break; + + case predicate_failed_trap_id: + { + StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); + + OopMap* map = save_live_registers(sasm); + + int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); + oop_maps = new OopMapSet(); + oop_maps->add_gc_map(call_offset, map); + restore_live_registers(sasm); + __ leave(); + DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); + assert(deopt_blob != NULL, "deoptimization blob must have been created"); + + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + } + break; + + + default: + { StubFrame f(sasm, "unimplemented entry", dont_gc_arguments); + __ mov(r0, (int)id); + __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0); + __ should_not_reach_here(); + } + break; + } + } + return oop_maps; +} + +#undef __ + +const char *Runtime1::pd_name_for_address(address entry) { +#ifdef __SOFTFP__ +#define FUNCTION_CASE(a, f) \ + if ((intptr_t)a == CAST_FROM_FN_PTR(intptr_t, f)) return #f + + FUNCTION_CASE(entry, SharedRuntime::i2f); + FUNCTION_CASE(entry, SharedRuntime::i2d); + FUNCTION_CASE(entry, SharedRuntime::f2d); + 
FUNCTION_CASE(entry, SharedRuntime::fcmpg); + FUNCTION_CASE(entry, SharedRuntime::fcmpl); + FUNCTION_CASE(entry, SharedRuntime::dcmpg); + FUNCTION_CASE(entry, SharedRuntime::dcmpl); + FUNCTION_CASE(entry, SharedRuntime::unordered_fcmple); + FUNCTION_CASE(entry, SharedRuntime::unordered_dcmple); +#undef FUNCTION_CASE +#endif + + return "Unknown_Func_Ptr"; +} --- /dev/null 2018-09-25 19:24:33.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c1_globals_aarch32.hpp 2018-09-25 19:24:32.000000000 +0300 @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_C1_GLOBALS_AARCH32_HPP +#define CPU_AARCH32_VM_C1_GLOBALS_AARCH32_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the client compiler. 
+// (see c1_globals.hpp) + +#ifndef TIERED +define_pd_global(bool, BackgroundCompilation, true ); +define_pd_global(bool, UseTLAB, true ); +define_pd_global(bool, ResizeTLAB, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); +define_pd_global(bool, UseOnStackReplacement, true); +define_pd_global(bool, TieredCompilation, false); +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); +define_pd_global(intx, FreqInlineSize, 325 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); +define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); +define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); +define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(bool, ProfileInterpreter, false); +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); +define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); +define_pd_global(uint64_t,MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); +#endif // !TIERED +define_pd_global(bool, UseTypeProfile, false); +define_pd_global(bool, RoundFPResults, true ); + +define_pd_global(bool, LIRFillDelaySlots, false); +define_pd_global(bool, OptimizeSinglePrecision, true ); +define_pd_global(bool, CSEArrayLength, true ); +define_pd_global(bool, TwoOperandLIRForm, false); + +#endif // CPU_AARCH32_VM_C1_GLOBALS_AARCH32_HPP --- /dev/null 2018-09-25 19:24:34.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/c2_globals_aarch32.hpp 2018-09-25 19:24:33.000000000 +0300 @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_C2_GLOBALS_AARCH32_HPP +#define CPU_AARCH32_VM_C2_GLOBALS_AARCH32_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// +// Sets the default values for platform dependent flags used by the server compiler. +// (see c2_globals.hpp). Alpha-sorted. 
+ +define_pd_global(bool, BackgroundCompilation, true); +define_pd_global(bool, CICompileOSR, true); +define_pd_global(bool, InlineIntrinsics, false); // TODO FIXME temporary, please enable +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, true); +define_pd_global(bool, UseOnStackReplacement, true); +define_pd_global(bool, ProfileInterpreter, true); +define_pd_global(bool, TieredCompilation, false); +define_pd_global(intx, CompileThreshold, 10000); + +define_pd_global(intx, OnStackReplacePercentage, 140); +define_pd_global(intx, ConditionalMoveLimit, 4); +// C2 gets to use all the float/double registers +define_pd_global(intx, FLOATPRESSURE, 30); +define_pd_global(intx, FreqInlineSize, 175); +define_pd_global(intx, INTPRESSURE, 12); +define_pd_global(intx, InteriorEntryAlignment, 32); // = CodeEntryAlignment +define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); +// The default setting 16/16 seems to work best. +// (For _228_jack 16/16 is 2% better than 4/4, 16/4, 32/32, 32/16, or 16/32.) +//define_pd_global(intx, OptoLoopAlignment, 16); // = 4*wordSize +define_pd_global(intx, RegisterCostAreaRatio, 16000); +define_pd_global(bool, UseTLAB, true); +define_pd_global(bool, ResizeTLAB, true); +define_pd_global(intx, LoopUnrollLimit, 60); +define_pd_global(intx, LoopPercentProfileLimit, 10); +define_pd_global(intx, MinJumpTableSize, 16); + +// Peephole and CISC spilling both break the graph, and so makes the +// scheduler sick. +define_pd_global(bool, OptoPeephole, false); +define_pd_global(bool, UseCISCSpill, false); +define_pd_global(bool, OptoBundling, false); +define_pd_global(bool, OptoScheduling, true); +define_pd_global(bool, OptoRegScheduling, false); +define_pd_global(bool, SuperWordLoopUnrollAnalysis, false); +define_pd_global(bool, IdealizeClearArrayNode, true); + +// InitialCodeCacheSize derived from specjbb2000 run. +define_pd_global(size_t, InitialCodeCacheSize, 1536*K); // Integral multiple of CodeCacheExpansionSize +define_pd_global(size_t, ReservedCodeCacheSize, 32*M); +define_pd_global(size_t, NonProfiledCodeHeapSize, 13*M); +define_pd_global(size_t, ProfiledCodeHeapSize, 14*M); +define_pd_global(size_t, NonNMethodCodeHeapSize, 5*M ); +define_pd_global(size_t, CodeCacheExpansionSize, 32*K); +// Ergonomics related flags +define_pd_global(uint64_t, MaxRAM, 4ULL*G); +define_pd_global(uintx, CodeCacheMinBlockLength, 4); +define_pd_global(size_t, CodeCacheMinimumUseSpace, 400*K); + +define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed + +// Heap related flags +define_pd_global(size_t, MetaspaceSize, ScaleForWordSize(16*M)); + +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + +#endif // CPU_AARCH32_VM_C2_GLOBALS_AARCH32_HPP --- /dev/null 2018-09-25 19:24:35.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/codeBuffer_aarch32.hpp 2018-09-25 19:24:34.000000000 +0300 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_CODEBUFFER_AARCH32_HPP +#define CPU_AARCH32_VM_CODEBUFFER_AARCH32_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_AARCH32_VM_CODEBUFFER_AARCH32_HPP --- /dev/null 2018-09-25 19:24:36.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/compiledIC_aarch32.cpp 2018-09-25 19:24:35.000000000 +0300 @@ -0,0 +1,148 @@ +/* + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/compiledIC.hpp" +#include "code/icBuffer.hpp" +#include "code/nmethod.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/safepoint.hpp" + +// ---------------------------------------------------------------------------- + +#define __ _masm. +address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { + // Stub is fixed up when the corresponding call is converted from + // calling compiled code to calling interpreted code. + // mov rmethod, 0 + // jmp -4 # to self + + if (mark == NULL) { + mark = cbuf.insts_mark(); // Get mark within main instrs section. + } + + // Note that the code buffer's insts_mark is always relative to insts. + // That's why we must use the macroassembler to generate a stub. 
+ MacroAssembler _masm(&cbuf); + + address base = __ start_a_stub(to_interp_stub_size()); + + int offset = __ offset(); + if (base == NULL) { + return NULL; // CodeBuffer::expand failed + } + // static stub relocation stores the instruction address of the call + __ relocate(static_stub_Relocation::spec(mark)); + // static stub relocation also tags the Method* in the code-stream. + __ mov_metadata(rmethod, (Metadata*)NULL); + __ movptr(rscratch1, 0); + __ b(rscratch1); + + assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); + __ end_a_stub(); + return base; +} +#undef __ + +int CompiledStaticCall::to_interp_stub_size() { + return 7 * NativeInstruction::arm_insn_sz; +} + +int CompiledStaticCall::to_trampoline_stub_size() { + // AArch32 doesn't use trampoline stubs. + return 0; +} + +// Relocation entries for call stub, compiled java to interpreter. +int CompiledStaticCall::reloc_to_interp_stub() { + return 4; // 3 in emit_to_interp_stub + 1 in emit_call +} + +void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { + address stub = find_stub(false /* is_aot */); + guarantee(stub != NULL, "stub not found"); + + if (TraceICs) { + ResourceMark rm; + tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", + p2i(instruction_address()), + callee->name_and_sig_as_C_string()); + } + + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + NativeJump* jump = NativeJump::from(method_holder->next_instruction_address()); +#ifndef PRODUCT + // read the value once + volatile intptr_t data = method_holder->data(); + assert(data == 0 || data == (intptr_t)callee(), + "a) MT-unsafe modification of inline cache"); + assert(data == 0 || jump->jump_destination() == entry, + "b) MT-unsafe modification of inline cache"); +#endif + // Update stub. + method_holder->set_data((intptr_t)callee()); + jump->set_jump_destination(entry); + ICache::invalidate_range(stub, to_interp_stub_size()); + // Update jump to call. + set_destination_mt_safe(stub); +} + +void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { + assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); + // Reset stub. + address stub = static_stub->addr(); + assert(stub != NULL, "stub not found"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + method_holder->set_data(0); +} + +//----------------------------------------------------------------------------- +// Non-product mode code +#ifndef PRODUCT + +void CompiledDirectStaticCall::verify() { + // Verify call. + _call->verify(); + if (os::is_MP()) { + _call->verify_alignment(); + } + + // Verify stub. + address stub = find_stub(false /* is_aot */); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT --- /dev/null 2018-09-25 19:24:37.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/copy_aarch32.hpp 2018-09-25 19:24:37.000000000 +0300 @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
+ * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_COPY_AARCH32_HPP +#define CPU_AARCH32_VM_COPY_AARCH32_HPP + +// Inline functions for memory copy and fill. + +// Contains inline asm implementations +#include OS_CPU_HEADER_INLINE(copy) + + +static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { +/* julong* to = (julong*) tohw; + julong v = ((julong) value << 32) | value; + while (count-- > 0) { + *to++ = v; + }*/ + juint *to = (juint*)tohw; + while(count-- > 0) { + *to++ = value; + } +} + +static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { + pd_fill_to_words(tohw, count, value); +} + +static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { + (void)memset(to, value, count); +} + +static void pd_zero_to_words(HeapWord* tohw, size_t count) { + pd_fill_to_words(tohw, count, 0); +} + +static void pd_zero_to_bytes(void* to, size_t count) { + (void)memset(to, 0, count); +} + +#endif // CPU_AARCH32_VM_COPY_AARCH32_HPP --- /dev/null 2018-09-25 19:24:38.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/cpustate_aarch32.hpp 2018-09-25 19:24:38.000000000 +0300 @@ -0,0 +1,596 @@ +/* + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef _CPU_STATE_H +#define _CPU_STATE_H + +#include + +/* + * symbolic names used to identify general registers which also match + * the registers indices in machine code + * + * We have 32 general registers which can be read/written as 32 bit or + * 64 bit sources/sinks and are appropriately referred to as Wn or Xn + * in the assembly code. Some instructions mix these access modes + * (e.g. ADD X0, X1, W2) so the implementation of the instruction + * needs to *know* which type of read or write access is required. + */ +enum GReg { + R0, + R1, + R2, + R3, + R4, + R5, + R6, + R7, + R8, + R9, + R10, + R11, + R12, + R13, + R14, + R15, + R16, + R17, + R18, + R19, + R20, + R21, + R22, + R23, + R24, + R25, + R26, + R27, + R28, + R29, + R30, + R31, + // and now the aliases + RSCRATCH1=R8, + RSCRATCH2=R9, + RMETHOD=R12, + RESP=R20, + RDISPATCH=R21, + RBCP=R22, + RLOCALS=R24, + RMONITORS=R25, + RCPOOL=R26, + RHEAPBASE=R27, + RTHREAD=R28, + FP = R29, + LR = R30, + SP = R31, + ZR = R31 +}; + +/* + * symbolic names used to refer to floating point registers which also + * match the registers indices in machine code + * + * We have 32 FP registers which can be read/written as 8, 16, 32, 64 + * and 128 bit sources/sinks and are appropriately referred to as Bn, + * Hn, Sn, Dn and Qn in the assembly code. Some instructions mix these + * access modes (e.g. FCVT S0, D0) so the implementation of the + * instruction needs to *know* which type of read or write access is + * required. + */ + +enum VReg { + V0, + V1, + V2, + V3, + V4, + V5, + V6, + V7, + V8, + V9, + V10, + V11, + V12, + V13, + V14, + V15, + V16, + V17, + V18, + V19, + V20, + V21, + V22, + V23, + V24, + V25, + V26, + V27, + V28, + V29, + V30, + V31, +}; + +/** + * all the different integer bit patterns for the components of a + * general register are overlaid here using a union so as to allow all + * reading and writing of the desired bits. + * + * n.b. the ARM spec says that when you write a 32 bit register you + * are supposed to write the low 32 bits and zero the high 32 + * bits. But we don't actually have to care about this because Java + * will only ever consume the 32 bits value as a 64 bit quantity after + * an explicit extend. + */ +union GRegisterValue +{ + int8_t s8; + int16_t s16; + int32_t s32; + int64_t s64; + u_int8_t u8; + u_int16_t u16; + u_int32_t u32; + u_int64_t u64; +}; + +class GRegister +{ +public: + GRegisterValue value; +}; + +/* + * float registers provide for storage of a single, double or quad + * word format float in the same register. single floats are not + * paired within each double register as per 32 bit arm. instead each + * 128 bit register Vn embeds the bits for Sn, and Dn in the lower + * quarter and half, respectively, of the bits for Qn. + * + * The upper bits can also be accessed as single or double floats by + * the float vector operations using indexing e.g. V1.D[1], V1.S[3] + * etc and, for SIMD operations using a horrible index range notation. + * + * The spec also talks about accessing float registers as half words + * and bytes with Hn and Bn providing access to the low 16 and 8 bits + * of Vn but it is not really clear what these bits represent. We can + * probably ignore this for Java anyway. However, we do need to access + * the raw bits at 32 and 64 bit resolution to load to/from integer + * registers. 
+ */ + +union FRegisterValue +{ + float s; + double d; + long double q; + // eventually we will need to be able to access the data as a vector + // the integral array elements allow us to access the bits in s, d, + // q, vs and vd at an appropriate level of granularity + u_int8_t vb[16]; + u_int16_t vh[8]; + u_int32_t vw[4]; + u_int64_t vx[2]; + float vs[4]; + double vd[2]; +}; + +class FRegister +{ +public: + FRegisterValue value; +}; + +/* + * CPSR register -- this does not exist as a directly accessible + * register but we need to store the flags so we can implement + * flag-seting and flag testing operations + * + * we can possibly use injected x86 asm to report the outcome of flag + * setting operations. if so we will need to grab the flags + * immediately after the operation in order to ensure we don't lose + * them because of the actions of the simulator. so we still need + * somewhere to store the condition codes. + */ + +class CPSRRegister +{ +public: + u_int32_t value; + +/* + * condition register bit select values + * + * the order of bits here is important because some of + * the flag setting conditional instructions employ a + * bit field to populate the flags when a false condition + * bypasses execution of the operation and we want to + * be able to assign the flags register using the + * supplied value. + */ + + enum CPSRIdx { + V_IDX, + C_IDX, + Z_IDX, + N_IDX + }; + + enum CPSRMask { + V = 1 << V_IDX, + C = 1 << C_IDX, + Z = 1 << Z_IDX, + N = 1 << N_IDX + }; + + static const int CPSR_ALL_FLAGS = (V | C | Z | N); +}; + +// auxiliary function to assemble the relevant bits from +// the x86 EFLAGS register into an ARM CPSR value + +#define X86_V_IDX 11 +#define X86_C_IDX 0 +#define X86_Z_IDX 6 +#define X86_N_IDX 7 + +#define X86_V (1 << X86_V_IDX) +#define X86_C (1 << X86_C_IDX) +#define X86_Z (1 << X86_Z_IDX) +#define X86_N (1 << X86_N_IDX) + +inline u_int32_t convertX86Flags(u_int32_t x86flags) +{ + u_int32_t flags; + // set N flag + flags = ((x86flags & X86_N) >> X86_N_IDX); + // shift then or in Z flag + flags <<= 1; + flags |= ((x86flags & X86_Z) >> X86_Z_IDX); + // shift then or in C flag + flags <<= 1; + flags |= ((x86flags & X86_C) >> X86_C_IDX); + // shift then or in V flag + flags <<= 1; + flags |= ((x86flags & X86_V) >> X86_V_IDX); + + return flags; +} + +inline u_int32_t convertX86FlagsFP(u_int32_t x86flags) +{ + // x86 flags set by fcomi(x,y) are ZF:PF:CF + // (yes, that's PF for parity, WTF?) + // where + // 0) 0:0:0 means x > y + // 1) 0:0:1 means x < y + // 2) 1:0:0 means x = y + // 3) 1:1:1 means x and y are unordered + // note that we don't have to check PF so + // we really have a simple 2-bit case switch + // the corresponding ARM64 flags settings + // in hi->lo bit order are + // 0) --C- + // 1) N--- + // 2) -ZC- + // 3) --CV + + static u_int32_t armFlags[] = { + 0b0010, + 0b1000, + 0b0110, + 0b0011 + }; + // pick out the ZF and CF bits + u_int32_t zc = ((x86flags & X86_Z) >> X86_Z_IDX); + zc <<= 1; + zc |= ((x86flags & X86_C) >> X86_C_IDX); + + return armFlags[zc]; +} + +/* + * FPSR register -- floating point status register + + * this register includes IDC, IXC, UFC, OFC, DZC, IOC and QC bits, + * and the floating point N, Z, C, V bits but the latter are unused in + * aarch32 mode. the sim ignores QC for now. 
+ * + * bit positions are as per the ARMv7 FPSCR register + * + * IDC : 7 ==> Input Denormal (cumulative exception bit) + * IXC : 4 ==> Inexact + * UFC : 3 ==> Underflow + * OFC : 2 ==> Overflow + * DZC : 1 ==> Division by Zero + * IOC : 0 ==> Invalid Operation + */ + +class FPSRRegister +{ +public: + u_int32_t value; + // indices for bits in the FPSR register value + enum FPSRIdx { + IO_IDX = 0, + DZ_IDX = 1, + OF_IDX = 2, + UF_IDX = 3, + IX_IDX = 4, + ID_IDX = 7 + }; + // corresponding bits as numeric values + enum FPSRMask { + IO = (1 << IO_IDX), + DZ = (1 << DZ_IDX), + OF = (1 << OF_IDX), + UF = (1 << UF_IDX), + IX = (1 << IX_IDX), + ID = (1 << ID_IDX) + }; + static const int FPSR_ALL_FPSRS = (IO | DZ | OF | UF | IX | ID); +}; + +// debugger support + +enum PrintFormat +{ + FMT_DECIMAL, + FMT_HEX, + FMT_SINGLE, + FMT_DOUBLE, + FMT_QUAD, + FMT_MULTI +}; + +/* + * model of the registers and other state associated with the cpu + */ +class CPUState +{ + friend class AArch64Simulator; +private: + // this is the PC of the instruction being executed + u_int64_t pc; + // this is the PC of the instruction to be executed next + // it is defaulted to pc + 4 at instruction decode but + // execute may reset it + + u_int64_t nextpc; + GRegister gr[33]; // extra register at index 32 is used + // to hold zero value + FRegister fr[32]; + CPSRRegister cpsr; + FPSRRegister fpsr; + +public: + + CPUState() { + gr[20].value.u64 = 0; // establish initial condition for + // checkAssertions() + trace_counter = 0; + } + + // General Register access macros + + // only xreg or xregs can be used as an lvalue in order to update a + // register. this ensures that the top part of a register is always + // assigned when it is written by the sim. + + inline u_int64_t &xreg(GReg reg, int r31_is_sp) { + if (reg == R31 && !r31_is_sp) { + return gr[32].value.u64; + } else { + return gr[reg].value.u64; + } + } + + inline int64_t &xregs(GReg reg, int r31_is_sp) { + if (reg == R31 && !r31_is_sp) { + return gr[32].value.s64; + } else { + return gr[reg].value.s64; + } + } + + inline u_int32_t wreg(GReg reg, int r31_is_sp) { + if (reg == R31 && !r31_is_sp) { + return gr[32].value.u32; + } else { + return gr[reg].value.u32; + } + } + + inline int32_t wregs(GReg reg, int r31_is_sp) { + if (reg == R31 && !r31_is_sp) { + return gr[32].value.s32; + } else { + return gr[reg].value.s32; + } + } + + inline u_int32_t hreg(GReg reg, int r31_is_sp) { + if (reg == R31 && !r31_is_sp) { + return gr[32].value.u16; + } else { + return gr[reg].value.u16; + } + } + + inline int32_t hregs(GReg reg, int r31_is_sp) { + if (reg == R31 && !r31_is_sp) { + return gr[32].value.s16; + } else { + return gr[reg].value.s16; + } + } + + inline u_int32_t breg(GReg reg, int r31_is_sp) { + if (reg == R31 && !r31_is_sp) { + return gr[32].value.u8; + } else { + return gr[reg].value.u8; + } + } + + inline int32_t bregs(GReg reg, int r31_is_sp) { + if (reg == R31 && !r31_is_sp) { + return gr[32].value.s8; + } else { + return gr[reg].value.s8; + } + } + + // FP Register access macros + + // all non-vector accessors return a reference so we can both read + // and assign + + inline float &sreg(VReg reg) { + return fr[reg].value.s; + } + + inline double &dreg(VReg reg) { + return fr[reg].value.d; + } + + inline long double &qreg(VReg reg) { + return fr[reg].value.q; + } + + // all vector register accessors return a pointer + + inline float *vsreg(VReg reg) { + return &fr[reg].value.vs[0]; + } + + inline double *vdreg(VReg reg) { + return &fr[reg].value.vd[0]; + } + + 
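// Illustrative sketch (standalone; not part of this patch): the xreg()/wreg()
// accessors above redirect reads of R31 to the extra slot at index 32, which
// the simulator keeps at zero, unless the caller asks for the stack-pointer
// view (r31_is_sp). The names below (SketchRegFile, etc.) are invented for the
// illustration; this is only a minimal model of that selection logic.

#include <cassert>
#include <cstdint>

struct SketchRegFile {
  uint64_t gr[33];            // slot 32 plays the role of the hard-wired zero
  SketchRegFile() : gr() {}   // zero-initialize the sketch's register file

  uint64_t& xreg(int reg, bool r31_is_sp) {
    // same selection as CPUState::xreg(): R31 is the SP view only on request
    return (reg == 31 && !r31_is_sp) ? gr[32] : gr[reg];
  }
};

int main() {
  SketchRegFile rf;
  rf.xreg(31, /*r31_is_sp=*/true) = 0x7ffc0010u;  // write through the SP view
  assert(rf.xreg(31, true)  == 0x7ffc0010u);      // SP view reads it back
  assert(rf.xreg(31, false) == 0);                // zero-register view stays 0
  return 0;
}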
inline u_int8_t *vbreg(VReg reg) { + return &fr[reg].value.vb[0]; + } + + inline u_int16_t *vhreg(VReg reg) { + return &fr[reg].value.vh[0]; + } + + inline u_int32_t *vwreg(VReg reg) { + return &fr[reg].value.vw[0]; + } + + inline u_int64_t *vxreg(VReg reg) { + return &fr[reg].value.vx[0]; + } + + union GRegisterValue prev_sp, prev_fp; + + static const int trace_size = 256; + u_int64_t trace_buffer[trace_size]; + int trace_counter; + + bool checkAssertions() + { + // Make sure that SP is 16-aligned + // Also make sure that ESP is above SP. + // We don't care about checking ESP if it is null, i.e. it hasn't + // been used yet. + if (gr[31].value.u64 & 0x0f) { + asm volatile("nop"); + return false; + } + return true; + } + + // pc register accessors + + // this instruction can be used to fetch the current PC + u_int64_t getPC(); + // instead of setting the current PC directly you can + // first set the next PC (either absolute or PC-relative) + // and later copy the next PC into the current PC + // this supports a default increment by 4 at instruction + // fetch with an optional reset by control instructions + u_int64_t getNextPC(); + void setNextPC(u_int64_t next); + void offsetNextPC(int64_t offset); + // install nextpc as current pc + void updatePC(); + + // this instruction can be used to save the next PC to LR + // just before installing a branch PC + inline void saveLR() { gr[LR].value.u64 = nextpc; } + + // cpsr register accessors + u_int32_t getCPSRRegister(); + void setCPSRRegister(u_int32_t flags); + // read a specific subset of the flags as a bit pattern + // mask should be composed using elements of enum FlagMask + u_int32_t getCPSRBits(u_int32_t mask); + // assign a specific subset of the flags as a bit pattern + // mask and value should be composed using elements of enum FlagMask + void setCPSRBits(u_int32_t mask, u_int32_t value); + // test the value of a single flag returned as 1 or 0 + u_int32_t testCPSR(CPSRRegister::CPSRIdx idx); + // set a single flag + void setCPSR(CPSRRegister::CPSRIdx idx); + // clear a single flag + void clearCPSR(CPSRRegister::CPSRIdx idx); + // utility method to set ARM CSPR flags from an x86 bit mask generated by integer arithmetic + void setCPSRRegisterFromX86(u_int64_t x86Flags); + // utility method to set ARM CSPR flags from an x86 bit mask generated by floating compare + void setCPSRRegisterFromX86FP(u_int64_t x86Flags); + + // fpsr register accessors + u_int32_t getFPSRRegister(); + void setFPSRRegister(u_int32_t flags); + // read a specific subset of the fprs bits as a bit pattern + // mask should be composed using elements of enum FPSRRegister::FlagMask + u_int32_t getFPSRBits(u_int32_t mask); + // assign a specific subset of the flags as a bit pattern + // mask and value should be composed using elements of enum FPSRRegister::FlagMask + void setFPSRBits(u_int32_t mask, u_int32_t value); + // test the value of a single flag returned as 1 or 0 + u_int32_t testFPSR(FPSRRegister::FPSRIdx idx); + // set a single flag + void setFPSR(FPSRRegister::FPSRIdx idx); + // clear a single flag + void clearFPSR(FPSRRegister::FPSRIdx idx); + + // debugger support + void printPC(int pending, const char *trailing = "\n"); + void printInstr(u_int32_t instr, void (*dasm)(u_int64_t), const char *trailing = "\n"); + void printGReg(GReg reg, PrintFormat format = FMT_HEX, const char *trailing = "\n"); + void printVReg(VReg reg, PrintFormat format = FMT_HEX, const char *trailing = "\n"); + void printCPSR(const char *trailing = "\n"); + void printFPSR(const char 
*trailing = "\n"); + void dumpState(); +}; + +#endif // ifndef _CPU_STATE_H --- /dev/null 2018-09-25 19:24:39.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/debug_aarch32.cpp 2018-09-25 19:24:39.000000000 +0300 @@ -0,0 +1,35 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "code/nmethod.hpp" +#include "runtime/frame.hpp" +#include "runtime/init.hpp" +#include "runtime/os.hpp" +#include "utilities/debug.hpp" + +void pd_ps(frame f) {} --- /dev/null 2018-09-25 19:24:40.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/decode_aarch32.hpp 2018-09-25 19:24:40.000000000 +0300 @@ -0,0 +1,413 @@ +/* + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef _DECODE_H +#define _DECODE_H + +#include +#include "cpustate_aarch32.hpp" + +// bitfield immediate expansion helper + +extern int expandLogicalImmediate(u_int32_t immN, u_int32_t immr, + u_int32_t imms, u_int64_t &bimm); + + +/* + * codes used in conditional instructions + * + * these are passed to conditional operations to identify which + * condition to test for + */ +enum CondCode { + EQ = 0b0000, // meaning Z == 1 + NE = 0b0001, // meaning Z == 0 + HS = 0b0010, // meaning C == 1 + CS = HS, + LO = 0b0011, // meaning C == 0 + CC = LO, + MI = 0b0100, // meaning N == 1 + PL = 0b0101, // meaning N == 0 + VS = 0b0110, // meaning V == 1 + VC = 0b0111, // meaning V == 0 + HI = 0b1000, // meaning C == 1 && Z == 0 + LS = 0b1001, // meaning !(C == 1 && Z == 0) + GE = 0b1010, // meaning N == V + LT = 0b1011, // meaning N != V + GT = 0b1100, // meaning Z == 0 && N == V + LE = 0b1101, // meaning !(Z == 0 && N == V) + AL = 0b1110, // meaning ANY + NV = 0b1111 // ditto +}; + +/* + * certain addressing modes for load require pre or post writeback of + * the computed address to a base register + */ +enum WriteBack { + Post = 0, + Pre = 1 +}; + +/* + * certain addressing modes for load require an offset to + * be optionally scaled so the decode needs to pass that + * through to the execute routine + */ +enum Scaling { + Unscaled = 0, + Scaled = 1 +}; + +/* + * when we do have to scale we do so by shifting using + * log(bytes in data element - 1) as the shift count. + * so we don't have to scale offsets when loading + * bytes. + */ +enum ScaleShift { + ScaleShift16 = 1, + ScaleShift32 = 2, + ScaleShift64 = 3, + ScaleShift128 = 4 +}; + +/* + * one of the addressing modes for load requires a 32-bit register + * value to be either zero- or sign-extended for these instructions + * UXTW or SXTW should be passed + * + * arithmetic register data processing operations can optionally + * extend a portion of the second register value for these + * instructions the value supplied must identify the portion of the + * register which is to be zero- or sign-exended + */ +enum Extension { + UXTB = 0, + UXTH = 1, + UXTW = 2, + UXTX = 3, + SXTB = 4, + SXTH = 5, + SXTW = 6, + SXTX = 7 +}; + +/* + * arithmetic and logical register data processing operations + * optionally perform a shift on the second register value + */ +enum Shift { + LSL = 0, + LSR = 1, + ASR = 2, + ROR = 3 +}; + +/* + * bit twiddling helpers for instruction decode + */ + +// 32 bit mask with bits [hi,...,lo] set + +static inline u_int32_t mask32(int hi = 31, int lo = 0) +{ + int nbits = (hi + 1) - lo; + return ((1 << nbits) - 1) << lo; +} + +static inline u_int64_t mask64(int hi = 63, int lo = 0) +{ + int nbits = (hi + 1) - lo; + return ((1L << nbits) - 1) << lo; +} + +// pick bits [hi,...,lo] from val +static inline u_int32_t pick32(u_int32_t val, int hi = 31, int lo = 0) +{ + return (val & mask32(hi, lo)); +} + +// pick bits [hi,...,lo] from val +static inline u_int64_t pick64(u_int64_t val, int hi = 31, int lo = 0) +{ + return (val & mask64(hi, lo)); +} + +// pick bits [hi,...,lo] from val and shift to [(hi-(newlo - lo)),newlo] +static inline u_int32_t pickshift32(u_int32_t val, int hi = 31, + int lo = 0, int newlo = 0) +{ + u_int32_t bits = pick32(val, hi, lo); + if (lo < newlo) { + return (bits << (newlo - lo)); + } else { + return (bits >> (lo - newlo)); + } +} +// mask [hi,lo] and shift down to start at bit 0 +static inline u_int32_t pickbits32(u_int32_t val, int hi = 31, int lo = 0) +{ + return (pick32(val, hi, lo) >> lo); +} 
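// Illustrative sketch (standalone; not part of this patch): how the mask/pick
// helpers above are typically used to decode fields of an instruction word.
// The mask/pick logic is repeated with <cstdint> types (sketch_* names are
// invented for the illustration) so the snippet compiles on its own; the field
// positions are those of the ARM A32 data-processing (immediate) encoding,
// where 0xE3A01005 is "mov r1, #5".

#include <cassert>
#include <cstdint>

static inline uint32_t sketch_mask32(int hi, int lo) {
  // bits [hi..lo] set, everything else clear (same shape as mask32() above)
  return ((1u << ((hi + 1) - lo)) - 1u) << lo;
}

static inline uint32_t sketch_pickbits32(uint32_t val, int hi, int lo) {
  // select bits [hi..lo] of val and shift the field down to bit 0
  return (val & sketch_mask32(hi, lo)) >> lo;
}

int main() {
  const uint32_t insn = 0xE3A01005u;             // mov r1, #5
  assert(sketch_pickbits32(insn, 31, 28) == 14); // cond field: 0b1110 (AL)
  assert(sketch_pickbits32(insn, 15, 12) == 1);  // Rd field: r1
  assert(sketch_pickbits32(insn, 11, 0)  == 5);  // imm12 field: 5
  return 0;
}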
+ +// mask [hi,lo] and shift down to start at bit 0 +static inline u_int64_t pickbits64(u_int64_t val, int hi = 63, int lo = 0) +{ + return (pick64(val, hi, lo) >> lo); +} + +/* + * decode registers, immediates and constants of various types + */ + +static inline GReg greg(u_int32_t val, int lo) +{ + return (GReg)pickbits32(val, lo + 4, lo); +} + +static inline VReg vreg(u_int32_t val, int lo) +{ + return (VReg)pickbits32(val, lo + 4, lo); +} + +static inline u_int32_t uimm(u_int32_t val, int hi, int lo) +{ + return pickbits32(val, hi, lo); +} + +static inline int32_t simm(u_int32_t val, int hi = 31, int lo = 0) { + union { + u_int32_t u; + int32_t n; + }; + + u = val << (31 - hi); + n = n >> (31 - hi + lo); + return n; +} + +static inline int64_t simm(u_int64_t val, int hi = 63, int lo = 0) { + union { + u_int64_t u; + int64_t n; + }; + + u = val << (63 - hi); + n = n >> (63 - hi + lo); + return n; +} + +static inline Shift shift(u_int32_t val, int lo) +{ + return (Shift)pickbits32(val, lo+1, lo); +} + +static inline Extension extension(u_int32_t val, int lo) +{ + return (Extension)pickbits32(val, lo+2, lo); +} + +static inline Scaling scaling(u_int32_t val, int lo) +{ + return (Scaling)pickbits32(val, lo, lo); +} + +static inline WriteBack writeback(u_int32_t val, int lo) +{ + return (WriteBack)pickbits32(val, lo, lo); +} + +static inline CondCode condcode(u_int32_t val, int lo) +{ + return (CondCode)pickbits32(val, lo+3, lo); +} + +/* + * operation decode + */ +// bits [28,25] are the primary dispatch vector + +static inline u_int32_t dispatchGroup(u_int32_t val) +{ + return pickshift32(val, 28, 25, 0); +} + +/* + * the 16 possible values for bits [28,25] identified by tags which + * map them to the 5 main instruction groups LDST, DPREG, ADVSIMD, + * BREXSYS and DPIMM. + * + * An extra group PSEUDO is included in one of the unallocated ranges + * for simulator-specific pseudo-instructions. + */ +enum DispatchGroup { + GROUP_PSEUDO_0000, + GROUP_UNALLOC_0001, + GROUP_UNALLOC_0010, + GROUP_UNALLOC_0011, + GROUP_LDST_0100, + GROUP_DPREG_0101, + GROUP_LDST_0110, + GROUP_ADVSIMD_0111, + GROUP_DPIMM_1000, + GROUP_DPIMM_1001, + GROUP_BREXSYS_1010, + GROUP_BREXSYS_1011, + GROUP_LDST_1100, + GROUP_DPREG_1101, + GROUP_LDST_1110, + GROUP_ADVSIMD_1111 +}; + +// bits [31, 29] of a Pseudo are the secondary dispatch vector + +static inline u_int32_t dispatchPseudo(u_int32_t val) +{ + return pickshift32(val, 31, 29, 0); +} + +/* + * the 8 possible values for bits [31,29] in a Pseudo Instruction. + * Bits [28,25] are always 0000. + */ + +enum DispatchPseudo { + PSEUDO_UNALLOC_000, // unallocated + PSEUDO_UNALLOC_001, // ditto + PSEUDO_UNALLOC_010, // ditto + PSEUDO_UNALLOC_011, // ditto + PSEUDO_UNALLOC_100, // ditto + PSEUDO_UNALLOC_101, // ditto + PSEUDO_CALLOUT_110, // CALLOUT -- bits [24,0] identify call/ret sig + PSEUDO_HALT_111 // HALT -- bits [24, 0] identify halt code +}; + +// bits [25, 23] of a DPImm are the secondary dispatch vector + +static inline u_int32_t dispatchDPImm(u_int32_t instr) +{ + return pickshift32(instr, 25, 23, 0); +} + +/* + * the 8 possible values for bits [25,23] in a Data Processing Immediate + * Instruction. Bits [28,25] are always 100_. 
+ */ + +enum DispatchDPImm { + DPIMM_PCADR_000, // PC-rel-addressing + DPIMM_PCADR_001, // ditto + DPIMM_ADDSUB_010, // Add/Subtract (immediate) + DPIMM_ADDSUB_011, // ditto + DPIMM_LOG_100, // Logical (immediate) + DPIMM_MOV_101, // Move Wide (immediate) + DPIMM_BITF_110, // Bitfield + DPIMM_EXTR_111 // Extract +}; + +// bits [29,28:26] of a LS are the secondary dispatch vector + +static inline u_int32_t dispatchLS(u_int32_t instr) +{ + return (pickshift32(instr, 29, 28, 1) | + pickshift32(instr, 26, 26, 0)); +} + +/* + * the 8 possible values for bits [29,28:26] in a Load/Store + * Instruction. Bits [28,25] are always _1_0 + */ + +enum DispatchLS { + LS_EXCL_000, // Load/store exclusive (includes some unallocated) + LS_ADVSIMD_001, // AdvSIMD load/store (various -- includes some unallocated) + LS_LIT_010, // Load register literal (includes some unallocated) + LS_LIT_011, // ditto + LS_PAIR_100, // Load/store register pair (various) + LS_PAIR_101, // ditto + LS_OTHER_110, // other load/store formats + LS_OTHER_111 // ditto +}; + +// bits [28:24:21] of a DPReg are the secondary dispatch vector + +static inline u_int32_t dispatchDPReg(u_int32_t instr) +{ + return (pickshift32(instr, 28, 28, 2) | + pickshift32(instr, 24, 24, 1) | + pickshift32(instr, 21, 21, 0)); +} + +/* + * the 8 possible values for bits [28:24:21] in a Data Processing + * Register Instruction. Bits [28,25] are always _101 + */ + +enum DispatchDPReg { + DPREG_LOG_000, // Logical (shifted register) + DPREG_LOG_001, // ditto + DPREG_ADDSHF_010, // Add/subtract (shifted register) + DPREG_ADDEXT_011, // Add/subtract (extended register) + DPREG_ADDCOND_100, // Add/subtract (with carry) AND + // Cond compare/select AND + // Data Processing (1/2 source) + DPREG_UNALLOC_101, // Unallocated + DPREG_3SRC_110, // Data Processing (3 source) + DPREG_3SRC_111 // Data Processing (3 source) +}; + +// bits [31,29] of a BrExSys are the secondary dispatch vector + +static inline u_int32_t dispatchBrExSys(u_int32_t instr) +{ + return pickbits32(instr, 31, 29); +} + +/* + * the 8 possible values for bits [31,29] in a Branch/Exception/System + * Instruction. Bits [28,25] are always 101_ + */ + +enum DispatchBr { + BR_IMM_000, // Unconditional branch (immediate) + BR_IMMCMP_001, // Compare & branch (immediate) AND + // Test & branch (immediate) + BR_IMMCOND_010, // Conditional branch (immediate) AND Unallocated + BR_UNALLOC_011, // Unallocated + BR_IMM_100, // Unconditional branch (immediate) + BR_IMMCMP_101, // Compare & branch (immediate) AND + // Test & branch (immediate) + BR_REG_110, // Unconditional branch (register) AND System AND + // Excn gen AND Unallocated + BR_UNALLOC_111 // Unallocated +}; + +/* + * TODO still need to provide secondary decode and dispatch for + * AdvSIMD Insructions with instr[28,25] = 0111 or 1111 + */ + +#endif // ifndef DECODE_H --- /dev/null 2018-09-25 19:24:41.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/depChecker_aarch32.cpp 2018-09-25 19:24:41.000000000 +0300 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "compiler/disassembler.hpp" +#include "depChecker_aarch32.hpp" + +// Nothing to do on aarch32 --- /dev/null 2018-09-25 19:24:43.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/depChecker_aarch32.hpp 2018-09-25 19:24:42.000000000 +0300 @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_DEPCHECKER_AARCH32_HPP +#define CPU_AARCH32_VM_DEPCHECKER_AARCH32_HPP + +// Nothing to do on aarch32 + +#endif // CPU_AARCH32_VM_DEPCHECKER_AARCH32_HPP --- /dev/null 2018-09-25 19:24:44.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/disassembler_aarch32.hpp 2018-09-25 19:24:43.000000000 +0300 @@ -0,0 +1,38 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_DISASSEMBLER_AARCH32_HPP +#define CPU_AARCH32_VM_DISASSEMBLER_AARCH32_HPP + + static int pd_instruction_alignment() { + return 1; + } + + static const char* pd_cpu_opts() { + return ""; + } + +#endif // CPU_AARCH32_VM_DISASSEMBLER_AARCH32_HPP --- /dev/null 2018-09-25 19:24:45.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/frame_aarch32.cpp 2018-09-25 19:24:44.000000000 +0300 @@ -0,0 +1,820 @@ +/* + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/monitorChunk.hpp" +#include "runtime/os.hpp" +#include "runtime/signature.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "vmreg_aarch32.inline.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#include "runtime/vframeArray.hpp" +#endif + +#ifdef ASSERT +void RegisterMap::check_location_valid() { +} +#endif + + +// Profiling/safepoint support + +bool frame::safe_for_sender(JavaThread *thread) { + address sp = (address)_sp; + address fp = (address)_fp; + address unextended_sp = (address)_unextended_sp; + + // consider stack guards when trying to determine "safe" stack pointers + static size_t stack_guard_size = os::uses_stack_guard_pages() ? 
+ (JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size()) : 0; + size_t usable_stack_size = thread->stack_size() - stack_guard_size; + + // sp must be within the usable part of the stack (not in guards) + bool sp_safe = (sp < thread->stack_base()) && + (sp >= thread->stack_base() - usable_stack_size); + + + if (!sp_safe) { + return false; + } + + // unextended sp must be within the stack and above or equal sp + bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && + (unextended_sp >= sp); + + if (!unextended_sp_safe) { + return false; + } + + // We know sp/unextended_sp are safe only fp is questionable here + + // If the current frame is known to the code cache then we can attempt to + // to construct the sender and do some validation of it. This goes a long way + // toward eliminating issues when we get in frame construction code + + if (_cb != NULL ) { + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 + const bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (get_return_addr_offset() * sizeof(void*))) < thread->stack_base()))); + + + // First check if frame is complete and tester is reliable + // Unfortunately we can only check frame complete for runtime stubs and nmethod + // other generic buffer blobs are more problematic so we just assume they are + // ok. adapter blobs never have a frame complete and are never ok. + + if (!_cb->is_frame_complete_at(_pc)) { + if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { + return false; + } + } + + // Could just be some random pointer within the codeBlob + if (!_cb->code_contains(_pc)) { + return false; + } + + // Entry frame checks + if (is_entry_frame()) { + // an entry frame must have a valid fp. + return fp_safe && is_entry_frame_valid(thread); + } + + intptr_t* sender_sp = NULL; + intptr_t* sender_unextended_sp = NULL; + address sender_pc = NULL; + intptr_t* saved_fp = NULL; + + if (is_interpreted_frame()) { + // fp must be safe + if (!fp_safe) { + return false; + } + + sender_pc = (address) this->fp()[get_return_addr_offset()]; + // for interpreted frames, the value below is the sender "raw" sp, + // which can be different from the sender unextended sp (the sp seen + // by the sender) because of current frame local variables + sender_sp = (intptr_t*) addr_at(sender_sp_offset); + sender_unextended_sp = (intptr_t*) this->fp()[get_interpreter_frame_sender_sp_offset()]; + saved_fp = (intptr_t*) this->fp()[get_link_offset()]; + + } else { + // must be some sort of compiled/runtime frame + // fp does not have to be safe (although it could be check for c1?) + + // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc + if (_cb->frame_size() <= 0) { + return false; + } + + sender_sp = _unextended_sp + _cb->frame_size(); + // Is sender_sp safe? + if ((address)sender_sp >= thread->stack_base()) { + return false; + } + sender_unextended_sp = sender_sp; + sender_pc = (address) *(sender_sp - 1 + frame::get_return_addr_offset()); + // Note: frame::sender_sp_offset is only valid for compiled frame + saved_fp = (intptr_t*) *(sender_sp - 1 + frame::get_link_offset()); + } + + + // If the potential sender is the interpreter then we can do some more checking + if (Interpreter::contains(sender_pc)) { + + // fp is always saved in a recognizable place in any code we generate. However + // only if the sender is interpreted/call_stub (c1 too?) 
are we certain that the saved fp + // is really a frame pointer. + + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + return sender.is_interpreted_frame_valid(thread); + + } + + // We must always be able to find a recognizable pc + CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); + if (sender_pc == NULL || sender_blob == NULL) { + return false; + } + + // Could be a zombie method + if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { + return false; + } + + // Could just be some random pointer within the codeBlob + if (!sender_blob->code_contains(sender_pc)) { + return false; + } + + // We should never be able to see an adapter if the current frame is something from code cache + if (sender_blob->is_adapter_blob()) { + return false; + } + + // Could be the call_stub + if (StubRoutines::returns_to_call_stub(sender_pc)) { + bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); + + if (!saved_fp_safe) { + return false; + } + + // construct the potential sender + + frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); + + // Validate the JavaCallWrapper an entry frame must have + address jcw = (address)sender.entry_frame_call_wrapper(); + + bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); + + return jcw_safe; + } + + CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); + if (nm != NULL) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || + nm->method()->is_method_handle_intrinsic()) { + return false; + } + } + + // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size + // because the return address counts against the callee's frame. + + if (sender_blob->frame_size() <= 0) { + assert(!sender_blob->is_compiled(), "should count return address at least"); + return false; + } + + // We should never be able to see anything here except an nmethod. If something in the + // code cache (current frame) is called by an entity within the code cache that entity + // should not be anything but the call stub (already covered), the interpreter (already covered) + // or an nmethod. + + if (!sender_blob->is_compiled()) { + return false; + } + + // Could put some more validation for the potential non-interpreted sender + // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... + + // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb + + // We've validated the potential sender that would be created + return true; + } + + // an fp must be within the stack and above (but not equal) sp + // second evaluation on fp+ is added to handle situation where fp is -1 + const bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (get_return_addr_offset(VMFrameAPCS) * sizeof(void*))) < thread->stack_base()))); + + // Must be native-compiled frame. Since sender will try and use fp to find + // linkages it must be safe + + if (!fp_safe) { + return false; + } + + // Will the pc we fetch be non-zero (which we'll find at the oldest frame) + + if ( (address) this->fp()[get_return_addr_offset(VMFrameAPCS)] == NULL) return false; + + + // could try and do some more potential verification of native frame if we could think of some... 
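
frame::safe_for_sender above repeats one pattern several times: a candidate sp or fp is trusted only if it lies inside the usable part of the thread stack and in the right order relative to the other pointers. The standalone sketch below isolates that bounds test; StackBounds is a hypothetical stand-in for the JavaThread accessors, and the code-cache and frame-layout checks of the real routine are omitted.

    #include <cstddef>
    #include <cstdint>

    // Hypothetical stand-in for the JavaThread stack accessors; the stack
    // grows down from 'base'.
    struct StackBounds {
      uintptr_t base;        // thread->stack_base()
      size_t    size;        // thread->stack_size()
      size_t    guard_size;  // red + yellow guard zones carved out of 'size'
    };

    // sp must lie in the usable part of the stack: below the base and
    // above the guard pages.
    static bool sp_is_safe(uintptr_t sp, const StackBounds& s) {
      uintptr_t usable_bottom = s.base - (s.size - s.guard_size);
      return sp < s.base && sp >= usable_bottom;
    }

    // fp must be inside the stack, strictly above sp, and leave room for
    // the saved return address below the stack base; the offset may be
    // negative for the APCS-style layout.
    static bool fp_is_safe(uintptr_t fp, uintptr_t sp, const StackBounds& s,
                           int return_addr_offset_words) {
      uintptr_t ret_slot =
          fp + (intptr_t)return_addr_offset_words * (intptr_t)sizeof(void*);
      return fp < s.base && fp > sp && ret_slot < s.base;
    }

    int main() {
      StackBounds s{0x80000000u, 1u << 20, 32u << 10};
      uintptr_t sp = s.base - 0x1000, fp = s.base - 0x800;
      return sp_is_safe(sp, s) && fp_is_safe(fp, sp, s, 0) ? 0 : 1;
    }
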
+ + return true; + +} + +void frame::patch_pc(Thread* thread, address pc) { + address* pc_addr = &(((address*) sp())[-1 + frame::get_return_addr_offset()]); + if (TracePcPatching) { + tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", + p2i(pc_addr), p2i(*pc_addr), p2i(pc)); + } + // Either the return address is the original one or we are going to + // patch in the same address that's already there. + assert(_pc == *pc_addr || pc == *pc_addr, "must be"); + *pc_addr = pc; + _cb = CodeCache::find_blob(pc); + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + assert(original_pc == _pc, "expected original PC to be stored before patching"); + _deopt_state = is_deoptimized; + // leave _pc as is + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +bool frame::is_interpreted_frame() const { + return Interpreter::contains(pc()); +} + +int frame::frame_size(RegisterMap* map) const { + frame sender = this->sender(map); + return sender.sp() - sp(); +} + +intptr_t* frame::entry_frame_argument_at(int offset) const { + // convert offset to index to deal with tsi + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + // Entry frame's arguments are always in relation to unextended_sp() + return &unextended_sp()[index]; +} + +// sender_sp +intptr_t* frame::interpreter_frame_sender_sp() const { + assert(is_interpreted_frame(), "interpreted frame expected"); + return (intptr_t*) at(get_interpreter_frame_sender_sp_offset()); +} + +void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { + assert(is_interpreted_frame(), "interpreted frame expected"); + ptr_at_put(get_interpreter_frame_sender_sp_offset(), (intptr_t) sender_sp); +} + + +// monitor elements + +BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return (BasicObjectLock*) addr_at(get_interpreter_frame_monitor_block_bottom_offset()); +} + +BasicObjectLock* frame::interpreter_frame_monitor_end() const { + BasicObjectLock* result = (BasicObjectLock*) *addr_at(get_interpreter_frame_monitor_block_top_offset()); + // make sure the pointer points inside the frame + assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); + assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); + return result; +} + +void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { + *((BasicObjectLock**)addr_at(get_interpreter_frame_monitor_block_top_offset())) = value; +} + +// Used by template based interpreter deoptimization +void frame::interpreter_frame_set_last_sp(intptr_t* sp) { + *((intptr_t**)addr_at(get_interpreter_frame_last_sp_offset())) = sp; +} + +frame frame::sender_for_entry_frame(RegisterMap* map) const { + assert(map != NULL, "map must be set"); + // Java frame called from C; skip all C frames and return top C + // frame of that chunk as the sender + JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); + assert(!entry_frame_is_first(), "next Java fp must be non zero"); + assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); + // Since we are walking the stack now this nested anchor is obviously walkable + // even if it wasn't when it was stacked. + if (!jfa->walkable()) { + // Capture _last_Java_pc (if needed) and mark anchor walkable. 
+ jfa->capture_last_Java_pc(); + } + map->clear(); + assert(map->include_argument_oops(), "should be set by clear"); + vmassert(jfa->last_Java_pc() != NULL, "not walkable"); + frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); + return fr; +} + +//------------------------------------------------------------------------------ +// frame::verify_deopt_original_pc +// +// Verifies the calculated original PC of a deoptimization PC for the +// given unextended SP. +#ifdef ASSERT +void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { + frame fr; + + // This is ugly but it's better than to change {get,set}_original_pc + // to take an SP value as argument. And it's only a debugging + // method anyway. + fr._unextended_sp = unextended_sp; + + address original_pc = nm->get_original_pc(&fr); + assert(nm->insts_contains_inclusive(original_pc), + "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); +} +#endif + +//------------------------------------------------------------------------------ +// frame::adjust_unextended_sp +void frame::adjust_unextended_sp() { + // On aarch32, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. + + if (_cb != NULL) { + CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); + if (sender_cm != NULL) { + // If the sender PC is a deoptimization point, get the original PC. + if (sender_cm->is_deopt_entry(_pc) || + sender_cm->is_deopt_mh_entry(_pc)) { + DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); + } + } + } +} + +//------------------------------------------------------------------------------ +// frame::update_map_with_saved_link +void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { + // The interpreter and compiler(s) always save fp in a known + // location on entry. We must record where that location is + // so that if fp was live on callout from c2 we can find + // the saved copy no matter what it called. + + // Since the interpreter always saves fp if we record where it is then + // we don't have to always save fp on entry and exit to c2 compiled + // code, on entry will be enough. + map->set_location(rfp->as_VMReg(), (address) link_addr); +} + + +//------------------------------------------------------------------------------ +// frame::sender_for_interpreter_frame +frame frame::sender_for_interpreter_frame(RegisterMap* map) const { + // SP is the raw SP from the sender after adapter or interpreter + // extension. + intptr_t* sender_sp = this->sender_sp(); + + // This is the sp before any possible extension (adapter/locals). + intptr_t* unextended_sp = interpreter_frame_sender_sp(); + +#if COMPILER2_OR_JVMCI + if (map->update_map()) { + update_map_with_saved_link(map, (intptr_t**) addr_at(get_link_offset())); + } +#endif // COMPILER2_OR_JVMCI + + address sender_pc = *(address*) addr_at(get_return_addr_offset()); + intptr_t *link = *(intptr_t **)addr_at(get_link_offset()); + + return frame(sender_sp, unextended_sp, link, sender_pc); +} + + +//------------------------------------------------------------------------------ +// frame::sender_for_compiled_frame +frame frame::sender_for_compiled_frame(RegisterMap* map) const { + // we cannot rely upon the last fp having been saved to the thread + // in C2 code but it will have been pushed onto the stack. 
so we + // have to find it relative to the unextended sp + + assert(_cb->frame_size() >= 0, "must have non-zero frame size"); + intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size(); + intptr_t* unextended_sp = l_sender_sp; + + // the return_address is always the word on the stack + address sender_pc = (address) *(l_sender_sp - 1 + get_return_addr_offset()); + + intptr_t** saved_fp_addr = (intptr_t**)(l_sender_sp - 1 + get_link_offset()); + + // assert (sender_sp() == l_sender_sp, "should be"); + // assert (*saved_fp_addr == link(), "should be"); + + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + // For C1, the runtime stub might not have oop maps, so set this flag + // outside of update_register_map. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + + // Since the prolog does the save and restore of FP there is no + // oopmap for it so we must fill in its location as if there was + // an oopmap entry since if our caller was compiled code there + // could be live jvm state in it. + update_map_with_saved_link(map, saved_fp_addr); + } + + return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc); +} + +//------------------------------------------------------------------------------ +// frame::sender +frame frame::sender(RegisterMap* map) const { + // Default is we done have to follow them. The sender_for_xxx will + // update it accordingly + map->set_include_argument_oops(false); + + if (is_entry_frame()) { + return sender_for_entry_frame(map); + } + if (is_interpreted_frame()) { + return sender_for_interpreter_frame(map); + } + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + // This test looks odd: why is it not is_compiled_frame() ? That's + // because stubs also have OOP maps. + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + intptr_t *link = *(intptr_t**) addr_at(get_link_offset(VMFrameAPCS)); + return frame(sender_sp(), link, sender_pc()); +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { + assert(is_interpreted_frame(), "Not an interpreted frame"); + // These are reasonable sanity checks + if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { + return false; + } + if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { + return false; + } + if (fp() + get_interpreter_frame_initial_sp_offset() < sp()) { + return false; + } + // These are hacks to keep us out of trouble. 
+ // The problem with these is that they mask other problems + if (fp() <= sp()) { // this attempts to deal with unsigned comparison above + return false; + } + + // do some validation of frame elements + + // first the method + + Method* m = *interpreter_frame_method_addr(); + + // validate the method we'd find in this potential sender + if (!m->is_valid_method()) return false; + + // stack frames shouldn't be much larger than max_stack elements + // this test requires the use of unextended_sp which is the sp as seen by + // the current frame, and not sp which is the "raw" pc which could point + // further because of local variables of the callee method inserted after + // method arguments + if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { + return false; + } + + // validate bci/bcp + address bcp = interpreter_frame_bcp(); + if (m->validate_bci_from_bcp(bcp) < 0) { + return false; + } + + // validate constantPoolCache* + ConstantPoolCache* cp = *interpreter_frame_cache_addr(); + if (cp == NULL || !cp->is_metaspace_object()) return false; + + // validate locals + + address locals = (address) *interpreter_frame_locals_addr(); + + if (locals > thread->stack_base() || locals < (address) fp()) return false; + + // We'd have to be pretty unlucky to be mislead at this point + + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { +#ifdef CC_INTERP + // Needed for JVMTI. The result should always be in the + // interpreterState object + interpreterState istate = get_interpreterState(); +#endif // CC_INTERP + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + + intptr_t* tos_addr; + if (method->is_native()) { + tos_addr = (intptr_t*)sp(); + if (type == T_FLOAT || type == T_DOUBLE) { + // This is times two because we do a push(ltos) after pushing D0 + // and that takes two interpreter stack slots. +#ifdef HARD_FLOAT_CC + tos_addr += 2 * Interpreter::stackElementWords; +#endif + } + } else { + tos_addr = (intptr_t*)interpreter_frame_tos_address(); + } + + switch (type) { + case T_OBJECT : + case T_ARRAY : { + oop obj; + if (method->is_native()) { + obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); + } else { + oop* obj_p = (oop*)tos_addr; + obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; + } + assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; + case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; + case T_CHAR : value_result->c = *(jchar*)tos_addr; break; + case T_SHORT : value_result->s = *(jshort*)tos_addr; break; + case T_INT : value_result->i = *(jint*)tos_addr; break; + case T_LONG : value_result->j = *(jlong*)tos_addr; break; + case T_FLOAT : { + value_result->f = *(jfloat*)tos_addr; + break; + } + case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; + case T_VOID : /* Nothing to do */ break; + default : ShouldNotReachHere(); + } + + return type; +} + + +intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); + return &interpreter_frame_tos_address()[index]; +} + +#ifndef PRODUCT + +#define DESCRIBE_FP_OFFSET(name) \ + values.describe(frame_no, fp() + frame::get_##name##_offset(), #name) + +void frame::describe_pd(FrameValues& values, int frame_no) { + if (is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); + DESCRIBE_FP_OFFSET(interpreter_frame_method); + DESCRIBE_FP_OFFSET(interpreter_frame_mdp); + DESCRIBE_FP_OFFSET(interpreter_frame_mirror); + DESCRIBE_FP_OFFSET(interpreter_frame_cache); + DESCRIBE_FP_OFFSET(interpreter_frame_locals); + DESCRIBE_FP_OFFSET(interpreter_frame_bcp); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); + } +} + +#endif // PRODUCT + +intptr_t *frame::initial_deoptimization_info() { + return real_fp(); +} + +intptr_t* frame::real_fp() const { + // Currently we have a fp for all frames + if (_cb != NULL) { + // use the frame size if valid + int size = _cb->frame_size(); + if (size > 0) { + return unextended_sp() + size; + } + } + // else rely on fp() + assert(! is_compiled_frame(), "unknown compiled frame size"); + return fp(); +} + +#undef DESCRIBE_FP_OFFSET + +#define NO_PARAM +#define DESCRIBE_FP_OFFSET(name, param) \ + { \ + unsigned long *p = (unsigned long *)fp; \ + printf("0x%016lx 0x%016lx %s\n", (unsigned long)(p + frame::get_##name##_offset(param)), \ + p[frame::get_##name##_offset(param)], #name); \ + } + +static __thread unsigned long nextfp; +static __thread unsigned long nextpc; +static __thread unsigned long nextsp; +static __thread RegisterMap *reg_map; + +static void printbc(Method *m, intptr_t bcp) { + const char *name; + char buf[16]; + if (m->validate_bci_from_bcp((address)bcp) < 0 || !m->contains((address) bcp)) { + name = "???"; + snprintf(buf, sizeof buf, "(bad)"); + } else { + int bci = m->bci_from((address)bcp); + snprintf(buf, sizeof buf, "%d", bci); + name = Bytecodes::name(m->code_at(bci)); + } + ResourceMark rm; + printf("%s : %s ==> %s\n", m->name_and_sig_as_C_string(), buf, name); +} + +void internal_pf(unsigned long sp, unsigned long fp, unsigned long pc, unsigned long bcx) { + if (! 
fp) + return; + + DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp, NO_PARAM); + DESCRIBE_FP_OFFSET(interpreter_frame_last_sp, NO_PARAM); + DESCRIBE_FP_OFFSET(interpreter_frame_method, NO_PARAM); + DESCRIBE_FP_OFFSET(interpreter_frame_mdp, NO_PARAM); + DESCRIBE_FP_OFFSET(interpreter_frame_cache, NO_PARAM); + DESCRIBE_FP_OFFSET(interpreter_frame_locals, NO_PARAM); + DESCRIBE_FP_OFFSET(interpreter_frame_bcp, NO_PARAM); + DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp, NO_PARAM); + + unsigned long *p = (unsigned long *)fp; + + // We want to see all frames, native and Java. For compiled and + // interpreted frames we have special information that allows us to + // unwind them; for everything else we assume that the native frame + // pointer chain is intact. + frame this_frame((intptr_t*)sp, (intptr_t*)fp, (address)pc); + if (this_frame.is_compiled_frame() || + this_frame.is_interpreted_frame()) { + DESCRIBE_FP_OFFSET(return_addr, FrameAPCS); + DESCRIBE_FP_OFFSET(link, FrameAPCS); + frame sender = this_frame.sender(reg_map); + nextfp = (unsigned long)sender.fp(); + nextpc = (unsigned long)sender.pc(); + nextsp = (unsigned long)sender.unextended_sp(); + } else { + DESCRIBE_FP_OFFSET(return_addr, VMFrameAPCS); + DESCRIBE_FP_OFFSET(link, VMFrameAPCS); + nextfp = p[frame::get_link_offset(VMFrameAPCS)]; + nextpc = p[frame::get_return_addr_offset(VMFrameAPCS)]; + nextsp = (unsigned long)&p[frame::sender_sp_offset]; + } + + if (bcx == -1ul) { + bcx = p[frame::get_interpreter_frame_bcp_offset()]; + } + + if (Interpreter::contains((address)pc)) { + Method* m = (Method*)p[frame::get_interpreter_frame_method_offset()]; + if(m && m->is_method()) { + printbc(m, bcx); + } else + printf("not a Method\n"); + } else { + CodeBlob *cb = CodeCache::find_blob((address)pc); + if (cb != NULL) { + if (cb->is_nmethod()) { + ResourceMark rm; + nmethod* nm = (nmethod*)cb; + printf("nmethod %s\n", nm->method()->name_and_sig_as_C_string()); + } else if (cb->name()) { + printf("CodeBlob %s\n", cb->name()); + } + } + } +} + +extern "C" void npf() { + CodeBlob *cb = CodeCache::find_blob((address)nextpc); + // C2 does not always chain the frame pointers when it can, instead + // preferring to use fixed offsets from SP, so a simple leave() does + // not work. Instead, it adds the frame size to SP then pops FP and + // LR. We have to do the same thing to get a good call chain. + if (cb && cb->frame_size()) + nextfp = nextsp + wordSize * (cb->frame_size() - 2); + internal_pf (nextsp, nextfp, nextpc, -1); +} + +extern "C" void pf(unsigned long sp, unsigned long fp, unsigned long pc, + unsigned long bcx, unsigned long thread) { + RegisterMap map((JavaThread*)thread, false); + if (!reg_map) { + reg_map = (RegisterMap*)os::malloc(sizeof map, mtNone); + } + memcpy(reg_map, &map, sizeof map); + { + CodeBlob *cb = CodeCache::find_blob((address)pc); + if (cb && cb->frame_size()) + fp = sp + wordSize * (cb->frame_size() - 2); + } + internal_pf(sp, fp, pc, bcx); +} + +// support for printing out where we are in a Java method +// needs to be passed current fp and bcp register values +// prints method name, bc index and bytecode name +extern "C" void pm(unsigned long fp, unsigned long bcx) { + DESCRIBE_FP_OFFSET(interpreter_frame_method, NO_PARAM); + unsigned long *p = (unsigned long *)fp; + Method* m = (Method*)p[frame::get_interpreter_frame_method_offset()]; + printbc(m, bcx); +} + +#ifndef PRODUCT +// This is a generic constructor which is only used by pns() in debug.cpp. 
+frame::frame(void* sp, void* fp, void* pc) { + init((intptr_t*)sp, (intptr_t*)fp, (address)pc); +} + +void frame::pd_ps() {} +#endif + +void JavaFrameAnchor::make_walkable(JavaThread* thread) { + // last frame set? + if (last_Java_sp() == NULL) return; + // already walkable? + if (walkable()) return; + vmassert(Thread::current() == (Thread*)thread, "not current thread"); + vmassert(last_Java_sp() != NULL, "not called from Java code?"); + vmassert(last_Java_pc() == NULL, "already walkable"); + capture_last_Java_pc(); + vmassert(walkable(), "something went wrong"); +} + +void JavaFrameAnchor::capture_last_Java_pc() { + vmassert(_last_Java_sp != NULL, "no last frame set"); + vmassert(_last_Java_pc == NULL, "already walkable"); + _last_Java_pc = (address)_last_Java_sp[-1]; +} --- /dev/null 2018-09-25 19:24:46.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/frame_aarch32.hpp 2018-09-25 19:24:46.000000000 +0300 @@ -0,0 +1,189 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_FRAME_AARCH32_HPP +#define CPU_AARCH32_VM_FRAME_AARCH32_HPP + +#include "runtime/synchronizer.hpp" + +// A frame represents a physical stack frame (an activation). Frames can be +// C or Java frames, and the Java frames can be interpreted or compiled. +// In contrast, vframes represent source-level activations, so that one physical frame +// can correspond to multiple source level frames because of inlining. +// A frame is comprised of {pc, fp, sp} +// ------------------------------ Asm interpreter ---------------------------------------- +// Layout of asm interpreter frame: +// [expression stack ] * <- sp + +// [monitors[0] ] \ +// ... 
| monitor block size = k +// [monitors[k-1] ] / +// [frame initial esp ] ( == &monitors[0], initially here) initial_sp_offset +// [byte code index/pointr] = bcx() bcx_offset + +// [pointer to locals ] = locals() locals_offset +// [constant pool cache ] = cache() cache_offset + +// [klass of method ] = mirror() mirror_offset +// [padding ] + +// [methodData ] = mdp() mdx_offset +// [methodOop ] = method() method_offset + +// [last esp ] = last_sp() last_sp_offset +// [old stack pointer ] (sender_sp) sender_sp_offset + +// [old frame pointer ] = link() +// [return pc ] <- fp + +// [last sp ] +// [oop temp ] (only for native calls) + +// [locals and parameters ] +// <- sender sp +// ------------------------------ Asm interpreter ---------------------------------------- + + public: + enum { + pc_return_offset = 0, + // All frames + sender_sp_offset = 1, + + // we don't need a save area + arg_reg_save_area_bytes = 0, + + // Interpreter frames + interpreter_frame_oop_temp_offset = 2, // for native calls only + + interpreter_frame_sender_sp_offset = 0, + // outgoing sp before a call to an invoked method + interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, + interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, + interpreter_frame_mdp_offset = interpreter_frame_method_offset - 1, + interpreter_frame_padding_offset = interpreter_frame_mdp_offset - 1, + interpreter_frame_mirror_offset = interpreter_frame_padding_offset - 1, + interpreter_frame_cache_offset = interpreter_frame_mirror_offset - 1, + interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, + interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, + interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, + + interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, + interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, + }; + + static int get_frame_size(bool apcs = FrameAPCS) { + return apcs ? 4 : 2; + } + + static int get_link_offset(bool apcs = FrameAPCS) { + return apcs ? -3 : -1; + } + + static int get_return_addr_offset(bool apcs = FrameAPCS) { + return apcs ? -1 : 0; + } + + // Entry frames + // n.b. 
these values are determined by the layout defined in + // stubGenerator for the Java call stub + static int get_entry_frame_after_call_words() { return (get_frame_size(VMFrameAPCS) + 10) + (StackAlignmentInBytes/BytesPerWord); } + static int get_entry_frame_call_wrapper_offset() { return -(get_frame_size(VMFrameAPCS) + 10); } + + static int get_offset_from_rfp_bytes() { return wordSize * (get_frame_size(VMFrameAPCS) - 1); } + static int get_interpreter_frame_oop_temp_offset() { return interpreter_frame_oop_temp_offset; } + static int get_interpreter_frame_sender_sp_offset() { return -get_frame_size() + interpreter_frame_sender_sp_offset; } + static int get_interpreter_frame_last_sp_offset() { return -get_frame_size() + interpreter_frame_last_sp_offset; } + static int get_interpreter_frame_method_offset() { return -get_frame_size() + interpreter_frame_method_offset; } + static int get_interpreter_frame_mdp_offset() { return -get_frame_size() + interpreter_frame_mdp_offset; } + static int get_interpreter_frame_padding_offset() { return -get_frame_size() + interpreter_frame_padding_offset; } + static int get_interpreter_frame_mirror_offset() { return -get_frame_size() + interpreter_frame_mirror_offset; } + static int get_interpreter_frame_cache_offset() { return -get_frame_size() + interpreter_frame_cache_offset; } + static int get_interpreter_frame_locals_offset() { return -get_frame_size() + interpreter_frame_locals_offset; } + static int get_interpreter_frame_bcp_offset() { return -get_frame_size() + interpreter_frame_bcp_offset; } + static int get_interpreter_frame_initial_sp_offset() { return -get_frame_size() + interpreter_frame_initial_sp_offset; } + static int get_interpreter_frame_monitor_block_top_offset() { return -get_frame_size() + interpreter_frame_monitor_block_top_offset; } + static int get_interpreter_frame_monitor_block_bottom_offset() { return -get_frame_size() + interpreter_frame_monitor_block_bottom_offset; } + + intptr_t ptr_at(int offset) const { + return *ptr_at_addr(offset); + } + + void ptr_at_put(int offset, intptr_t value) { + *ptr_at_addr(offset) = value; + } + + private: + // an additional field beyond _sp and _pc: + intptr_t* _fp; // frame pointer + // The interpreter and adapters will extend the frame of the caller. + // Since oopMaps are based on the sp of the caller before extension + // we need to know that value. However in order to compute the address + // of the return address we need the real "raw" sp. Since sparc already + // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's + // original sp we use that convention. 
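
All of the get_interpreter_frame_*_offset() accessors above fold the raw slot indices through -get_frame_size(), so a single set of indices serves both the 2-word and the 4-word (APCS-style) frame header. The standalone sketch below reproduces that arithmetic; which header FrameAPCS and VMFrameAPCS actually select is a build-time choice of the port, so the flag is passed explicitly here.

    #include <cstdio>

    // Header-size and offset rules mirrored from frame_aarch32.hpp above.
    static int frame_size(bool apcs)         { return apcs ? 4 : 2; }
    static int link_offset(bool apcs)        { return apcs ? -3 : -1; }
    static int return_addr_offset(bool apcs) { return apcs ? -1 : 0; }

    // Raw interpreter slot indices, counted down from the sender-sp slot.
    enum {
      interp_sender_sp = 0,
      interp_last_sp   = interp_sender_sp - 1,
      interp_method    = interp_last_sp - 1,
      interp_mdp       = interp_method - 1,
    };

    // fp-relative offset of an interpreter slot, as in the accessors above.
    static int interp_offset(int raw_slot, bool apcs) {
      return -frame_size(apcs) + raw_slot;
    }

    int main() {
      for (int apcs = 0; apcs <= 1; ++apcs) {
        printf("header=%d words: link=%d ret=%d sender_sp=%d method=%d\n",
               frame_size(apcs), link_offset(apcs), return_addr_offset(apcs),
               interp_offset(interp_sender_sp, apcs),
               interp_offset(interp_method, apcs));
      }
      return 0;
    }
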
+ + intptr_t* _unextended_sp; + void adjust_unextended_sp(); + + intptr_t* ptr_at_addr(int offset) const { + return (intptr_t*) addr_at(offset); + } + +#ifdef ASSERT + // Used in frame::sender_for_{interpreter,compiled}_frame + static void verify_deopt_original_pc( CompiledMethod* nm, intptr_t* unextended_sp); +#endif + + public: + // Constructors + + frame(intptr_t* sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); + + frame(intptr_t* sp, intptr_t* fp); + + void init(intptr_t* sp, intptr_t* fp, address pc); + + // accessors for the instance variables + // Note: not necessarily the real 'frame pointer' (see real_fp) + intptr_t* fp() const { return _fp; } + + inline address* sender_pc_addr() const; + + // expression stack tos if we are nested in a java call + intptr_t* interpreter_frame_last_sp() const; + + // helper to update a map with callee-saved RBP + static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); + + // deoptimization support + void interpreter_frame_set_last_sp(intptr_t* sp); + + static jint interpreter_frame_expression_stack_direction() { return -1; } + +#endif // CPU_AARCH32_VM_FRAME_AARCH32_HPP --- /dev/null 2018-09-25 19:24:47.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/frame_aarch32.inline.hpp 2018-09-25 19:24:47.000000000 +0300 @@ -0,0 +1,249 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH32_VM_FRAME_AARCH32_INLINE_HPP +#define CPU_AARCH32_VM_FRAME_AARCH32_INLINE_HPP + +#include "code/codeCache.hpp" +#include "code/vmreg.inline.hpp" + +// Inline functions for AArch64 frames: + +// Constructors: + +inline frame::frame() { + _pc = NULL; + _sp = NULL; + _unextended_sp = NULL; + _fp = NULL; + _cb = NULL; + _deopt_state = unknown; +} + +static int spin; + +inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { + intptr_t a = intptr_t(sp); + intptr_t b = intptr_t(fp); + _sp = sp; + _unextended_sp = sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { + init(sp, fp, pc); +} + +inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { + intptr_t a = intptr_t(sp); + intptr_t b = intptr_t(fp); + _sp = sp; + _unextended_sp = unextended_sp; + _fp = fp; + _pc = pc; + assert(pc != NULL, "no pc?"); + _cb = CodeCache::find_blob(pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + assert(_cb->as_compiled_method()->insts_contains_inclusive(_pc), + "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +inline frame::frame(intptr_t* sp, intptr_t* fp) { + _sp = sp; + _unextended_sp = sp; + _fp = fp; + assert(sp != NULL, "null SP ?"); + // C2 generated code does not use or set fp + _pc = (address)(/*fp != NULL ? fp[0] : */sp[-1]); + + // Here's a sticky one. This constructor can be called via AsyncGetCallTrace + // when last_Java_sp is non-null but the pc fetched is junk. If we are truly + // unlucky the junk value could be to a zombied method and we'll die on the + // find_blob call. This is also why we can have no asserts on the validity + // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler + // -> pd_last_frame should use a specialized version of pd_last_frame which could + // call a specilaized frame constructor instead of this one. + // Then we could use the assert below. However this assert is of somewhat dubious + // value. + // assert(_pc != NULL, "no pc?"); + + _cb = CodeCache::find_blob(_pc); + adjust_unextended_sp(); + + address original_pc = CompiledMethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } +} + +// Accessors + +inline bool frame::equal(frame other) const { + bool ret = sp() == other.sp() + && unextended_sp() == other.unextended_sp() + && fp() == other.fp() + && pc() == other.pc(); + assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); + return ret; +} + +// Return unique id for this frame. The id must have a value where we can distinguish +// identity and younger/older relationship. NULL represents an invalid (incomparable) +// frame. 
+inline intptr_t* frame::id(void) const { return unextended_sp(); } + +// Relationals on frames based +// Return true if the frame is younger (more recent activation) than the frame represented by id +inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() < id ; } + +// Return true if the frame is older (less recent activation) than the frame represented by id +inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); + return this->id() > id ; } + +inline intptr_t* frame::link() const { return *(intptr_t **)addr_at(get_link_offset(VMFrameAPCS)); } + +inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + +inline address frame::sender_pc() const { return *(address*) addr_at(get_return_addr_offset(VMFrameAPCS)); } + +inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + return (intptr_t**)addr_at(get_interpreter_frame_locals_offset()); +} + +inline intptr_t* frame::interpreter_frame_last_sp() const { + return *(intptr_t**)addr_at(get_interpreter_frame_last_sp_offset()); +} + +inline intptr_t* frame::interpreter_frame_bcp_addr() const { + return (intptr_t*) addr_at(get_interpreter_frame_bcp_offset()); +} + +inline intptr_t* frame::interpreter_frame_mdp_addr() const { + return (intptr_t*) addr_at(get_interpreter_frame_mdp_offset()); +} + + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + return (ConstantPoolCache**)addr_at(get_interpreter_frame_cache_offset()); +} + +// Method + +inline Method** frame::interpreter_frame_method_addr() const { + return (Method**)addr_at(get_interpreter_frame_method_offset()); +} + +// Mirror + +inline oop* frame::interpreter_frame_mirror_addr() const { + return (oop*)addr_at(get_interpreter_frame_mirror_offset()); +} + +// top of expression stack +inline intptr_t* frame::interpreter_frame_tos_address() const { + intptr_t* last_sp = interpreter_frame_last_sp(); + if (last_sp == NULL) { + return sp(); + } else { + // sp() may have been extended or shrunk by an adapter. At least + // check that we don't fall behind the legal region. + // For top deoptimized frame last_sp == interpreter_frame_monitor_end. 
+ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); + return last_sp; + } +} + +inline oop* frame::interpreter_frame_temp_oop_addr() const { + return (oop *)(fp() + interpreter_frame_oop_temp_offset); +} + +inline int frame::interpreter_frame_monitor_size() { + return BasicObjectLock::size(); +} + + +// expression stack +// (the max_stack arguments are used by the GC; see class FrameClosure) + +inline intptr_t* frame::interpreter_frame_expression_stack() const { + intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); + return monitor_end-1; +} + + +// Entry frames + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + return (JavaCallWrapper**)addr_at(get_entry_frame_call_wrapper_offset()); +} + + +// Compiled frames + +inline oop frame::saved_oop_result(RegisterMap* map) const { + oop* result_adr = (oop *)map->location(r0->as_VMReg()); + guarantee(result_adr != NULL, "bad register save location"); + + return (*result_adr); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + oop* result_adr = (oop *)map->location(r0->as_VMReg()); + guarantee(result_adr != NULL, "bad register save location"); + + *result_adr = obj; +} + +#endif // CPU_AARCH32_VM_FRAME_AARCH32_INLINE_HPP --- /dev/null 2018-09-25 19:24:48.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/gc/g1/g1BarrierSetAssembler_aarch32.cpp 2018-09-25 19:24:48.000000000 +0300 @@ -0,0 +1,471 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/g1/c1/g1BarrierSetC1.hpp" +#include "gc/g1/g1BarrierSet.hpp" +#include "gc/g1/g1BarrierSetAssembler.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" +#include "gc/g1/g1CardTable.hpp" +#include "gc/g1/g1ThreadLocalData.hpp" +#include "gc/g1/heapRegion.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.hpp" +#include "interpreter/interp_masm.hpp" +#include "runtime/sharedRuntime.hpp" + +#define __ masm-> + +void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count) { + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + // With G1, don't generate the call if we statically know that the target in uninitialized + if (!dest_uninitialized) { + __ push(RegSet::range(r0, r3), sp); + if (count == c_rarg0) { + if (addr == c_rarg1) { + // exactly backwards!! + __ eor(c_rarg0, c_rarg0, c_rarg1); + __ eor(c_rarg1, c_rarg0, c_rarg1); + __ eor(c_rarg0, c_rarg0, c_rarg1); + } else { + __ mov(c_rarg1, count); + __ mov(c_rarg0, addr); + } + } else { + __ mov(c_rarg0, addr); + __ mov(c_rarg1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); + __ pop(RegSet::range(r0, r3), sp); + } +} + +void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register end, Register scratch) { + // must compute element count unless barrier set interface is changed (other platforms supply count) + assert_different_registers(start, end, scratch); + __ lea(scratch, Address(end, BytesPerHeapOop)); + __ sub(scratch, scratch, start); // subtract start to get #bytes + __ lsr(scratch, scratch, LogBytesPerHeapOop); // convert to element count + __ mov(c_rarg0, start); + __ mov(c_rarg1, scratch); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); +} + + +void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, + Address obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + + assert(thread == rthread, "must be"); + + Label done; + Label runtime; + + assert(pre_val != noreg, "check this code"); + + assert_different_registers(pre_val, tmp); + if (obj.get_mode() != Address::no_mode) + assert(!obj.uses(pre_val) && !obj.uses(tmp), "destroys register"); + + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ ldr(tmp, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ ldrb(tmp, in_progress); + } + __ cbz(tmp, done); + + // Do we need to load the previous value? + if (obj.get_mode() != Address::no_mode) { + __ load_heap_oop(pre_val, obj, noreg, noreg, AS_RAW); + } + + // Is the previous value null? 
+ __ cbz(pre_val, done); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) + + __ ldr(tmp, index); // tmp := *index_adr + __ cbz(tmp, runtime); // tmp == 0? + // If yes, goto runtime + + __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize + __ str(tmp, index); // *index_adr := tmp + __ ldr(rscratch1, buffer); + __ add(tmp, tmp, rscratch1); // tmp := tmp + *buffer_adr + + // Record the previous value + __ str(pre_val, Address(tmp)); + __ b(done); + + __ bind(runtime); + // save the live input values + __ push(r0->bit(tosca_live) | obj.reg_bits() | pre_val->bit(true) | lr->bit(true), sp); + + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssember::call_VM_leaf_base) + // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL. + // + // If we care generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then ebp might be pointing to + // the caller frame and so this check will most likely fail at runtime. + // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have have a full interpreter frame on the stack + // expand_call should be passed true. + + if (expand_call) { + assert(pre_val != c_rarg1, "smashed arg"); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } + + __ pop(r0->bit(tosca_live) | obj.reg_bits() | pre_val->bit(true) | lr->bit(true), sp); + + __ bind(done); +} + +void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + Address store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { + assert(thread == rthread, "must be"); + + Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // Does store cross heap regions? + + __ lea(tmp2, store_addr); + __ eor(tmp, tmp2, new_val); + __ lsrs(tmp, tmp, HeapRegion::LogOfHRGrainBytes); + __ b(done, Assembler::EQ); + + // crosses regions, storing NULL? + + __ cbz(new_val, done); + + // storing region crossing non-NULL, is card already dirty? + + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + const Register card_addr = tmp; + + __ lsr(card_addr, tmp2, CardTable::card_shift); + + //ExternalAddress cardtable((address) ct->byte_map_base()); + __ mov(tmp2, (unsigned)ct->byte_map_base()); + + // get the address of the card + __ add(card_addr, card_addr, tmp2); + __ ldrb(tmp2, Address(card_addr)); + __ cmp(tmp2, (int)G1CardTable::g1_young_card_val()); + __ b(done, Assembler::EQ); + + assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + + __ membar(Assembler::StoreLoad); + + __ ldrb(tmp2, Address(card_addr)); + __ cbz(tmp2, done); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. 
+ __ mov(rscratch1, 0); + __ strb(rscratch1, Address(card_addr)); + + __ ldr(rscratch1, queue_index); + __ cbz(rscratch1, runtime); + __ sub(rscratch1, rscratch1, wordSize); + __ str(rscratch1, queue_index); + + __ ldr(tmp2, buffer); + __ str(card_addr, Address(tmp2, rscratch1)); + __ b(done); + + __ bind(runtime); + // save the live input values + __ push(store_addr.reg_bits() | new_val->bit(true), sp); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ pop(store_addr.reg_bits() | new_val->bit(true), sp); + + __ bind(done); +} + +void G1BarrierSetAssembler::load_word_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { + bool on_oop = type == T_OBJECT || type == T_ARRAY; + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool on_reference = on_weak || on_phantom; + ModRefBarrierSetAssembler::load_word_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + if (on_oop && on_reference) { + // LR is live. It must be saved around calls. + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + g1_write_barrier_pre(masm /* masm */, + Address() /* obj */, + dst /* pre_val */, + rthread /* thread */, + tmp1 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + } +} + +void G1BarrierSetAssembler::load_tos_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address src, Register tmp1, Register tmp_thread) { + bool on_oop = type == T_OBJECT || type == T_ARRAY; + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool on_reference = on_weak || on_phantom; + ModRefBarrierSetAssembler::load_tos_at(masm, decorators, type, src, tmp1, tmp_thread); + if (on_oop && on_reference) { + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + g1_write_barrier_pre(masm /* masm */, + Address() /* obj */, + r0 /* pre_val */, // atos is in r0 + rthread /* thread */, + tmp1 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + } +} + +void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + g1_write_barrier_pre(masm, + dst /* obj */, + tmp2 /* pre_val */, + rthread /* thread */, + tmp1 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + + if (val == noreg) { + BarrierSetAssembler::store_word_at(masm, decorators, type, dst, noreg, tmp1, noreg); + } else { + BarrierSetAssembler::store_word_at(masm, decorators, type, dst, val, noreg, noreg); + g1_write_barrier_post(masm, + dst /* store_adr */, + val /* new_val */, + rthread /* thread */, + tmp1 /* tmp */, + tmp2 /* tmp2 */); + } + +} + +#ifdef COMPILER1 + +#undef __ +#define __ ce->masm()-> + +void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. 
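+  // The stub itself stays small: it null-checks pre_val (a null previous value
+  // needs no logging), stores it as the single stub parameter, far-calls the
+  // shared runtime blob emitted by generate_c1_pre_barrier_runtime_stub() below,
+  // and then branches back to the continuation.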
+ + __ bind(*stub->entry()); + + assert(stub->pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); + } + __ cbz(pre_val_reg, *stub->continuation()); + ce->store_parameter(stub->pre_val()->as_register(), 0); + __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); + __ b(*stub->continuation()); +} + +void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); + assert(stub->addr()->is_register(), "Precondition."); + assert(stub->new_val()->is_register(), "Precondition."); + Register new_val_reg = stub->new_val()->as_register(); + __ cbz(new_val_reg, *stub->continuation()); + ce->store_parameter(stub->addr()->as_pointer_register(), 0); + __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); + __ b(*stub->continuation()); +} + +#undef __ + +#define __ sasm-> + +void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_pre_barrier", false); + + // arg0 : previous value of memory + + BarrierSet* bs = BarrierSet::barrier_set(); + + const Register pre_val = r0; + const Register thread = rthread; + const Register tmp = rscratch1; + + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + Label done; + Label runtime; + + // Is marking still active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ ldr(tmp, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ ldrb(tmp, in_progress); + } + __ cbz(tmp, done); + + // Can we store original value in the thread's buffer? + __ ldr(tmp, queue_index); + __ cbz(tmp, runtime); + + __ sub(tmp, tmp, wordSize); + __ str(tmp, queue_index); + __ ldr(rscratch2, buffer); + __ add(tmp, tmp, rscratch2); + __ load_parameter(0, rscratch2); + __ str(rscratch2, Address(tmp, 0)); + __ b(done); + + __ bind(runtime); + __ push_call_clobbered_registers(); + __ load_parameter(0, pre_val); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + __ pop_call_clobbered_registers(); + __ bind(done); + + __ epilogue(); +} + +void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_post_barrier", false); + + // arg0: store_address + Address store_addr(rfp, 2*BytesPerWord); + + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regions. 
+ // Must check to see if card is already dirty + + const Register thread = rthread; + + Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + + const Register card_addr = rscratch2; + ExternalAddress cardtable((address) ct->byte_map_base()); + + __ load_parameter(0, card_addr); + __ lsr(card_addr, card_addr, CardTable::card_shift); + __ mov(rscratch1, cardtable); + __ add(card_addr, card_addr, rscratch1); + __ ldrb(rscratch1, Address(card_addr)); + __ cmp(rscratch1, (int)G1CardTable::g1_young_card_val()); + __ b(done, Assembler::EQ); + + assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + + __ membar(Assembler::StoreLoad); + __ ldrb(rscratch1, Address(card_addr)); + __ cbz(rscratch1, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. + __ mov(rscratch1, 0); + __ strb(rscratch1, Address(card_addr)); + + __ ldr(rscratch1, queue_index); + __ cbz(rscratch1, runtime); + __ sub(rscratch1, rscratch1, wordSize); + __ str(rscratch1, queue_index); + + // Reuse LR to hold buffer_addr + const Register buffer_addr = lr; + + __ ldr(buffer_addr, buffer); + __ str(card_addr, Address(buffer_addr, rscratch1)); + __ b(done); + + __ bind(runtime); + __ push_call_clobbered_registers(); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ pop_call_clobbered_registers(); + __ bind(done); + __ epilogue(); +} + +#undef __ + +#endif // COMPILER1 --- /dev/null 2018-09-25 19:24:49.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/gc/g1/g1BarrierSetAssembler_aarch32.hpp 2018-09-25 19:24:49.000000000 +0300 @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH32_GC_G1_G1BARRIERSETASSEMBLER_AARCH32_HPP +#define CPU_AARCH32_GC_G1_G1BARRIERSETASSEMBLER_AARCH32_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" +#include "utilities/macros.hpp" + +class LIR_Assembler; +class StubAssembler; +class G1PreBarrierStub; +class G1PostBarrierStub; + +class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { +protected: + void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count); + void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register end, Register tmp); + + void g1_write_barrier_pre(MacroAssembler* masm, + Address obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + + void g1_write_barrier_post(MacroAssembler* masm, + Address store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2); + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + +public: +#ifdef COMPILER1 + void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); + void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); + + void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); + void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); +#endif + + void load_word_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + void load_tos_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address src, Register tmp1, Register tmp_thread); +}; + +#endif // CPU_AARCH32_GC_G1_G1BARRIERSETASSEMBLER_AARCH32_HPP --- /dev/null 2018-09-25 19:24:50.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/gc/shared/barrierSetAssembler_aarch32.cpp 2018-09-25 19:24:50.000000000 +0300 @@ -0,0 +1,318 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "runtime/jniHandles.hpp" +#include "runtime/thread.hpp" + +#define __ masm-> + +void BarrierSetAssembler::load_word_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { + + // LR is live. 
It must be saved around calls. + + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + bool is_not_null = (decorators & IS_NOT_NULL) != 0; + switch (type) { + case T_OBJECT: + case T_ARRAY: { + assert(in_heap || in_native, "why else?"); + __ ldr(dst, src); + break; + } + case T_INT: __ ldr(dst, src); break; + case T_ADDRESS: __ ldr(dst, src); break; + default: Unimplemented(); + } +} + +void BarrierSetAssembler::store_word_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + switch (type) { + case T_OBJECT: + case T_ARRAY: { + if (val == noreg) { + assert(tmp1 != noreg, "must provide valid register"); + __ mov(tmp1, 0); + val = tmp1; + } + assert(in_heap || in_native, "why else?"); + __ str(val, dst); + break; + } + case T_INT: __ str(val, dst); break; + case T_ADDRESS: __ str(val, dst); break; + default: Unimplemented(); + } +} + +void BarrierSetAssembler::load_tos_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address src, Register tmp1, Register tmp_thread) { + + // LR is live. It must be saved around calls. + + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + bool is_not_null = (decorators & IS_NOT_NULL) != 0; + bool atomic = (decorators & MO_SEQ_CST) != 0; + switch (type) { + case T_OBJECT: + case T_ARRAY: { + assert(in_heap || in_native, "why else?"); + __ ldr(r0, src); + break; + } + case T_BOOLEAN: __ load_unsigned_byte (r0, src); break; + case T_BYTE: __ load_signed_byte (r0, src); break; + case T_CHAR: __ load_unsigned_short(r0, src); break; + case T_SHORT: __ load_signed_short (r0, src); break; + case T_DOUBLE: + if (hasFPU()) { + if (!src.is_safe_for(atomic ? 
Address::IDT_ATOMIC : Address::IDT_DOUBLE)) { + assert(tmp1 != noreg, "must be"); + __ lea(tmp1, src); + src = Address(tmp1); + } + if (atomic) { + __ atomic_ldrd(r0, r1, src.base()); + __ vmov_f64(d0, r0, r1); + } else { + __ vldr_f64(d0, src); + } + break; + } + // else fall-through + case T_LONG: + if (atomic) { + if (!src.is_safe_for(Address::IDT_ATOMIC)) { + assert(tmp1 != noreg, "must be"); + __ lea(tmp1, src); + src = Address(tmp1); + } + __ atomic_ldrd(r0, r1, src.base()); + } else { + __ ldrd(r0, r1, src); + } + break; + case T_FLOAT: + if (hasFPU()) { + if (!src.is_safe_for(Address::IDT_FLOAT)) { + assert(tmp1 != noreg, "must be"); + __ lea(tmp1, src); + src = Address(tmp1); + } + __ vldr_f32(f0, src); + break; + } + // else fall-through + case T_ADDRESS: + // fall-through + case T_INT: __ ldr (r0, src); break; + default: Unimplemented(); + } +} + +void BarrierSetAssembler::store_tos_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register tmp1, Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool in_native = (decorators & IN_NATIVE) != 0; + bool atomic = (decorators & MO_SEQ_CST) != 0; + switch (type) { + case T_OBJECT: + case T_ARRAY: { + assert(in_heap || in_native, "why else?"); + __ str(r0, dst); + break; + } + case T_BOOLEAN: + __ andr(r0, r0, 0x1); // boolean is true if LSB is 1 + __ strb(r0, dst); + break; + case T_BYTE: __ strb (r0, dst); break; + case T_CHAR: __ strh (r0, dst); break; + case T_SHORT: __ strh (r0, dst); break; + case T_FLOAT: + if (hasFPU()) { + if (!dst.is_safe_for(Address::IDT_FLOAT)) { + assert(tmp1 != noreg, "must be"); + __ lea(tmp1, dst); + dst = Address(tmp1); + } + __ vstr_f32(d0, dst); + break; + } + // else fall-through + case T_INT: __ str (r0, dst); break; + case T_DOUBLE: + if (hasFPU()) { + if (atomic) { + __ vmov_f64(r0, r1, d0); + // fall-through to T_LONG + } else { + if (!dst.is_safe_for(Address::IDT_DOUBLE)) { + assert(tmp1 != noreg, "must be"); + __ lea(tmp1, dst); + dst = Address(tmp1); + } + __ vstr_f64(d0, dst); + break; + } + } + // else fall-through + case T_LONG: + if (atomic) { + assert(tmp1 != noreg && tmp2 != noreg, "must be"); + assert_different_registers(rscratch1, tmp1, tmp2); + Register base; + if (!dst.is_safe_for(Address::IDT_ATOMIC) || + dst.uses(tmp1) || dst.uses(tmp2)) { + __ lea(rscratch1, dst); + base = rscratch1; + } else { + base = dst.base(); // strexd only supports [base] addressing + } + __ atomic_strd(r0, r1, base, tmp1, tmp2); + } else { + __ strd(r0, r1, dst); + } + break; + case T_ADDRESS: __ str (r0, dst); break; + default: Unimplemented(); + } +} + +void BarrierSetAssembler::obj_equals(MacroAssembler* masm, + Register obj1, Register obj2) { + __ cmp(obj1, obj2); +} + +void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath) { + // If mask changes we need to ensure that the inverse is still encodable as an immediate + STATIC_ASSERT(JNIHandles::weak_tag_mask == 1); + __ bic(obj, obj, JNIHandles::weak_tag_mask); + + __ ldr(obj, Address(obj, 0)); // *obj +} + +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
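+// The fast path is a plain bump-the-pointer allocation against the thread-local
+// allocation buffer; roughly (sketch only, with pseudo accessors -- the code
+// below works on registers and thread-local offsets):
+//   obj = thread->tlab_top();
+//   end = obj + size;
+//   if (end > thread->tlab_end()) goto slow_case;
+//   thread->set_tlab_top(end);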
+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Register t2, + Label& slow_case) { + assert_different_registers(obj, t2); + assert_different_registers(obj, var_size_in_bytes); + Register end = t2; + + // verify_tlab(); + + __ ldr(obj, Address(rthread, JavaThread::tlab_top_offset())); + if (var_size_in_bytes == noreg) { + __ lea(end, Address(obj, con_size_in_bytes)); + } else { + __ lea(end, Address(obj, var_size_in_bytes)); + } + __ ldr(rscratch1, Address(rthread, JavaThread::tlab_end_offset())); + __ cmp(end, rscratch1); + __ b(slow_case, Assembler::HI); + + // update the tlab top pointer + __ str(end, Address(rthread, JavaThread::tlab_top_offset())); + + // recover var_size_in_bytes if necessary + if (var_size_in_bytes == end) { + __ sub(var_size_in_bytes, var_size_in_bytes, obj); + } + // verify_tlab(); +} + +// Defines obj, preserves var_size_in_bytes. uses rscratch1 and rscratch2 +void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Label& slow_case) { + assert_different_registers(obj, var_size_in_bytes, t1); + if (!Universe::heap()->supports_inline_contig_alloc()) { + __ b(slow_case); + } else { + Register end = t1; + Register heap_end = rscratch2; + Label retry; + __ bind(retry); + + __ mov(rscratch1, ExternalAddress((address) Universe::heap()->end_addr())); + __ ldr(heap_end, Address(rscratch1)); + + ExternalAddress heap_top((address) Universe::heap()->top_addr()); + __ mov(rscratch1, heap_top); + __ ldrex(obj, rscratch1); + + // Adjust it my the size of our new object + if (var_size_in_bytes == noreg) { + __ lea(end, Address(obj, con_size_in_bytes)); + } else { + __ lea(end, Address(obj, var_size_in_bytes)); + } + + // if end < obj then we wrapped around high memory + __ cmp(end, obj); + __ b(slow_case, Assembler::LO); + + __ cmp(end, heap_end); + __ b(slow_case, Assembler::HI); + + // If heap_top hasn't been changed by some other thread, update it. + __ mov(rscratch2, rscratch1); + __ strex(rscratch1, end, rscratch2); + __ cmp(rscratch1, 0); + __ b(retry, Assembler::NE); + + incr_allocated_bytes(masm, var_size_in_bytes, con_size_in_bytes, t1); + } +} + +void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1) { + assert(t1->is_valid(), "need temp reg"); + + __ ldr(t1, Address(rthread, in_bytes(JavaThread::allocated_bytes_offset()))); + if (var_size_in_bytes->is_valid()) { + __ add(t1, t1, var_size_in_bytes); + } else { + __ add(t1, t1, con_size_in_bytes); + } + __ str(t1, Address(rthread, in_bytes(JavaThread::allocated_bytes_offset()))); +} --- /dev/null 2018-09-25 19:24:51.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/gc/shared/barrierSetAssembler_aarch32.hpp 2018-09-25 19:24:51.000000000 +0300 @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_GC_SHARED_BARRIERSETASSEMBLER_AARCH32_HPP +#define CPU_AARCH32_GC_SHARED_BARRIERSETASSEMBLER_AARCH32_HPP + +#include "asm/macroAssembler.hpp" +#include "memory/allocation.hpp" +#include "oops/access.hpp" + +class BarrierSetAssembler: public CHeapObj { +private: + void incr_allocated_bytes(MacroAssembler* masm, + Register var_size_in_bytes, int con_size_in_bytes, + Register t1 = noreg); + +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register addr, Register count) {} + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register start, Register end, Register tmp) {} + virtual void load_word_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread); + virtual void store_word_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + virtual void load_tos_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address src, Register tmp1, Register tmp_thread); + virtual void store_tos_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register tmp1, Register tmp2); + + virtual void obj_equals(MacroAssembler* masm, + Register obj1, Register obj2); + + virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, + Register obj, Register tmp, Label& slowpath); + + virtual void tlab_allocate(MacroAssembler* masm, + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + void eden_allocate(MacroAssembler* masm, + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + virtual void barrier_stubs_init() {} +}; + +#endif // CPU_AARCH32_GC_SHARED_BARRIERSETASSEMBLER_AARCH32_HPP --- /dev/null 2018-09-25 19:24:52.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/gc/shared/cardTableBarrierSetAssembler_aarch32.cpp 2018-09-25 19:24:52.000000000 +0300 @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/cardTableBarrierSetAssembler.hpp" +#include "interpreter/interp_masm.hpp" + +#define __ masm-> + +void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) { + + // Does a store check for the oop in register obj. The content of + // register obj is destroyed afterwards. + + BarrierSet* bs = BarrierSet::barrier_set(); + assert(bs->kind() == BarrierSet::CardTableBarrierSet, + "Wrong barrier set kind"); + + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + assert(CardTable::dirty_card_val() == 0, "must be"); + + jbyte *byte_map_base = + ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base(); + __ mov(rscratch1, (uint32_t)byte_map_base); + assert((p2i(byte_map_base) & 0xff) == 0, "fix store char 0 below"); + + if (UseCondCardMark) { + __ membar(Assembler::StoreLoad); + __ ldrb(rscratch2, Address(rscratch1, obj, lsr((int) CardTable::card_shift))); + __ cmp(rscratch2, 0); + __ strb(rscratch1, Address(rscratch1, obj, lsr((int) CardTable::card_shift)), Assembler::NE); + } else { + if (ct->scanned_concurrently()) { + __ membar(Assembler::StoreStore); + } + __ strb(rscratch1, Address(rscratch1, obj, lsr((int) CardTable::card_shift))); + } +} + +void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register end, Register scratch) { + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label L_loop; + + __ lsr(start, start, CardTable::card_shift); + __ lsr(end, end, CardTable::card_shift); + __ sub(end, end, start); // number of bytes to copy + + const Register count = end; // 'end' register contains bytes count now + __ mov(scratch, (address)ct->byte_map_base()); + __ add(start, start, scratch); + if (ct->scanned_concurrently()) { + __ membar(__ StoreStore); + } + __ bind(L_loop); + __ mov(scratch, 0); + __ strb(scratch, Address(start, count)); + __ subs(count, count, 1); + __ b(L_loop, Assembler::HS); +} + +void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, 
Register tmp2) { + bool in_heap = (decorators & IN_HEAP) != 0; + bool is_array = (decorators & IS_ARRAY) != 0; + bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool precise = is_array || on_anonymous; + + bool needs_post_barrier = val != noreg && in_heap; + BarrierSetAssembler::store_word_at(masm, decorators, type, dst, val, tmp1, noreg); + if (needs_post_barrier) { + // flatten object address if needed + if (!precise || (dst.index() == noreg && dst.offset() == 0)) { + store_check(masm, dst.base(), dst); + } else { + __ lea(tmp1, dst); + store_check(masm, tmp1, dst); + } + } +} --- /dev/null 2018-09-25 19:24:54.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/gc/shared/cardTableBarrierSetAssembler_aarch32.hpp 2018-09-25 19:24:53.000000000 +0300 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_AARCH32_HPP +#define CPU_AARCH32_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_AARCH32_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { +protected: + void store_check(MacroAssembler* masm, Register obj, Address dst); + + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register end, Register tmp); + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); + +}; + +#endif // #ifndef CPU_AARCH32_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_AARCH32_HPP --- /dev/null 2018-09-25 19:24:55.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/gc/shared/modRefBarrierSetAssembler_aarch32.cpp 2018-09-25 19:24:54.000000000 +0300 @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/modRefBarrierSetAssembler.hpp" + +#define __ masm-> + +void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register addr, Register count) { + + if (is_oop) { + gen_write_ref_array_pre_barrier(masm, decorators, addr, count); + } +} + +void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register start, Register end, Register scratch) { + if (is_oop) { + gen_write_ref_array_post_barrier(masm, decorators, start, end, scratch); + } +} + +void ModRefBarrierSetAssembler::store_word_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + if (type == T_OBJECT || type == T_ARRAY) { + oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); + } else { + BarrierSetAssembler::store_word_at(masm, decorators, type, dst, val, tmp1, tmp2); + } +} --- /dev/null 2018-09-25 19:24:56.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/gc/shared/modRefBarrierSetAssembler_aarch32.hpp 2018-09-25 19:24:55.000000000 +0300 @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH64_GC_SHARED_MODREFBARRIERSETASSEMBLER_AARCH64_HPP +#define CPU_AARCH64_GC_SHARED_MODREFBARRIERSETASSEMBLER_AARCH64_HPP + +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSetAssembler.hpp" + +// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other +// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected +// accesses, which are overridden in the concrete BarrierSetAssembler. 
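+// In effect, store_word_at() in the matching .cpp boils down to:
+//   if (type == T_OBJECT || type == T_ARRAY) oop_store_at(...);   // barrier-specific
+//   else BarrierSetAssembler::store_word_at(...);                 // plain store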
+ +class ModRefBarrierSetAssembler: public BarrierSetAssembler { +protected: + // Generate code for an array write pre barrier + // + // addr - starting address + // count - element count + // + // Destroy no registers! + // + virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count) {} + + // Generate code for an array write post barrier + // + // Input: + // start - register containing starting address of destination array + // end - register containing ending address of destination array + // scratch - scratch register + // + // The input registers are overwritten. + // The ending address is inclusive. + virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register end, Register scratch) {} + + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) = 0; + +public: + virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register addr, Register count); + virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, + Register start, Register end, Register scratch); + virtual void store_word_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2); +}; + +#endif // CPU_AARCH64_GC_SHARED_MODREFBARRIERSETASSEMBLER_AARCH64_HPP --- /dev/null 2018-09-25 19:24:57.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/globalDefinitions_aarch32.hpp 2018-09-25 19:24:56.000000000 +0300 @@ -0,0 +1,72 @@ +/* + * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_GLOBALDEFINITIONS_AARCH32_HPP +#define CPU_AARCH32_VM_GLOBALDEFINITIONS_AARCH32_HPP + +// __ARM_PCS_VFP indicates that gcc runs with "-mfloat-abi=hard" option. +// This option allows generation of floating point instructions and enforces +// usage of FPU-specific calling conventions. 
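+// When gcc targets a soft-float ABI (-mfloat-abi=soft or softfp) this macro is
+// not predefined, HARD_FLOAT_CC stays undefined, and floating point arguments
+// follow the base AAPCS, i.e. they are passed in core registers.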
+#ifdef __ARM_PCS_VFP +#define HARD_FLOAT_CC +#endif // __ARM_PCS_VFP + +// If changing this please be sure to review all code which saves the registers +// and the corresponding register maps to ensure that the respective frame +// sizes are multiple of this new value +const int StackAlignmentInBytes = 8; + +// Indicates whether the C calling conventions require that +// 32-bit integer argument values are properly extended to 64 bits. +const bool CCallingConventionRequiresIntsAsLongs = false; + +#define SUPPORTS_NATIVE_CX8 + +// The maximum B/BL offset range on AArch32 is 32MB. +#undef CODE_CACHE_DEFAULT_LIMIT +#define CODE_CACHE_DEFAULT_LIMIT (32*M) + +// According to the ARMv8 ARM, "Concurrent modification and execution +// of instructions can lead to the resulting instruction performing +// any behavior that can be achieved by executing any sequence of +// instructions that can be executed from the same Exception level, +// except where the instruction before modification and the +// instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, or +// SMC instruction." +// +// This makes the games we play when patching difficult, so when we +// come across an access that needs patching we deoptimize. There are +// ways we can avoid this, but these would slow down C1-compiled code +// in the default case. We could revisit this decision if we get any +// evidence that it's worth doing. +#define DEOPTIMIZE_WHEN_PATCHING + +#define SUPPORT_RESERVED_STACK_AREA + +#define THREAD_LOCAL_POLL + +#endif // CPU_AARCH32_VM_GLOBALDEFINITIONS_AARCH32_HPP --- /dev/null 2018-09-25 19:24:58.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/globals_aarch32.hpp 2018-09-25 19:24:57.000000000 +0300 @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_GLOBALS_AARCH32_HPP +#define CPU_AARCH32_VM_GLOBALS_AARCH32_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. 
+// (see globals.hpp) + +define_pd_global(bool, ShareVtableStubs, true); +define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks +define_pd_global(bool, TrapBasedNullChecks, false); +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast + +//TODO: update if 32 bit platforms need different sizes +define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. +define_pd_global(intx, CodeEntryAlignment, 32); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); + +#define DEFAULT_STACK_YELLOW_PAGES (2) +#define DEFAULT_STACK_RED_PAGES (1) +#define DEFAULT_STACK_SHADOW_PAGES (5 DEBUG_ONLY(+1)) +#define DEFAULT_STACK_RESERVED_PAGES (1) + +#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES +#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES +#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES +#define MIN_STACK_RESERVED_PAGES (0) + +define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); +define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); +define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); +define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); + +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); + +define_pd_global(bool, PreserveFramePointer, false); + +// GC Ergo Flags +define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread + +define_pd_global(uintx, TypeProfileLevel, 111); + +define_pd_global(bool, CompactStrings, true); +define_pd_global(intx, InitArrayShortSize, BytesPerLong); + +define_pd_global(bool, ThreadLocalHandshakes, false/*true*/); + +// FIXME this turned out to be needed for the core build too? +//#if defined(COMPILER1) || defined(COMPILER2) +define_pd_global(intx, InlineSmallCode, 1000); +//#endif + +// Define it instead providing as option, inlining the constant significantly +// improves perfromance. The option is disabled for AARCH32 in globals.hpp too. +#define UseMembar true + +#define ARCH_FLAGS(develop, \ + product, \ + diagnostic, \ + experimental, \ + notproduct, \ + range, \ + constraint, \ + writeable) \ + \ + product(bool, NearCpool, true, \ + "constant pool is close to instructions") \ + \ + product(bool, UseBarriersForVolatile, false, \ + "Use memory barriers to implement volatile accesses") \ + product(bool, TraceTraps, false, "Trace all traps the signal handler") \ + product(bool, UseSIMDForMemoryOps, false, \ + "Use SIMD instructions in generated memory move code") \ + product(bool, UseNeon, false, \ + "Use Neon for CRC32 computation") \ + product(bool, UseCRC32, false, \ + "Use CRC32 instructions for CRC32 computation") \ + product(bool, JNIFrameAPCS, false, "Assume APCS frame layout for JNI") \ + product(bool, FrameAPCS, false, "Use APCS frame layout") \ + product(bool, VMFrameAPCS, false, "Force APCS frame layout for VM. " \ + "Usually you don't need to set his flag, VM layout is autodetected") \ + product(bool, UseFPU, true, "Enable FPU utilization at floating point ops." 
\ + "Affects SoftFP mode only.") + +#endif // CPU_AARCH32_VM_GLOBALS_AARCH32_HPP --- /dev/null 2018-09-25 19:24:59.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/icBuffer_aarch32.cpp 2018-09-25 19:24:59.000000000 +0300 @@ -0,0 +1,78 @@ +/* + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc/shared/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_aarch32.hpp" +#include "oops/oop.inline.hpp" + +int InlineCacheBuffer::ic_stub_code_size() { + return /* ldr */ NativeInstruction::arm_insn_sz + + /* far_branch */ MacroAssembler::far_branch_size() + + /* emit_int32 */ NativeInstruction::arm_insn_sz; +} + +#define __ masm-> + +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler* masm = new MacroAssembler(&code); + // note: even though the code contains an embedded value, we do not need reloc info + // because + // (1) the value is old (i.e., doesn't matter for scavenges) + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear + // assert(cached_value == NULL || cached_oop->is_perm(), "must be perm oop"); + + address start = __ pc(); + Label l; + __ ldr(rscratch2, l); + __ far_jump(ExternalAddress(entry_point)); + __ bind(l); + __ emit_int32((int32_t)cached_value); + // Only need to invalidate the 1st two instructions - not the whole ic stub + ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); + assert(__ pc() - start == ic_stub_code_size(), "must be"); +} + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + NativeJump* jump = nativeJump_at(code_begin + 4); + return jump->jump_destination(); +} + + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + // The word containing the cached value is at the end of this IC buffer + uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); + void* o = (void*)*p; + return o; +} --- /dev/null 2018-09-25 19:25:00.000000000 +0300 +++ 
new/src/hotspot/cpu/aarch32/icache_aarch32.cpp 2018-09-25 19:25:00.000000000 +0300 @@ -0,0 +1,38 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "runtime/icache.hpp" + +void ICacheStubGenerator::generate_icache_flush( + ICache::flush_icache_stub_t* flush_icache_stub) { + // Give anyone who calls this a surprise + *flush_icache_stub = (ICache::flush_icache_stub_t)NULL; +} + +void ICache::initialize() { +} --- /dev/null 2018-09-25 19:25:01.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/icache_aarch32.hpp 2018-09-25 19:25:01.000000000 +0300 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_ICACHE_AARCH32_HPP +#define CPU_AARCH32_VM_ICACHE_AARCH32_HPP + +// Interface for updating the instruction cache. Whenever the VM +// modifies code, part of the processor instruction cache potentially +// has to be flushed. 
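+// On AArch32 no flush stub is generated (icache_aarch32.cpp installs a NULL
+// stub); instead the helpers below hand the affected range straight to the gcc
+// builtin __clear_cache.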
+ +class ICache : public AbstractICache { + public: + static void initialize(); + static void invalidate_word(address addr) { + __clear_cache((char *)addr, (char *)(addr + 3)); + } + static void invalidate_range(address start, int nbytes) { + __clear_cache((char *)start, (char *)(start + nbytes)); + } +}; + +#endif // CPU_AARCH32_VM_ICACHE_AARCH32_HPP --- /dev/null 2018-09-25 19:25:02.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/interp_masm_aarch32.cpp 2018-09-25 19:25:02.000000000 +0300 @@ -0,0 +1,1941 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interp_masm_aarch32.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "logging/log.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" + +#include "vm_version_aarch32.hpp" +#include "register_aarch32.hpp" + + +// Implementation of InterpreterMacroAssembler + +void InterpreterMacroAssembler::narrow(Register result) { + // Get method->_constMethod->_result_type + ldr(rscratch1, Address(rfp, frame::get_interpreter_frame_method_offset() * wordSize)); + ldr(rscratch1, Address(rscratch1, Method::const_offset())); + ldrb(rscratch1, Address(rscratch1, ConstMethod::result_type_offset())); + + Label done; + + // common case first + + cmp(rscratch1, T_INT); + b(done, Assembler::EQ); + + // mask integer result to narrower return type. 
+ cmp(rscratch1, T_BOOLEAN); + andr(result, result, 0x1, Assembler::EQ); + + cmp(rscratch1, T_BYTE); + sxtb(result, result, Assembler::ror(), Assembler::EQ); + + cmp(rscratch1, T_CHAR); + uxth(result, result, Assembler::ror(), Assembler::EQ); // truncate upper 16 bits + + sxth(result, result, Assembler::ror(), Assembler::NE); // sign-extend short + + // Nothing to do for T_INT + bind(done); +} + +void InterpreterMacroAssembler::jump_to_entry(address entry) { + assert(entry, "Entry must have been generated by now"); + b(entry); +} + +void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { + if (JvmtiExport::can_pop_frame()) { + Label L; + // Initiate popframe handling only if it is not already being + // processed. If the flag has the popframe_processing bit set, it + // means that this code is called *during* popframe handling - we + // don't want to reenter. + // This method is only called just after the call into the vm in + // call_VM_base, so the arg registers are available. + ldr(rscratch1, Address(rthread, JavaThread::popframe_condition_offset())); + tst(rscratch1, JavaThread::popframe_pending_bit); + b(L, Assembler::EQ); + tst(rscratch1, JavaThread::popframe_processing_bit); + b(L, Assembler::NE); + // Call Interpreter::remove_activation_preserving_args_entry() to get the + // address of the same-named entrypoint in the generated interpreter code. + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + b(r0); + bind(L); + } +} + + +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { + ldr(r2, Address(rthread, JavaThread::jvmti_thread_state_offset())); + const Address tos_addr(r2, JvmtiThreadState::earlyret_tos_offset()); + const Address oop_addr(r2, JvmtiThreadState::earlyret_oop_offset()); + const Address val_addr(r2, JvmtiThreadState::earlyret_value_offset()); + switch (state) { + case atos: ldr(r0, oop_addr); + mov(rscratch1, 0); + str(rscratch1, oop_addr); + verify_oop(r0, state); break; + case dtos: + if(hasFPU()) { + vldr_f64(d0, val_addr); break; + }//fall through otherwise + case ltos: ldrd(r0, val_addr); break; + case ftos: + if(hasFPU()) { + vldr_f32(d0, val_addr); break; + } //fall through otherwise + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: ldr(r0, val_addr); break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + // Clean up tos value in the thread object + mov(rscratch1, (int) ilgl); + str(rscratch1, tos_addr); + mov(rscratch1, 0); + str(rscratch1, val_addr); +} + + +void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { + if (JvmtiExport::can_force_early_return()) { + Label L; + ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset())); + cbz(rscratch1, L); // if (thread->jvmti_thread_state() == NULL) exit; + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. + ldr(rscratch1, Address(rscratch1, JvmtiThreadState::earlyret_state_offset())); + cmp(rscratch1, JvmtiThreadState::earlyret_pending); + b(L, Assembler::NE); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. 
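The narrowing sequence above keys off the return type recorded in ConstMethod; restated as plain C++ (the enum tags are stand-ins for the T_BOOLEAN/T_BYTE/T_CHAR/T_SHORT/T_INT constants), the rules it applies are:

#include <stdint.h>

enum ResultType { kBoolean, kByte, kChar, kShort, kInt };  // illustrative stand-ins

// Same narrowing rules as narrow(): the 32-bit result register is masked or
// sign-/zero-extended according to the callee's declared return type.
static int32_t narrow_result(int32_t result, ResultType t) {
  switch (t) {
    case kBoolean: return result & 1;                // andr(result, result, 0x1)
    case kByte:    return (int32_t)(int8_t)result;   // sxtb
    case kChar:    return (int32_t)(uint16_t)result; // uxth
    case kShort:   return (int32_t)(int16_t)result;  // sxth
    case kInt:     return result;                    // fast path: nothing to do
  }
  return result;
}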
+ ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset())); + ldr(rscratch1, Address(rscratch1, JvmtiThreadState::earlyret_tos_offset())); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), rscratch1); + b(r0); + bind(L); + } +} + +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp( + Register reg, + int bcp_offset) { + assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); + ldrh(reg, Address(rbcp, bcp_offset)); + rev16(reg, reg); +} + +void InterpreterMacroAssembler::get_dispatch() { + mov(rdispatch, ExternalAddress((address)Interpreter::dispatch_table())); +} + +void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, + int bcp_offset, + size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + if (index_size == sizeof(u2)) { + load_unsigned_short(index, Address(rbcp, bcp_offset)); + } else if (index_size == sizeof(u4)) { + // assert(EnableInvokeDynamic, "giant index used only for JSR 292"); + ldr(index, Address(rbcp, bcp_offset)); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + inv(index, index); // convert to plain index + } else if (index_size == sizeof(u1)) { + load_unsigned_byte(index, Address(rbcp, bcp_offset)); + } else { + ShouldNotReachHere(); + } +} + +// Return +// Rindex: index into constant pool +// Rcache: address of cache entry - ConstantPoolCache::base_offset() +// +// A caller must add ConstantPoolCache::base_offset() to Rcache to get +// the true address of the cache entry. +// +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, + Register index, + int bcp_offset, + size_t index_size) { + assert_different_registers(cache, index); + assert_different_registers(cache, rcpool); + get_cache_index_at_bcp(index, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // convert from field index to ConstantPoolCacheEntry + // aarch32 already has the cache in rcpool so there is no need to + // install it in cache. instead we pre-add the indexed offset to + // rcpool and return it in cache. All clients of this method need to + // be modified accordingly. + add(cache, rcpool, index, lsl( exact_log2(4) + exact_log2(wordSize))); +} + + +void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, + Register index, + Register bytecode, + int byte_no, + int bcp_offset, + size_t index_size) { + get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); + // We use a 32-bit load here since the layout of 64-bit words on + // little-endian machines allow us that. + // n.b. 
unlike x86 cache already includes the index offset + ldr(bytecode, Address(cache, + ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::indices_offset())); + const int shift_count = (1 + byte_no) * BitsPerByte; + //ubfx(bytecode, bytecode, shift_count, BitsPerByte); + assert(shift_count >= 0 && shift_count <= 24 && 0 == (shift_count & 7), "Invalid shift count"); + uxtb(bytecode, bytecode, ror(shift_count)); +} + +void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, + Register tmp, + int bcp_offset, + size_t index_size) { + assert(cache != tmp, "must use different register"); + get_cache_index_at_bcp(tmp, bcp_offset, index_size); + assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); + // convert from field index to ConstantPoolCacheEntry index + // and from word offset to byte offset + assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); + ldr(cache, Address(rfp, frame::get_interpreter_frame_cache_offset() * wordSize)); + // skip past the header + add(cache, cache, in_bytes(ConstantPoolCache::base_offset())); + add(cache, cache, tmp, lsl(2 + LogBytesPerWord)); // construct pointer to cache entry +} + +void InterpreterMacroAssembler::get_method_counters(Register method, + Register mcs, Label& skip) { + Label has_counters; + ldr(mcs, Address(method, Method::method_counters_offset())); + cbnz(mcs, has_counters); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), method); + ldr(mcs, Address(method, Method::method_counters_offset())); + cbz(mcs, skip); // No MethodCounters allocated, OutOfMemory + bind(has_counters); +} + +// Load object from cpool->resolved_references(index) +void InterpreterMacroAssembler::load_resolved_reference_at_index( + Register result, Register index, Register tmp) { + assert_different_registers(result, index); + // convert from field index to resolved_references() index and from + // word index to byte offset. Since this is a java object, it can be compressed + + get_constant_pool(result); + // load pointer for resolved_references[] objArray + ldr(result, Address(result, ConstantPool::cache_offset_in_bytes())); + ldr(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); + resolve_oop_handle(result, tmp); + // Add in the index + add(result, result, index, lsl(LogBytesPerHeapOop)); + load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT))); +} + +void InterpreterMacroAssembler::load_resolved_klass_at_offset( + Register cpool, Register index, Register klass, Register temp) { + add(temp, cpool, index, lsl(LogBytesPerWord)); + ldrh(temp, Address(temp, sizeof(ConstantPool))); // temp = resolved_klass_index + ldr(klass, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); // klass = cpool->_resolved_klasses + add(klass, klass, temp, lsl(LogBytesPerWord)); + ldr(klass, Address(klass, Array::base_offset_in_bytes())); +} + + +// Generate a subtype check: branch to ok_is_subtype if sub_klass is a +// subtype of super_klass. +// +// Args: +// r0: superklass +// Rsub_klass: subklass +// +// Kills: +// r2, r5 +void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, + Label& ok_is_subtype) { + assert(Rsub_klass != r0, "r0 holds superklass"); + assert(Rsub_klass != r2, "r2 holds 2ndary super array length"); + assert(Rsub_klass != r14, "r14 holds 2ndary super array scan ptr"); + + // Profile the not-null value's klass. 
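The uxtb-with-rotate sequence in get_cache_and_index_and_bytecode_at_bcp() above extracts one byte of the ConstantPoolCacheEntry indices word. A plain C++ restatement, assuming the usual layout of that word (constant-pool index in the low half-word, one resolved-bytecode value per byte above it):

#include <stdint.h>

// byte_no (1 or 2) selects which resolved-bytecode byte to read.
static uint32_t resolved_bytecode(uint32_t indices, int byte_no) {
  const int shift = (1 + byte_no) * 8;  // matches (1 + byte_no) * BitsPerByte
  return (indices >> shift) & 0xff;     // uxtb(bytecode, bytecode, ror(shift_count))
}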
+ profile_typecheck(r2, Rsub_klass, r14); // blows r2 + + // Do the check. + check_klass_subtype(Rsub_klass, r0, r2, ok_is_subtype); // blows r2 + + // Profile the failure of the check. + profile_typecheck_failed(r2); // blows r2 +} + +// Java Expression Stack + +void InterpreterMacroAssembler::pop_ptr(Register r) { + ldr(r, post(sp, wordSize)); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + ldr(r, post(sp, wordSize)); +} + +void InterpreterMacroAssembler::pop_l(Register rLo, Register rHi) { + assert(rHi->encoding() == rLo->encoding() + 1, "must use two consecutive registers"); + ldrd(rLo, post(sp, 2 * Interpreter::stackElementSize)); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + str(r, pre(sp, -wordSize)); +} + +void InterpreterMacroAssembler::push_i(Register r) { + str(r, pre(sp, -wordSize)); +} + +void InterpreterMacroAssembler::push_l(Register rLo, Register rHi) { + assert(r2->encoding() == r1->encoding() + 1, "must use two consecutive registers"); + strd(rLo, pre(sp, -2 * wordSize)); +} + +void InterpreterMacroAssembler::pop_f(FloatRegister r) { + vldmia_f32(sp, FloatRegSet(r).bits()); +} + +void InterpreterMacroAssembler::pop_d(FloatRegister r) { + assert(is_even(r->encoding()), "not double!"); + vldmia_f64(sp, DoubleFloatRegSet(r).bits()); +} + +void InterpreterMacroAssembler::push_f(FloatRegister r) { + vstmdb_f32(sp, FloatRegSet(r).bits()); +} + +void InterpreterMacroAssembler::push_d(FloatRegister r) { + assert(is_even(r->encoding()), "not double!"); + vstmdb_f64(sp, DoubleFloatRegSet(r).bits()); +} + +void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: pop_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: pop_i(); break; + case ltos: pop_l(); break; + case ftos: + if(hasFPU()) { + pop_f(); + } else { + pop_i(); + } + break; + case dtos: + if(hasFPU()) { + pop_d(); + } else { + pop_l(); + } + break; + case vtos: /* nothing to do */ break; + default: ShouldNotReachHere(); + } + verify_oop(r0, state); +} + +void InterpreterMacroAssembler::push(TosState state) { + verify_oop(r0, state); + switch (state) { + case atos: push_ptr(); break; + case btos: + case ztos: + case ctos: + case stos: + case itos: push_i(); break; + case ltos: push_l(); break; + case ftos: + if(hasFPU()) { + push_f(); + } else { + push_i(); + } + break; + case dtos: + if(hasFPU()) { + push_d(); + } else { + push_l(); + } + break; + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + +// Helpers for swap and dup +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ldr(val, Address(sp, Interpreter::expr_offset_in_bytes(n))); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + str(val, Address(sp, Interpreter::expr_offset_in_bytes(n))); +} + +// Load ftos/dtos from given address +void InterpreterMacroAssembler::load_float(Address src) { + if (hasFPU()) { + vldr_f32(f0, src); + } else { + ldr(r0, src); + } +} + +void InterpreterMacroAssembler::load_double(Address src) { + if (hasFPU()) { + vldr_f64(d0, src); + } else { + ldrd(r0, r1, src); + } +} + +void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { + // set sender sp + mov(r4, sp); + // record last_sp + str(sp, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); +} + +void print_method_name(Method* m, char * msg) { + if(MacroAssembler::enable_debug) { + printf("%s", msg); + fflush(stdout); + m->print_short_name(); + printf("\n"); + fflush(stdout); + } +} + +// Jump to 
from_interpreted entry of a call unless single stepping is possible +// in this thread in which case we must call the i2i entry +void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { + prepare_to_jump_from_interpreted(); + + if (JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + // interp_only is an int, on little endian it is sufficient to test the byte only + // Is a cmpl faster? + ldr(temp, Address(rthread, JavaThread::interp_only_mode_offset())); + cbz(temp, run_compiled_code); + ldr(temp, Address(method, Method::interpreter_entry_offset())); + b(temp); + bind(run_compiled_code); + } + + ldr(temp, Address(method, Method::from_interpreted_offset())); + b(temp); +} + +// The following two routines provide a hook so that an implementation +// can schedule the dispatch in two parts. amd64 does not do this. +void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { +} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { + dispatch_next(state, step); +} + +void InterpreterMacroAssembler::dispatch_base(TosState state, + address* table, + bool verifyoop, + bool generate_poll) { + if (VerifyActivationFrameSize) { + Unimplemented(); + } + if (verifyoop) { + verify_oop(r0, state); + } + + /* Debugging code */ + bytecode_seen(rscratch1, r3); + + /*{ + Label skip; + + mov(r3, (address)&MacroAssembler::bytecodes_executed); + ldr(r2, r3); + add(r2, r2, 1); + str(r2, r3); + // Print out every 16384 (needs to be a power of two). + mov(r3, 16384 - 1); + tst(r2, r3); + b(skip, Assembler::NE); + reg_printf_important("Executed %d bytecodes.\n", r2); + bind(skip); + }*/ + + + /*mov(r3, (address)&MacroAssembler::bytecodes_until_print); + ldr(r2, Address(r3)); + cmp(r2, 0); + + sub(r2, r2, 1, Assembler::NE); + str(r2, Address(r3), Assembler::NE); + + mov(r2, 1, Assembler::EQ); + mov(r3, (address)&MacroAssembler::enable_debug, Assembler::EQ); + str(r2, Address(r3), Assembler::EQ); + + mov(r3, (address)&MacroAssembler::enable_method_debug, Assembler::EQ); + str(r2, Address(r3), Assembler::EQ);*/ + + /*Label end; + cmp(r2, 0); + b(end, Assembler::NE); + stop("got to end of bytecodes"); + bind(end);*/ + + get_bytecode(r14, rscratch1); + reg_printf("Dispatching bytecode %s (%d) @ BCP = %p\n", r14, rscratch1, rbcp); + /* End debugging code */ + + Label safepoint; + address* const safepoint_table = Interpreter::safept_table(state); + bool needs_thread_local_poll = generate_poll && + SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; + + if (needs_thread_local_poll) { + NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); + ldr(rscratch2, Address(rthread, Thread::polling_page_offset())); + tbnz(rscratch2, exact_log2(SafepointMechanism::poll_bit()), safepoint); + } + + if (table == Interpreter::dispatch_table(state)) { + add(rscratch2, rscratch1, Interpreter::distance_from_dispatch_table(state)); + ldr(r15_pc, Address(rdispatch, rscratch2, lsl(2))); + } else { + mov(rscratch2, (address)table); + ldr(r15_pc, Address(rscratch2, rscratch1, lsl(2))); + } + + if (needs_thread_local_poll) { + bind(safepoint); + lea(rscratch2, ExternalAddress((address)safepoint_table)); + ldr(r15_pc, Address(rscratch2, rscratch1, lsl(2))); + } +} + +void 
InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { + dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); +} + +void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { + dispatch_base(state, Interpreter::normal_table(state)); +} + +void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { + dispatch_base(state, Interpreter::normal_table(state), false); +} + + +void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { + // load next bytecode + ldrb(rscratch1, Address(pre(rbcp, step))); + dispatch_base(state, Interpreter::dispatch_table(state), generate_poll); +} + +void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { + // load current bytecode + ldrb(rscratch1, Address(rbcp, 0)); + dispatch_base(state, table); +} + +// remove activation +// +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from syncronized blocks. +// Remove the activation from the stack. +// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +void InterpreterMacroAssembler::remove_activation( + TosState state, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) { + // Note: Registers r3 xmm0 may be in use for the + // result check if synchronized method + Label unlocked, unlock, no_unlock; + + // get the value of _do_not_unlock_if_synchronized into r3 + const Address do_not_unlock_if_synchronized(rthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + ldrb(r3, do_not_unlock_if_synchronized); + mov(rscratch1, 0); + strb(rscratch1, do_not_unlock_if_synchronized); // reset the flag + + // get method access flags + ldr(rscratch1, Address(rfp, frame::get_interpreter_frame_method_offset() * wordSize)); + ldr(r2, Address(rscratch1, Method::access_flags_offset())); + tst(r2, JVM_ACC_SYNCHRONIZED); + b(unlocked, Assembler::EQ); + + // Don't unlock anything if the _do_not_unlock_if_synchronized flag + // is set. + cbnz(r3, no_unlock); + + // unlock monitor + push(state); // save result + + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object has + // not been unlocked by an explicit monitorexit bytecode. + const Address monitor(rfp, frame::get_interpreter_frame_initial_sp_offset() * + wordSize - (int) sizeof(BasicObjectLock)); + // We use c_rarg1 so that if we go slow path it will be the correct + // register for unlock_object to pass to VM directly + lea(c_rarg1, monitor); // address of first monitor + + ldr(r0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + cbnz(r0, unlock); + + pop(state); + if (throw_monitor_exception) { + // Entry already unlocked, need to throw exception + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Monitor already unlocked during a stack unroll. If requested, + // install an illegal_monitor_state_exception. Continue with + // stack unrolling. 
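The dispatch_base()/dispatch_next() pair above implement token-threaded dispatch: the next bytecode is fetched through rbcp and used to index a table of handler addresses, and loading the selected entry straight into r15_pc performs the jump. A simplified, standalone model of that control flow (the handler type and table here are illustrative only):

#include <stdint.h>

// In the real interpreter, rdispatch holds Interpreter::dispatch_table()
// and the loaded entry goes directly into the program counter.
typedef void (*BytecodeHandler)();

static void dispatch_next_model(const uint8_t*& bcp, int step,
                                BytecodeHandler table[256]) {
  bcp += step;                 // ldrb(rscratch1, Address(pre(rbcp, step)))
  uint8_t bytecode = *bcp;     // the dispatch token
  table[bytecode]();           // ldr(r15_pc, Address(rdispatch, rscratch2, lsl(2)))
}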
+ if (install_monitor_exception) { + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::new_illegal_monitor_state_exception)); + } + b(unlocked); + } + + bind(unlock); + unlock_object(c_rarg1); + pop(state); + + // Check that for block-structured locking (i.e., that all locked + // objects has been unlocked) + bind(unlocked); + + // r0: Might contain return value + // FIXME r1 : Might contain the value too + + // Check that all monitors are unlocked + { + Label loop, exception, entry, restart; + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + const Address monitor_block_top( + rfp, frame::get_interpreter_frame_monitor_block_top_offset() * wordSize); + const Address monitor_block_bot( + rfp, frame::get_interpreter_frame_initial_sp_offset() * wordSize); + + bind(restart); + // We can't use c_rarg1 as it might contain a result + ldr(c_rarg2, monitor_block_top); // points to current entry, starting + // with top-most entry + lea(r14, monitor_block_bot); // points to word before bottom of + // monitor block + b(entry); + + // Entry already locked, need to throw exception + bind(exception); + + if (throw_monitor_exception) { + // Throw exception + MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime:: + throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Stack unrolling. Unlock object and install illegal_monitor_exception. + // Unlock does not block, so don't have to worry about the frame. + // We don't have to preserve c_rarg1 since we are going to throw an exception. + + push(state); + mov(c_rarg1, c_rarg2); + unlock_object(c_rarg1); + pop(state); + + if (install_monitor_exception) { + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + new_illegal_monitor_state_exception)); + } + + b(restart); + } + + bind(loop); + // check if current entry is used + ldr(rscratch1, Address(c_rarg2, BasicObjectLock::obj_offset_in_bytes())); + cbnz(rscratch1, exception); + + add(c_rarg2, c_rarg2, entry_size); // otherwise advance to next entry + bind(entry); + cmp(c_rarg2, r14); // check if bottom reached + b(loop, Assembler::NE); // if not at bottom then check this entry + } + + bind(no_unlock); + + // jvmti support + if (notify_jvmdi) { + notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + } else { + notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } + + if (StackReservedPages > 0) { + // testing if reserved zone needs to be re-enabled + Label no_reserved_zone_enabling; + + ldr(rscratch1, Address(rthread, JavaThread::reserved_stack_activation_offset())); + cmp(sp, rscratch1); + b(no_reserved_zone_enabling, Assembler::LS); + + call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), rthread); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_delayed_StackOverflowError)); + should_not_reach_here(); + + bind(no_reserved_zone_enabling); + } + + // remove activation + // get sender sp + ldr(rscratch1, + Address(rfp, frame::get_interpreter_frame_sender_sp_offset() * wordSize)); + // remove frame anchor + leave(); + // If we're returning to interpreted code we will shortly be + // adjusting SP to allow some space for ESP. If we're returning to + // compiled code the saved sender SP was saved in sender_sp, so this + // restores it. 
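The monitor-block walk in remove_activation() above can be pictured with a small model: the frame holds a contiguous run of BasicObjectLock slots between monitor_block_top and monitor_block_bot, and any slot whose object field is still set at method exit takes the exception/forced-unlock path. A sketch under that assumption (struct layout and names here are illustrative):

#include <stddef.h>

struct MonitorSlot {        // stands in for BasicObjectLock
  void* displaced_header;
  void* obj;                // cleared to NULL when the slot is unlocked/freed
};

// 'top' is the most recently pushed slot; 'bottom' is one past the oldest one.
static bool all_monitors_unlocked(const MonitorSlot* top, const MonitorSlot* bottom) {
  for (const MonitorSlot* cur = top; cur != bottom; ++cur) {
    if (cur->obj != NULL) {
      return false;         // corresponds to the branch to 'exception'
    }
  }
  return true;
}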
+ //bic(sp, rscratch1, 0xf); changed to not drop it as this is the sp + mov(sp, rscratch1); +} + +// Lock object +// +// Args: +// c_rarg1: BasicObjectLock to be used for locking +// +// Kills: +// r0 +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs) +// rscratch1, rscratch2 (scratch regs) +void InterpreterMacroAssembler::lock_object(Register lock_reg) +{ + reg_printf("LOCK:\n"); + assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); + if (UseHeavyMonitors) { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + } else { + Label done; + + const Register swap_reg = r0; + const Register obj_reg = c_rarg3; // Will contain the oop + + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + + BasicLock::displaced_header_offset_in_bytes(); + + Label slow_case; + + // Load object pointer into obj_reg %c_rarg3 + ldr(obj_reg, Address(lock_reg, obj_offset)); + + if (UseBiasedLocking) { + biased_locking_enter(obj_reg, swap_reg, rscratch2, rscratch1, false, done, &slow_case); + } + + // Load (object->mark() | 1) into swap_reg + ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + orr(swap_reg, rscratch1, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + str(swap_reg, Address(lock_reg, mark_offset)); + + assert(lock_offset == 0, + "displached header must be first word in BasicObjectLock"); + + Label fail; + if (PrintBiasedLockingStatistics) { + Label fast; + cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail); + bind(fast); + atomic_inc(Address((address)BiasedLocking::fast_path_entry_count_addr()), + rscratch2, rscratch1); + b(done); + bind(fail); + } else { + cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); + } + + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 7) == 0, and + // 2) rsp <= mark < mark + os::pagesize() + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - rsp) & (7 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 3 bits clear. + // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg + // NOTE2: aarch32 does not like to subtract sp from rn so take a + // copy + + + //mov(rscratch1, sp); + //sub(swap_reg, swap_reg, rscratch1); + //ands(swap_reg, swap_reg, (unsigned long)(7 - os::vm_page_size())); + sub(swap_reg, swap_reg, sp); + mov(rscratch1, (os::vm_page_size() - 1) & ~0b11); + bics(swap_reg, swap_reg, rscratch1); + + // Save the test result, for recursive case, the result is zero + str(swap_reg, Address(lock_reg, mark_offset)); + + if (PrintBiasedLockingStatistics) { + b(slow_case, Assembler::NE); + atomic_inc(Address((address)BiasedLocking::fast_path_entry_count_addr()), + rscratch2, rscratch1); + } + b(done, Assembler::EQ); + + bind(slow_case); + + // Call the runtime routine for slow case + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + + bind(done); + } +} + + +// Unlocks an object. Used in monitorexit bytecode and +// remove_activation. Throws an IllegalMonitorException if object is +// not locked by current thread. +// +// Args: +// c_rarg1: BasicObjectLock for lock +// +// Kills: +// r0 +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... 
(param regs) +// rscratch1, rscratch2 (scratch regs) +void InterpreterMacroAssembler::unlock_object(Register lock_reg) +{ + assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); + + reg_printf("UNLOCK:\n"); + if (UseHeavyMonitors) { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); + } else { + Label done; + + //create_breakpoint(); + const Register swap_reg = c_rarg0; + const Register header_reg = c_rarg2; // Will contain the old oopMark + const Register obj_reg = c_rarg3; // Will contain the oop + + save_bcp(); // Save in case of exception + + // Convert from BasicObjectLock structure to object and BasicLock + // structure Store the BasicLock address into %r0 + lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + + // Load oop into obj_reg(%c_rarg3) + ldr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + + // Free entry + mov(rscratch2, 0); + str(rscratch2, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); + + if (UseBiasedLocking) { + biased_locking_exit(obj_reg, header_reg, done); + } + + // Load the old header from BasicLock structure + ldr(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); + + // Test for recursion + cbz(header_reg, done); + + // Atomic swap back the old header + cmpxchg_obj_header(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL); + + // Call the runtime routine for slow case. + str(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), + lock_reg); + + bind(done); + + restore_bcp(); + } +} + +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, + Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ldr(mdp, Address(rfp, frame::get_interpreter_frame_mdp_offset() * wordSize)); + cbz(mdp, zero_continue); +} + +// Set the method data pointer for the current bcp. +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + strd(r0, r1, Address(pre(sp, -2 * wordSize))); + + // Test MDO to avoid the call if it is NULL. + ldr(r0, Address(rmethod, in_bytes(Method::method_data_offset()))); + cbz(r0, set_mdp); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), rmethod, rbcp); + // r0: mdi + // mdo is guaranteed to be non-zero here, we checked for it before the call. + ldr(r1, Address(rmethod, in_bytes(Method::method_data_offset()))); + lea(r1, Address(r1, in_bytes(MethodData::data_offset()))); + add(r0, r1, r0); + str(r0, Address(rfp, frame::get_interpreter_frame_mdp_offset() * wordSize)); + bind(set_mdp); + ldrd(r0, r1, Address(post(sp, 2 * wordSize))); +} + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + strd(r0, r1, Address(pre(sp, -2 * wordSize))); + strd(r2, r3, Address(pre(sp, -2 * wordSize))); + test_method_data_pointer(r3, verify_continue); // If mdp is zero, continue + get_method(r1); + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. 
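The fast path in lock_object() above decides whether a failed compare-and-swap is really a recursive stack lock by checking that the current mark word points into the locking thread's own stack page and has its tag bits clear. Restated in C++, assuming a 4 KiB page and two low mark-word tag bits, as the mask built from os::vm_page_size() and ~0b11 implies:

#include <stdint.h>
#include <stddef.h>

// Zero result <=> sp <= mark < sp + page_size and the two low (tag) bits of the
// difference are clear, i.e. the mark already points at a BasicLock on this
// thread's stack, so the lock is recursive and a zero displaced header is stored.
static uintptr_t recursive_stack_lock_test(uintptr_t mark, uintptr_t sp,
                                           size_t page_size /* assumed 4096 */) {
  uintptr_t in_page_mask = ((uintptr_t)page_size - 1) & ~(uintptr_t)0x3; // mov(rscratch1, ...)
  return (mark - sp) & ~in_page_mask;                                    // sub + bics
}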
+ ldrsh(r2, Address(r3, in_bytes(DataLayout::bci_offset()))); + ldr(rscratch1, Address(r1, Method::const_offset())); + add(r2, r2, rscratch1); + lea(r2, Address(r2, ConstMethod::codes_offset())); + cmp(r2, rbcp); + b(verify_continue, Assembler::EQ); + // r1: method + // rbcp: bcp // rbcp == 22 + // r3: mdp + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), + r1, rbcp, r3); + bind(verify_continue); + ldrd(r2, r3, Address(post(sp, 2 * wordSize))); + ldrd(r0, r1, Address(post(sp, 2 * wordSize))); +#endif // ASSERT +} + + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, + int constant, + Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + Address data(mdp_in, constant); + str(value, data); +} + + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int constant, + bool decrement) { + increment_mdp_data_at(mdp_in, noreg, constant, decrement); +} + +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + Register reg, + int constant, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // %%% this does 64bit counters at best it is wasting space + // at worst it is a rare bug when counters overflow + + assert_different_registers(rscratch2, rscratch1, mdp_in, reg); + + Address addr1(mdp_in, constant); + Address addr2(rscratch2, reg, lsl(0)); + Address &addr = addr1; + if (reg != noreg) { + lea(rscratch2, addr1); + addr = addr2; + } + + if (decrement) { + // Decrement the register. Set condition codes. + // Intel does this + // addptr(data, (int32_t) -DataLayout::counter_increment); + // If the decrement causes the counter to overflow, stay negative + // Label L; + // jcc(Assembler::negative, L); + // addptr(data, (int32_t) DataLayout::counter_increment); + // so we do this + ldr(rscratch1, addr); + subs(rscratch1, rscratch1, (unsigned)DataLayout::counter_increment); + Label L; + b(L, Assembler::LO); // skip store if counter underflow + str(rscratch1, addr); + bind(L); + } else { + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + // Intel does this + // Increment the register. Set carry flag. + // addptr(data, DataLayout::counter_increment); + // If the increment causes the counter to overflow, pull back by 1. 
+ // sbbptr(data, (int32_t)0); + // so we do this + ldr(rscratch1, addr); + adds(rscratch1, rscratch1, DataLayout::counter_increment); + Label L; + b(L, Assembler::CS); // skip store if counter overflow + str(rscratch1, addr); + bind(L); + } +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, + int flag_byte_constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + int flags_offset = in_bytes(DataLayout::flags_offset()); + // Set the flag + ldrb(rscratch1, Address(mdp_in, flags_offset)); + orr(rscratch1, rscratch1, flag_byte_constant); + strb(rscratch1, Address(mdp_in, flags_offset)); +} + + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + if (test_value_out == noreg) { + ldr(rscratch1, Address(mdp_in, offset)); + cmp(value, rscratch1); + } else { + // Put the test value into a register, so caller can use it: + ldr(test_value_out, Address(mdp_in, offset)); + cmp(value, test_value_out); + } + b(not_equal_continue, Assembler::NE); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ldr(rscratch1, Address(mdp_in, offset_of_disp)); + add(mdp_in, mdp_in, rscratch1); + str(mdp_in, Address(rfp, frame::get_interpreter_frame_mdp_offset() * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, + Register reg, + int offset_of_disp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + lea(rscratch1, Address(mdp_in, offset_of_disp)); + ldr(rscratch1, Address(rscratch1, reg, lsl())); + add(mdp_in, mdp_in, rscratch1); + str(mdp_in, Address(rfp, frame::get_interpreter_frame_mdp_offset() * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, + int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + add(mdp_in, mdp_in, (unsigned) constant); + str(mdp_in, Address(rfp, frame::get_interpreter_frame_mdp_offset() * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + // save/restore across call_VM + mov(rscratch1, 0); + strd(rscratch1, return_bci, Address(pre(sp, -2 * wordSize))); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), + return_bci); + ldrd(rscratch1, return_bci, Address(post(sp, 2 * wordSize))); +} + + +void InterpreterMacroAssembler::profile_taken_branch(Register mdp, + Register bumped_count) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + // Otherwise, assign to mdp + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the taken count. 
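increment_mdp_data_at() above, and profile_taken_branch() which inlines the same idiom just below, keep the 32-bit profile counters saturating rather than wrapping: the updated value is written back only when the add did not carry (or the subtract did not borrow). A minimal C++ equivalent:

#include <stdint.h>

static void saturating_increment(uint32_t* counter, uint32_t increment) {
  uint32_t updated = *counter + increment;
  if (updated >= increment) {   // no unsigned carry -- mirrors skipping the store on Assembler::CS
    *counter = updated;
  }
}

static void saturating_decrement(uint32_t* counter, uint32_t decrement) {
  if (*counter >= decrement) {  // no borrow -- mirrors skipping the store on Assembler::LO
    *counter = *counter - decrement;
  }
}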
+ // We inline increment_mdp_data_at to return bumped_count in a register + //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); + Address data(mdp, in_bytes(JumpData::taken_offset())); + ldr(bumped_count, data); + assert(DataLayout::counter_increment == 1, + "flow-free idiom only works with 1"); + // Intel does this to catch overflow + // addptr(bumped_count, DataLayout::counter_increment); + // sbbptr(bumped_count, 0); + // so we do this + adds(bumped_count, bumped_count, DataLayout::counter_increment); + Label L; + b(L, Assembler::CS); // skip store if counter overflow + str(bumped_count, data); + bind(L); + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the not taken count. + increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); + + // The method data pointer needs to be updated to correspond to + // the next bytecode + update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_final_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::profile_virtual_call(Register receiver, + Register mdp, + Register reg2, + bool receiver_can_be_null) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label skip_receiver_profile; + if (receiver_can_be_null) { + Label not_null; + // We are making a call. Increment the count for null receiver. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + b(skip_receiver_profile); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, mdp, reg2, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. 
+#if INCLUDE_JVMCI + if (MethodProfileWidth == 0) { + update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); + } +#else // INCLUDE_JVMCI + update_mdp_by_constant(mdp, + in_bytes(VirtualCallData:: + virtual_call_data_size())); +#endif // INCLUDE_JVMCI + bind(profile_continue); + } +} + +#if INCLUDE_JVMCI +void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { + assert_different_registers(method, mdp, reg2); + if (ProfileInterpreter && MethodProfileWidth > 0) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label done; + record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, + &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); + bind(done); + + update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); + bind(profile_continue); + } +} +#endif // INCLUDE_JVMCI + +// This routine creates a state machine for updating the multi-row +// type profile at a virtual call site (or other type-sensitive bytecode). +// The machine visits each row (of receiver/count) until the receiver type +// is found, or until it runs out of rows. At the same time, it remembers +// the location of the first empty row. (An empty row records null for its +// receiver, and can be allocated for a newly-observed receiver type.) +// Because there are two degrees of freedom in the state, a simple linear +// search will not work; it must be a decision tree. Hence this helper +// function is recursive, to generate the required tree structured code. +// It's the interpreter, so we are trading off code space for speed. +// See below for example code. +void InterpreterMacroAssembler::record_klass_in_profile_helper( + Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call) { + if (TypeProfileWidth == 0) { + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } +#if INCLUDE_JVMCI + else if (EnableJVMCI) { + increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset())); + } +#endif // INCLUDE_JVMCI + } else { + int non_profiled_offset = -1; + if (is_virtual_call) { + non_profiled_offset = in_bytes(CounterData::count_offset()); + } +#if INCLUDE_JVMCI + else if (EnableJVMCI) { + non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); + } +#endif // INCLUDE_JVMCI + + record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, + &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); + } +} + +void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp, + Register reg2, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, + int non_profiled_offset) { + int last_row = total_rows - 1; + assert(start_row <= last_row, "must be work left to do"); + // Test this row for both the item and for null. + // Take any of three different outcomes: + // 1. found item => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. 
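record_item_in_profile_helper(), whose row loop follows this note, unrolls the search into a decision tree for speed; the underlying policy is easier to see as a flat loop. The struct and names below are illustrative, not the MDO cell layout:

#include <stddef.h>
#include <stdint.h>

struct ProfileRow {        // models one (receiver, count) pair in the profile
  void*    receiver;       // NULL while the row is unused
  uint32_t count;
};

// Find the matching row, else claim the first empty row, else fall back to the
// polymorphic counter -- the same three outcomes listed in the comment above.
static void record_receiver(ProfileRow* rows, int nrows, void* receiver,
                            uint32_t* polymorphic_count) {
  int first_empty = -1;
  for (int i = 0; i < nrows; i++) {
    if (rows[i].receiver == receiver) { rows[i].count++; return; }    // case 1
    if (rows[i].receiver == NULL && first_empty < 0) first_empty = i; // case 2
  }
  if (first_empty >= 0) {
    rows[first_empty].receiver = receiver;   // newly observed type claims the empty row
    rows[first_empty].count = 1;             // DataLayout::counter_increment
  } else if (polymorphic_count != NULL) {
    (*polymorphic_count)++;                  // ran out of rows: megamorphic case
  }
}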
+ for (int row = start_row; row <= last_row; row++) { + Label next_test; + bool test_for_null_also = (row == start_row); + + // See if the item is item[n]. + int item_offset = in_bytes(item_offset_fn(row)); + test_mdp_data_at(mdp, item_offset, item, + (test_for_null_also ? reg2 : noreg), + next_test); + // (Reg2 now contains the item from the CallData.) + + // The item is item[n]. Increment count[n]. + int count_offset = in_bytes(item_count_offset_fn(row)); + increment_mdp_data_at(mdp, count_offset); + b(done); + bind(next_test); + + if (test_for_null_also) { + Label found_null; + // Failed the equality check on item[n]... Test for null. + if (start_row == last_row) { + // The only thing left to do is handle the null case. + if (non_profiled_offset >= 0) { + cbz(reg2, found_null); + // Item did not match any saved item and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + increment_mdp_data_at(mdp, non_profiled_offset); + b(done); + bind(found_null); + } else { + cbnz(reg2, done); + } + break; + } + // Since null is rare, make it be the branch-taken case. + cbz(reg2,found_null); + + // Put all the "Case 3" tests here. + record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, + item_offset_fn, item_count_offset_fn, non_profiled_offset); + + // Found a null. Keep searching for a matching item, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + + // In the fall-through case, we found no matching item, but we + // observed the item[start_row] is NULL. + + // Fill in the item field and increment the count. + int item_offset = in_bytes(item_offset_fn(start_row)); + set_mdp_data_at(mdp, item_offset, item); + int count_offset = in_bytes(item_count_offset_fn(start_row)); + mov(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { + b(done); + } +} + +// Example state machine code for three profile rows: +// // main copy of decision tree, rooted at row[1] +// if (row[0].rec == rec) { row[0].incr(); goto done; } +// if (row[0].rec != NULL) { +// // inner copy of decision tree, rooted at row[1] +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[1].rec != NULL) { +// // degenerate decision tree, rooted at row[2] +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// if (row[2].rec != NULL) { count.incr(); goto done; } // overflow +// row[2].init(rec); goto done; +// } else { +// // remember row[1] is empty +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[1].init(rec); goto done; +// } +// } else { +// // remember row[0] is empty +// if (row[1].rec == rec) { row[1].incr(); goto done; } +// if (row[2].rec == rec) { row[2].incr(); goto done; } +// row[0].init(rec); goto done; +// } +// done: + +void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, + Register mdp, Register reg2, + bool is_virtual_call) { + assert(ProfileInterpreter, "must be profiling"); + Label done; + + record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); + + bind (done); +} + +void InterpreterMacroAssembler::profile_ret(Register return_bci, + Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + uint row; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the total ret count. 
+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + + for (row = 0; row < RetData::row_limit(); row++) { + Label next_test; + + // See if return_bci is equal to bci[n]: + test_mdp_data_at(mdp, + in_bytes(RetData::bci_offset(row)), + return_bci, noreg, + next_test); + + // return_bci is equal to bci[n]. Increment the count. + increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_offset(mdp, + in_bytes(RetData::bci_displacement_offset(row))); + b(profile_continue); + bind(next_test); + } + + update_mdp_for_ret(return_bci); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_null_seen(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { + if (ProfileInterpreter && TypeProfileCasts) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + int count_offset = in_bytes(CounterData::count_offset()); + // Back up the address, since we have already bumped the mdp. + count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + + // *Decrement* the counter. We expect to see zero or small negatives. + increment_mdp_data_at(mdp, count_offset, true); + + bind (profile_continue); + } +} + +void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + + // Record the object type. + record_klass_in_profile(klass, mdp, reg2, false); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_switch_default(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the default case count + increment_mdp_data_at(mdp, + in_bytes(MultiBranchData::default_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + in_bytes(MultiBranchData:: + default_displacement_offset())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_switch_case(Register index, + Register mdp, + Register reg2) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. 
+ test_method_data_pointer(mdp, profile_continue); + + // Build the base (index * per_case_size_in_bytes()) + + // case_array_offset_in_bytes() + mov(reg2, in_bytes(MultiBranchData::per_case_size())); + mov(rscratch1, in_bytes(MultiBranchData::case_array_offset())); + Assembler::mla(index, index, reg2, rscratch1); + + // Update the case count + increment_mdp_data_at(mdp, + index, + in_bytes(MultiBranchData::relative_count_offset())); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, + index, + in_bytes(MultiBranchData:: + relative_displacement_offset())); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { + if (state == atos) { + MacroAssembler::verify_oop(reg); + } +} + +void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } + + +void InterpreterMacroAssembler::notify_method_entry() { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (JvmtiExport::can_post_interpreter_events()) { + Label L; + ldr(r3, Address(rthread, JavaThread::interp_only_mode_offset())); + cbz(r3, L); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_method_entry)); + bind(L); + } + +#ifdef DTRACE_ENABLED + { + SkipIfEqual skip(this, &DTraceMethodProbes, false); + get_method(c_rarg1); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + rthread, c_rarg1); + } +#endif + + // RedefineClasses() tracing support for obsolete method entry + if (log_is_enabled(Trace, redefine, class, obsolete)) { + get_method(c_rarg1); + call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + rthread, c_rarg1); + } + +} + + +void InterpreterMacroAssembler::notify_method_exit( + TosState state, NotifyMethodExitMode mode) { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { + Label L; + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. If this + // is changed then the interpreter_frame_result implementation will + // need to be updated too. + + push(state); + ldr(r3, Address(rthread, JavaThread::interp_only_mode_offset())); + cbz(r3, L); + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + bind(L); + pop(state); + } + +#ifdef DTRACE_ENABLED + { + SkipIfEqual skip(this, &DTraceMethodProbes, false); + push(state); + get_method(c_rarg1); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + rthread, c_rarg1); + pop(state); + } +#endif +} + + +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. 
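increment_mask_and_jump(), defined immediately below, implements the periodic check described by the comment above; as a plain function the behaviour it encodes is roughly the following sketch:

#include <stdint.h>

// The counter is bumped by 'increment'; the caller's slow path (passed via
// 'where'/'cond', typically an overflow handler tested with EQ) is taken when
// the masked value reaches zero, so it fires only periodically.
static bool increment_and_check(uint32_t* counter, uint32_t increment, uint32_t mask) {
  *counter += increment;
  return (*counter & mask) == 0;   // ands(scratch, scratch, scratch2); b(*where, cond)
}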
+void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, + int increment, Address mask, + Register scratch, Register scratch2, + bool preloaded, Condition cond, + Label* where) { + if (!preloaded) { + ldr(scratch, counter_addr); + } + add(scratch, scratch, increment); + str(scratch, counter_addr); + ldr(scratch2, mask); + ands(scratch, scratch, scratch2); + if (where) + b(*where, cond); +} + +void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments, + Label *retaddr) { + // interpreter specific + // + // Note: No need to save/restore rbcp & rlocals pointer since these + // are callee saved registers and no blocking/ GC can happen + // in leaf calls. +#ifdef ASSERT + { + Label L; + ldr(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); + cbz(rscratch1, L); + stop("InterpreterMacroAssembler::call_VM_leaf_base:" + " last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, retaddr); +} + +void InterpreterMacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // interpreter specific + // + // Note: Could avoid restoring locals ptr (callee saved) - however doesn't + // really make a difference for these runtime calls, since they are + // slow anyway. Btw., bcp must be saved/restored since it may change + // due to GC. + // assert(java_thread == noreg , "not expecting a precomputed java thread"); + save_bcp(); +#ifdef ASSERT + { + Label L; + ldr(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); + cbz(rscratch1, L); + stop("InterpreterMacroAssembler::call_VM_base:" + " last_sp != NULL"); + bind(L); + } +#endif /* ASSERT */ + // super call + MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, + entry_point, number_of_arguments, + check_exceptions); +// interpreter specific + restore_bcp(); + //restore_locals(); +} + +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { + assert_different_registers(obj, rscratch1); + Label update, next, none; + + verify_oop(obj); + + cbnz(obj, update); + orptr(mdo_addr, TypeEntries::null_seen); + b(next); + + bind(update); + load_klass(obj, obj); + + ldr(rscratch1, mdo_addr); + eor(obj, obj, rscratch1); + bics(rscratch1, obj, ~TypeEntries::type_klass_mask); + b(next, Assembler::EQ); // klass seen before, nothing to + // do. The unknown bit may have been + // set already but no need to check. + + tst(obj, TypeEntries::type_unknown); + b(next, Assembler::NE); // already unknown. Nothing to do anymore. + + ldr(rscratch1, mdo_addr); + cbz(rscratch1, none); + cmp(rscratch1, TypeEntries::null_seen); + b(none, Assembler::EQ); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + ldr(rscratch1, mdo_addr); + eor(obj, obj, rscratch1); + bics(rscratch1, obj, ~TypeEntries::type_klass_mask); + b(next, Assembler::EQ); + + // different than before. Cannot keep accurate profile. + orptr(mdo_addr, TypeEntries::type_unknown); + b(next); + + bind(none); + // first time here. Set profile type. 
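profile_obj_type() above (its final store is the first statement after this note) maintains a single profiling cell that packs a klass pointer with two low status bits. Assuming the usual TypeEntries-style encoding (bit 0 = a null was seen, bit 1 = conflicting types were seen), the update policy it implements is:

#include <stdint.h>
#include <stddef.h>

static const uintptr_t kNullSeen    = 1;                  // assumed TypeEntries::null_seen
static const uintptr_t kTypeUnknown = 2;                  // assumed TypeEntries::type_unknown
static const uintptr_t kStatusBits  = kNullSeen | kTypeUnknown;

static void update_type_cell(uintptr_t* cell, const void* klass /* NULL for a null receiver */) {
  if (klass == NULL) {
    *cell |= kNullSeen;                                    // orptr(mdo_addr, TypeEntries::null_seen)
    return;
  }
  uintptr_t current = *cell;
  if (((current ^ (uintptr_t)klass) & ~kStatusBits) == 0)  // eor + bics with the klass mask
    return;                                                // same klass as before, nothing to do
  if (current & kTypeUnknown)
    return;                                                // already marked polymorphic
  if (current == 0 || current == kNullSeen) {
    *cell = (uintptr_t)klass;                              // first time here: record the type
  } else {
    *cell |= kTypeUnknown;                                 // different klass: give up on precision
  }
}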
+ str(obj, mdo_addr); + + bind(next); +} + +void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + ldrb(rscratch1, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); + cmp(rscratch1, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); + b(profile_continue, Assembler::NE); + + if (MethodData::profile_arguments()) { + Label done; + int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); + add(mdp, mdp, off_to_args); + + for (int i = 0; i < TypeProfileArgsLimit; i++) { + if (i > 0 || MethodData::profile_return()) { + // If return value type is profiled we may have no argument to profile + ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args)); + sub(tmp, tmp, i*TypeStackSlotEntries::per_arg_count()); + cmp(tmp, TypeStackSlotEntries::per_arg_count()); + b(done, Assembler::LT); + } + ldr(tmp, Address(callee, Method::const_offset())); + load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list + ldr(rscratch1, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args)); + sub(tmp, tmp, rscratch1); + sub(tmp, tmp, 1); + Address arg_addr = argument_address(tmp); + ldr(tmp, arg_addr); + + Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); + profile_obj_type(tmp, mdo_arg_addr); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + add(mdp, mdp, to_add); + off_to_args += to_add; + } + + if (MethodData::profile_return()) { + ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args)); + sub(tmp, tmp, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); + } + + bind(done); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile. 
+ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + add(mdp, mdp, tmp, lsl(exact_log2(DataLayout::cell_size))); + } + str(mdp, Address(rfp, frame::get_interpreter_frame_mdp_offset() * wordSize)); + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); + } + + // mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { + assert_different_registers(mdp, ret, tmp, rbcp); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile. We can't go back to the + // begining of the ProfileData we intend to update to check its + // type because we're right after it and we don't known its + // length + Label do_profile; + ldrb(rscratch1, Address(rbcp, 0)); + cmp(rscratch1, Bytecodes::_invokedynamic); + b(do_profile, Assembler::EQ); + cmp(rscratch1, Bytecodes::_invokehandle); + b(do_profile, Assembler::EQ); + get_method(tmp); + ldrh(rscratch1, Address(tmp, Method::intrinsic_id_offset_in_bytes())); + mov(tmp, vmIntrinsics::_compiledLambdaForm); + cmp(rscratch1, tmp); + b(profile_continue, Assembler::NE); + + bind(do_profile); + } + + Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + mov(tmp, ret); + profile_obj_type(tmp, mdo_ret_addr); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { + assert_different_registers(rscratch1, rscratch2, mdp, tmp1, tmp2); + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters + ldr(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); + cmp(tmp1, 0u); + b(profile_continue, Assembler::LT); + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameters. Collect profiling from last parameter down. 
+ // mdo start + parameters offset + array length - 1 + add(mdp, mdp, tmp1); + ldr(tmp1, Address(mdp, ArrayData::array_len_offset())); + sub(tmp1, tmp1, TypeStackSlotEntries::per_arg_count()); + + Label loop; + bind(loop); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + int per_arg_scale = exact_log2(DataLayout::cell_size); + add(rscratch1, mdp, off_base); + add(rscratch2, mdp, type_base); + + Address arg_off(rscratch1, tmp1, lsl(per_arg_scale)); + Address arg_type(rscratch2, tmp1, lsl(per_arg_scale)); + + // load offset on the stack from the slot for this parameter + ldr(tmp2, arg_off); + neg(tmp2, tmp2); + // read the parameter from the local area + ldr(tmp2, Address(rlocals, tmp2, lsl(Interpreter::logStackElementSize))); + + // profile the parameter + profile_obj_type(tmp2, arg_type); + + // go to next parameter + subs(tmp1, tmp1, TypeStackSlotEntries::per_arg_count()); + b(loop, Assembler::GE); + + bind(profile_continue); + } +} --- /dev/null 2018-09-25 19:25:03.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/interp_masm_aarch32.hpp 2018-09-25 19:25:03.000000000 +0300 @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH32_VM_INTERP_MASM_AARCH32_64_HPP +#define CPU_AARCH32_VM_INTERP_MASM_AARCH32_64_HPP + +#include "asm/macroAssembler.hpp" +#include "interpreter/invocationCounter.hpp" +#include "runtime/frame.hpp" + +// This file specializes the assember with interpreter-specific macros + +typedef ByteSize (*OffsetFunction)(uint); + +class InterpreterMacroAssembler: public MacroAssembler { + protected: + // Interpreter specific version of call_VM_base + using MacroAssembler::call_VM_leaf_base; + + public: + virtual void call_VM_leaf_base(address entry_point, + int number_of_arguments, + Label *retaddr = NULL); + protected: + virtual void call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions); + + // base routine for all dispatches + void dispatch_base(TosState state, address* table, + bool verifyoop = true, bool generate_poll = false); + + public: + InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {} + + void load_earlyret_value(TosState state); + + void jump_to_entry(address entry); + + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + // Interpreter-specific registers + void save_bcp() { + str(rbcp, Address(rfp, frame::get_interpreter_frame_bcp_offset() * wordSize)); + } + + void restore_bcp() { + ldr(rbcp, Address(rfp, frame::get_interpreter_frame_bcp_offset() * wordSize)); + } + + void restore_locals() { + ldr(rlocals, Address(rfp, frame::get_interpreter_frame_locals_offset() * wordSize)); + } + + void restore_constant_pool_cache() { + ldr(rcpool, Address(rfp, frame::get_interpreter_frame_cache_offset() * wordSize)); + } + + void get_dispatch(); + + // Helpers for runtime call arguments/results + + void get_method(Register reg) { + ldr(reg, Address(rfp, frame::get_interpreter_frame_method_offset() * wordSize)); + } + + void get_const(Register reg) { + get_method(reg); + ldr(reg, Address(reg, in_bytes(Method::const_offset()))); + } + + void get_constant_pool(Register reg) { + get_const(reg); + ldr(reg, Address(reg, in_bytes(ConstMethod::constants_offset()))); + } + + void get_constant_pool_cache(Register reg) { + get_constant_pool(reg); + ldr(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); + } + + void get_cpool_and_tags(Register cpool, Register tags) { + get_constant_pool(cpool); + ldr(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); + } + + void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); + void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); + void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); + void get_method_counters(Register method, Register mcs, Label& skip); + + // load cpool->resolved_references(index); + void load_resolved_reference_at_index(Register result, Register index, Register tmp = r5); + + // load cpool->resolved_klass_at(index); + void load_resolved_klass_at_offset(Register cpool, Register index, Register klass, Register temp); + + void pop_ptr(Register r = r0); + void pop_i(Register r = r0); + void pop_l(Register rLo = r0, Register rHi = r1); 
+ void push_ptr(Register r = r0); + void push_i(Register r = r0); + void push_l(Register rLo = r0, Register rHi = r1); + + void push_f(FloatRegister r = d0); + void push_d(FloatRegister r = d0); + void pop_f(FloatRegister r = d0); + void pop_d(FloatRegister r = d0); + + + void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } + + void push(Register r ) { ((MacroAssembler*)this)->push(r); } + + void pop(TosState state); // transition vtos -> state + void push(TosState state); // transition state -> vtos + + void pop(RegSet regs, Register stack) { ((MacroAssembler*)this)->pop(regs, stack); } + void push(RegSet regs, Register stack) { ((MacroAssembler*)this)->push(regs, stack); } + + void empty_expression_stack() { + ldr(sp, Address(rfp, frame::get_interpreter_frame_monitor_block_top_offset() * wordSize)); + // NULL last_sp until next java call + mov(rscratch1, 0); + str(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); + } + + // Helpers for swap and dup + void load_ptr(int n, Register val); + void store_ptr(int n, Register val); + + // Load ftos/dtos from given address + void load_float(Address src); + void load_double(Address src); + + // Generate a subtype check: branch to ok_is_subtype if sub_klass is + // a subtype of super_klass. + void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); + + // Dispatching + void dispatch_prolog(TosState state, int step = 0); + void dispatch_epilog(TosState state, int step = 0); + // dispatch via rscratch1 + void dispatch_only(TosState state, bool generate_poll = false); + // dispatch normal table via rscratch1 (assume rscratch1 is loaded already) + void dispatch_only_normal(TosState state); + void dispatch_only_noverify(TosState state); + // load rscratch1 from [rbcp + step] and dispatch via rscratch1 + void dispatch_next(TosState state, int step = 0, bool generate_poll = false); + // load rscratch1 from [esi] and dispatch via rscratch1 and table + void dispatch_via (TosState state, address* table); + + // jump to an invoked target + void prepare_to_jump_from_interpreted(); + void jump_from_interpreted(Register method, Register temp); + + + // Returning from interpreted functions + // + // Removes the current activation (incl. unlocking of monitors) + // and sets up the return address. This code is also used for + // exception unwindwing. In that case, we do not want to throw + // IllegalMonitorStateExceptions, since that might get us into an + // infinite rethrow exception loop. + // Additionally this code is used for popFrame and earlyReturn. + // In popFrame case we want to skip throwing an exception, + // installing an exception, and notifying jvmdi. + // In earlyReturn case we only want to skip throwing an exception + // and installing an exception. + void remove_activation(TosState state, + bool throw_monitor_exception = true, + bool install_monitor_exception = true, + bool notify_jvmdi = true); + + // FIXME: Give us a valid frame at a null check. 
+ virtual void null_check(Register reg, int offset = -1) { +// #ifdef ASSERT +// save_bcp(); +// set_last_Java_frame(sp, rfp, (address) pc()); +// #endif + MacroAssembler::null_check(reg, offset); +// #ifdef ASSERT +// reset_last_Java_frame(true); +// #endif + } + + // Object locking + void lock_object (Register lock_reg); + void unlock_object(Register lock_reg); + + // Interpreter profiling operations + void set_method_data_pointer_for_bcp(); + void test_method_data_pointer(Register mdp, Label& zero_continue); + void verify_method_data_pointer(); + + void set_mdp_data_at(Register mdp_in, int constant, Register value); + void increment_mdp_data_at(Address data, bool decrement = false); + void increment_mdp_data_at(Register mdp_in, int constant, + bool decrement = false); + void increment_mdp_data_at(Register mdp_in, Register reg, int constant, + bool decrement = false); + void increment_mask_and_jump(Address counter_addr, + int increment, Address mask, + Register scratch, Register scratch2, + bool preloaded, Condition cond, + Label* where); + void set_mdp_flag_at(Register mdp_in, int flag_constant); + void test_mdp_data_at(Register mdp_in, int offset, Register value, + Register test_value_out, + Label& not_equal_continue); + + void record_klass_in_profile(Register receiver, Register mdp, + Register reg2, bool is_virtual_call); + void record_klass_in_profile_helper(Register receiver, Register mdp, + Register reg2, int start_row, + Label& done, bool is_virtual_call); + void record_item_in_profile_helper(Register item, Register mdp, + Register reg2, int start_row, Label& done, int total_rows, + OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, + int non_profiled_offset); + + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); + void update_mdp_for_ret(Register return_bci); + + // narrow int return value + void narrow(Register result); + + void profile_taken_branch(Register mdp, Register bumped_count); + void profile_not_taken_branch(Register mdp); + void profile_call(Register mdp); + void profile_final_call(Register mdp); + void profile_virtual_call(Register receiver, Register mdp, + Register scratch2, + bool receiver_can_be_null = false); + void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; + void profile_ret(Register return_bci, Register mdp); + void profile_null_seen(Register mdp); + void profile_typecheck(Register mdp, Register klass, Register scratch); + void profile_typecheck_failed(Register mdp); + void profile_switch_default(Register mdp); + void profile_switch_case(Register index_in_scratch, Register mdp, + Register scratch2); + + void profile_obj_type(Register obj, const Address& mdo_addr); + void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); + void profile_return_type(Register mdp, Register ret, Register tmp); + void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); + + // Debugging + // only if +VerifyOops && state == atos + void verify_oop(Register reg, TosState state = atos); + // only if +VerifyFPU && (state == ftos || state == dtos) + void verify_FPU(int stack_depth, TosState state = ftos); + + typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; + + // support for jvmti/dtrace + void notify_method_entry(); + void notify_method_exit(TosState state, NotifyMethodExitMode mode); + + virtual void 
_call_Unimplemented(address call_site) { + save_bcp(); + set_last_Java_frame(sp, rfp, (address) pc(), rscratch1); + MacroAssembler::_call_Unimplemented(call_site); + } +}; + +#endif // CPU_AARCH32_VM_INTERP_MASM_AARCH32_64_HPP --- /dev/null 2018-09-25 19:25:04.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/interpreterRT_aarch32.cpp 2018-09-25 19:25:04.000000000 +0300 @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "memory/universe.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/signature.hpp" + +#define __ _masm-> + +/*#define print_copy(name, off) \ + __ mov(rscratch1, (address)name);\ + __ mov(rscratch2, off);\ + __ reg_printf("%s copied from offset %p + %d\n", rscratch1, from(), rscratch2);*/ + +#define print_copy(name, off) + +// Implementation of SignatureHandlerGenerator +Register InterpreterRuntime::SignatureHandlerGenerator::from() { return rlocals; } +Register InterpreterRuntime::SignatureHandlerGenerator::to() { return r4; } +Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return rscratch1; } + +InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( + const methodHandle &method, CodeBuffer* buffer): + NativeSignatureIterator(method), + _next_double_dex(0), + _stack_offset(0) +{ + _masm = new MacroAssembler(buffer); + _num_int_args = (method->is_static() ? 
1 : 0); + // See layout in interpreter_aarch32.cpp + _fp_arg_mask = (1 <<(Argument::n_float_register_parameters_c * 3)) - 1; +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { + print_copy(__FUNCTION__, Interpreter::local_offset_in_bytes(offset())); + const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + + switch (_num_int_args) { + case 0: + __ ldr(c_rarg1, src); + _num_int_args++; + break; + case 1: + __ ldr(c_rarg2, src); + _num_int_args++; + break; + case 2: + __ ldr(c_rarg3, src); + _num_int_args++; + break; + default: + __ ldr(r0, src); + __ str(r0, Address(to(), _stack_offset)); + _stack_offset += wordSize; + _num_int_args++; + break; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { + print_copy(__FUNCTION__, Interpreter::local_offset_in_bytes(offset() + 1)); + const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + // Needs to be aligned to even registers. Means also won't be split across + // registers and stack. + + switch (_num_int_args) { + case 0: + case 1: + __ ldrd(c_rarg2, c_rarg3, src); + _num_int_args = 3; // force next args onto stack + break; + default: + __ ldrd(r0, temp(), src); + _stack_offset = (_stack_offset + 7) & ~7; // Align on 8-byte boundary + __ strd(r0, temp(), Address(to(), _stack_offset)); + _stack_offset += 2 * wordSize; + _num_int_args += 2; + break; + } +} + +#ifdef HARD_FLOAT_CC +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + print_copy(__FUNCTION__, Interpreter::local_offset_in_bytes(offset())); + const Address src(from(), Interpreter::local_offset_in_bytes(offset())); + + if (_fp_arg_mask & ((1 << Argument::n_float_register_parameters_c*2)-1)) { + unsigned index = __builtin_ctz(_fp_arg_mask); + __ vldr_f32(as_FloatRegister(index), src); + _fp_arg_mask &= ~(1 << index); + _next_double_dex += (~index) & 1; + } else { + __ ldr(r0, src); + __ str(r0, Address(to(), _stack_offset)); + _stack_offset += wordSize; + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + print_copy(__FUNCTION__, Interpreter::local_offset_in_bytes(offset() + 1)); + const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); + + if (_next_double_dex < Argument::n_float_register_parameters_c) { + _fp_arg_mask &= ~((3 << _next_double_dex*2) | ((1 << _next_double_dex+16))); + __ vldr_f64(as_DoubleFloatRegister(_next_double_dex++), src); + } else { + __ ldrd(r0, temp(), src); + _stack_offset = (_stack_offset + 7) & ~7; + __ strd(r0, temp(), Address(to(), _stack_offset)); + _stack_offset += 2 * wordSize; + } +} +#else +// Just pass them in integer registers and on the stack as we would +// any other argument +void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { + pass_int(); +} + +void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { + pass_long(); +} +#endif //HARD_FLOAT_CC + +void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { + print_copy(__FUNCTION__, Interpreter::local_offset_in_bytes(offset())); + + switch (_num_int_args) { + case 0: + assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); + __ add(c_rarg1, from(), Interpreter::local_offset_in_bytes(offset())); + _num_int_args++; + break; + case 1: + { + __ add(r0, from(), Interpreter::local_offset_in_bytes(offset())); + __ mov(c_rarg2, 0); + __ ldr(temp(), r0); + Label L; + __ cbz(temp(), L); + __ mov(c_rarg2, r0); + __ bind(L); + _num_int_args++; + break; + } + case 2: + { + __ add(r0, from(), 
Interpreter::local_offset_in_bytes(offset())); + __ mov(c_rarg3, 0); + __ ldr(temp(), r0); + Label L; + __ cbz(temp(), L); + __ mov(c_rarg3, r0); + __ bind(L); + _num_int_args++; + break; + } + default: + { + __ add(r0, from(), Interpreter::local_offset_in_bytes(offset())); + __ ldr(temp(), r0); + Label L; + __ cbnz(temp(), L); + __ mov(r0, 0); + __ bind(L); + __ str(r0, Address(to(), _stack_offset)); + _stack_offset += wordSize; + _num_int_args++; + break; + } + } +} + +void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { + // generate code to handle arguments + iterate(fingerprint); + + // return result handler + __ lea(r0, ExternalAddress(Interpreter::result_handler(method()->result_type()))); + __ b(lr); + + __ flush(); +} + + +// Implementation of SignatureHandlerLibrary + +void SignatureHandlerLibrary::pd_set_handler(address handler) {} + + +class SlowSignatureHandler : public NativeSignatureIterator { + private: + address _from; + intptr_t* _to; + intptr_t* _int_args; + intptr_t* _fp_args; + intptr_t* _fp_identifiers; + + int _num_int_reg_args; + int _next_double_dex; + + virtual void pass_int() + { + jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_int_reg_args < Argument::n_int_register_parameters_c-1) { + *_int_args++ = from_obj; + _num_int_reg_args++; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_long() + { + intptr_t high_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0)); + intptr_t low_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2*Interpreter::stackElementSize; + + if (_num_int_reg_args < Argument::n_int_register_parameters_c-2) { + // Passing longs. As c_rarg0 is always reserved for jni_env we could only + // possibly stash a long in r3:r2 due to alignment so we can only enter here + // with either zero or one parameters. + // Align to two + _int_args += 1 - _num_int_reg_args; // 0 or 1 + *_int_args++ = low_obj; + *_int_args++ = high_obj; + _num_int_reg_args = 3; + } else { + _to = (intptr_t*)(((intptr_t)_to + 7) & ~7); // Align to eight bytes + *_to++ = low_obj; + *_to++ = high_obj; + _num_int_reg_args = 3; + } + } + + virtual void pass_object() + { + intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if (_num_int_reg_args < Argument::n_int_register_parameters_c-1) { + *_int_args++ = (*from_addr == 0) ? NULL : (intptr_t)from_addr; + _num_int_reg_args++; + } else { + *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; + } + } +#ifdef HARD_FLOAT_CC + virtual void pass_float() + { + jint from_obj = *(jint*)(_from+Interpreter::local_offset_in_bytes(0)); + _from -= Interpreter::stackElementSize; + + if ((*_fp_identifiers) & 0xffff) { + unsigned index = __builtin_ctz(*_fp_identifiers); + _fp_args[index] = from_obj; + *_fp_identifiers ^= 1 << index; + _next_double_dex += (~index) & 1; + } else { + *_to++ = from_obj; + } + } + + virtual void pass_double() + { + intptr_t high_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0)); + intptr_t low_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); + _from -= 2*Interpreter::stackElementSize; + + if (_next_double_dex < Argument::n_float_register_parameters_c) { + //We can allocate to a register. 
+ int index = _next_double_dex++; + *_fp_identifiers &= ~((3 << index*2) | (1 << index+16)); + _fp_args[index*2] = low_obj; + _fp_args[index*2 + 1] = high_obj; + } else { + _to = (intptr_t*)(((intptr_t)_to + 7) & ~7); // Align to eight bytes + *_to++ = low_obj; + *_to++ = high_obj; + } + } +#else + virtual void pass_float() { pass_int(); } + virtual void pass_double() { pass_long(); } +#endif // HARD_FLOAT_CC + + public: + SlowSignatureHandler(const methodHandle &method, address from, intptr_t* to) + : NativeSignatureIterator(method) + { + _from = from; + _to = to; + // See layout in interpreter_aarch32.cpp + _int_args = to - (method->is_static() ? 19 : 20); + _fp_args = to - 16; //each slot is for a double + _fp_identifiers = to - 21; + *_fp_identifiers = (1 <<(Argument::n_float_register_parameters_c * 3)) - 1; + + _num_int_reg_args = (method->is_static() ? 1 : 0); + _next_double_dex = 0; + } +}; + + +IRT_ENTRY(address, + InterpreterRuntime::slow_signature_handler(JavaThread* thread, + Method* method, + intptr_t* from, + intptr_t* to)) + methodHandle m(thread, (Method*)method); + assert(m->is_native(), "sanity check"); + + // handle arguments + SlowSignatureHandler ssh(m, (address)from, to); + ssh.iterate(UCONST64(-1)); + + // return result handler + return Interpreter::result_handler(m->result_type()); +IRT_END --- /dev/null 2018-09-25 19:25:05.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/interpreterRT_aarch32.hpp 2018-09-25 19:25:05.000000000 +0300 @@ -0,0 +1,62 @@ +/* + * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_INTERPRETERRT_AARCH32_HPP +#define CPU_AARCH32_VM_INTERPRETERRT_AARCH32_HPP + +// This is included in the middle of class Interpreter. +// Do not include files here. 
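Review note: SignatureHandlerGenerator (emitting stub code) and SlowSignatureHandler (walking the signature in C++) above encode essentially the same AAPCS slotting rules twice: c_rarg0 is reserved for the JNIEnv, the next 32-bit arguments go to c_rarg1..c_rarg3, a long needs the aligned pair r2:r3 or an 8-byte-aligned stack slot, and later integer arguments fall through to the outgoing stack area. A minimal, self-contained sketch of just that slotting logic, not part of the patch (the ArgSlotter name and the string encoding of locations are invented for illustration):

#include <cstddef>
#include <string>
#include <vector>

// Hypothetical model of the integer-argument placement performed by the
// signature handlers above: r0 carries the JNIEnv, r1-r3 take the next
// 32-bit slots, a long needs the aligned pair r2:r3, and everything else
// spills to the outgoing stack area (8-byte aligned for longs).
struct ArgSlotter {
  int next_reg = 1;                    // r1 is the first free core register
  size_t stack_off = 0;                // byte offset into the stack area
  std::vector<std::string> placement;  // where each argument ended up

  void pass_int() {
    if (next_reg <= 3) {
      placement.push_back("r" + std::to_string(next_reg++));
    } else {
      placement.push_back("sp+" + std::to_string(stack_off));
      stack_off += 4;
    }
  }

  void pass_long() {
    if (next_reg <= 2) {               // r2:r3 still free as an even/odd pair
      placement.push_back("r2:r3");
      next_reg = 4;                    // later integer args go to the stack
    } else {
      stack_off = (stack_off + 7) & ~size_t(7);  // 8-byte align, as in pass_long()
      placement.push_back("sp+" + std::to_string(stack_off));
      stack_off += 8;
    }
  }
};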
+ +// native method calls + +class SignatureHandlerGenerator: public NativeSignatureIterator { + private: + MacroAssembler* _masm; + unsigned int _fp_arg_mask; + int _num_int_args; + unsigned _next_double_dex; + int _stack_offset; + + void pass_int(); + void pass_long(); + void pass_float(); + void pass_double(); + void pass_object(); + + public: + // Creation + SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); + + // Code generation + void generate(uint64_t fingerprint); + + // Code generation support + static Register from(); + static Register to(); + static Register temp(); +}; + +#endif // CPU_AARCH32_VM_INTERPRETERRT_AARCH32_HPP --- /dev/null 2018-09-25 19:25:06.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/javaFrameAnchor_aarch32.hpp 2018-09-25 19:25:06.000000000 +0300 @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH32_VM_JAVAFRAMEANCHOR_AARCH32_HPP +#define CPU_AARCH32_VM_JAVAFRAMEANCHOR_AARCH32_HPP + +private: + + // FP value associated with _last_Java_sp: + intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to + +public: + // Each arch must define reset, save, restore + // These are used by objects that only care about: + // 1 - initializing a new state (thread creation, javaCalls) + // 2 - saving a current state (javaCalls) + // 3 - restoring an old state (javaCalls) + + void clear(void) { + // clearing _last_Java_sp must be first + _last_Java_sp = NULL; + OrderAccess::release(); + _last_Java_fp = NULL; + _last_Java_pc = NULL; + } + + void copy(JavaFrameAnchor* src) { + // In order to make sure the transition state is valid for "this" + // We must clear _last_Java_sp before copying the rest of the new data + // + // Hack Alert: Temporary bugfix for 4717480/4721647 + // To act like previous version (pd_cache_state) don't NULL _last_Java_sp + // unless the value is changing + // + if (_last_Java_sp != src->_last_Java_sp) { + _last_Java_sp = NULL; + OrderAccess::release(); + } + _last_Java_fp = src->_last_Java_fp; + _last_Java_pc = src->_last_Java_pc; + // Must be last so profiler will always see valid frame if has_last_frame() is true + _last_Java_sp = src->_last_Java_sp; + } + + bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } + void make_walkable(JavaThread* thread); + void capture_last_Java_pc(void); + + intptr_t* last_Java_sp(void) const { return _last_Java_sp; } + + address last_Java_pc(void) { return _last_Java_pc; } + +private: + + static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } + +public: + + void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; OrderAccess::release(); } + + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + // Assert (last_Java_sp == NULL || fp == NULL) + void set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } + +#endif // CPU_AARCH32_VM_JAVAFRAMEANCHOR_AARCH32_HPP --- /dev/null 2018-09-25 19:25:07.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/jniFastGetField_aarch32.cpp 2018-09-25 19:25:07.000000000 +0300 @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "memory/resourceArea.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/safepoint.hpp" + +#define __ masm-> + +#define BUFFER_SIZE_ARMV7 31*wordSize +#define BUFFER_SIZE_ARMV6 51*wordSize + +// Instead of issuing a LoadLoad barrier we create an address +// dependency between loads; this might be more efficient. + +// Common register usage: +// r0/v0: result +// c_rarg0: jni env +// c_rarg1: obj +// c_rarg2: jfield id + +address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { + Register result = c_rarg0; + Register robj = c_rarg1; + Register rcounter = c_rarg3; + int args = RegSet::of(c_rarg0, c_rarg1, c_rarg2).bits(); + int nargs = 3; + + const char *name; + switch (type) { + case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; + case T_BYTE: name = "jni_fast_GetByteField"; break; + case T_CHAR: name = "jni_fast_GetCharField"; break; + case T_SHORT: name = "jni_fast_GetShortField"; break; + case T_INT: name = "jni_fast_GetIntField"; break; + case T_LONG: name = "jni_fast_GetLongField"; break; + case T_FLOAT: name = "jni_fast_GetFloatField"; break; + case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; + default: ShouldNotReachHere(); name = ""; // unreachable + } + ResourceMark rm; + BufferBlob* blob = BufferBlob::create(name, + VM_Version::features() & FT_ARMV7 ? + BUFFER_SIZE_ARMV7 : + BUFFER_SIZE_ARMV6 ); + CodeBuffer cbuf(blob); + MacroAssembler* masm = new MacroAssembler(&cbuf); + address fast_entry = __ pc(); + + Label slow; + + __ lea(rcounter, SafepointSynchronize::safepoint_counter_addr()); + __ ldr(rcounter, rcounter); + __ tst(rcounter, 1); + __ b(slow, Assembler::NE); + __ stmdb(sp, args); + // doesn't change c_rarg1 but does force a dependency on rcounter before + // performing __ ldr(robj, ... + __ eor(robj, c_rarg1, rcounter); + __ eor(robj, robj, rcounter); + + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->try_resolve_jobject_in_native(masm, c_rarg0, robj, noreg, slow); + + assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); + speculative_load_pclist[count] = __ pc(); // Used by the segfault handler + // c_rarg2 * 2 is offset + // Only ldr & ldrb support shifted loads + switch (type) { + case T_FLOAT: + case T_INT: __ ldr (result, Address(robj, c_rarg2, lsr(2))); break; + case T_BOOLEAN: __ ldrb(result, Address(robj, c_rarg2, lsr(2))); break; + default: { + __ lsr(c_rarg2, c_rarg2, 2); + switch(type) { + case T_BYTE: __ ldrsb (result, Address(robj, c_rarg2)); break; + case T_CHAR: __ ldrh (result, Address(robj, c_rarg2)); break; + case T_SHORT: __ ldrsh (result, Address(robj, c_rarg2)); break; + case T_DOUBLE: + case T_LONG: __ ldrd (result, Address(robj, c_rarg2)); break; + default: ShouldNotReachHere(); + } + } + } + __ lea(rscratch2, SafepointSynchronize::safepoint_counter_addr()); + // rscratch2 is address dependent on result. + // TODO Do we need to force dependency on r1 too? 
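// Review note (not part of the patch): the two eor instructions that follow
// rely on the identity (a ^ b) ^ b == a.  rscratch2 still holds the address of
// the safepoint counter afterwards, but its value is now data-dependent on the
// just-loaded field in `result`, so the ldr that re-reads the counter cannot be
// issued ahead of the field load.  Together with the matching eor pair applied
// to robj after the first counter load, this address dependency is what stands
// in for the explicit LoadLoad barrier mentioned at the top of this file.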
+ __ eor(rscratch2, rscratch2, result); + __ eor(rscratch2, rscratch2, result); + __ ldr(rscratch2, rscratch2); + __ cmp(rcounter, rscratch2); + +#ifdef HARD_FLOAT_CC + switch (type) { + case T_FLOAT: __ vmov_f32(d0, result, Assembler::EQ); break; + case T_DOUBLE: __ vmov_f64(d0, r0, r1, Assembler::EQ); break; // Change me if result changes + default: break; + } +#endif//HARD_FLOAT_CC + + __ add(sp, sp, nargs * wordSize, Assembler::EQ); // Pop args if we don't need them. + __ b(lr, Assembler::EQ); + + // Restore args for slowcase call into the vm + __ ldmia(sp, args); + + // Slowcase + slowcase_entry_pclist[count++] = __ pc(); + __ bind(slow); + + address slow_case_addr = NULL; + switch (type) { + case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; + case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; + case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; + case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; + case T_INT: slow_case_addr = jni_GetIntField_addr(); break; + case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; + case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; + case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; + default: ShouldNotReachHere(); + } + + { + __ enter(); + __ lea(rscratch2, ExternalAddress(slow_case_addr)); + __ bl(rscratch2); + __ maybe_isb(); + __ leave(); + __ b(lr); + } + __ flush (); + + return fast_entry; +} + +address JNI_FastGetField::generate_fast_get_boolean_field() { + return generate_fast_get_int_field0(T_BOOLEAN); +} + +address JNI_FastGetField::generate_fast_get_byte_field() { + return generate_fast_get_int_field0(T_BYTE); +} + +address JNI_FastGetField::generate_fast_get_char_field() { + return generate_fast_get_int_field0(T_CHAR); +} + +address JNI_FastGetField::generate_fast_get_short_field() { + return generate_fast_get_int_field0(T_SHORT); +} + +address JNI_FastGetField::generate_fast_get_int_field() { + return generate_fast_get_int_field0(T_INT); +} + +address JNI_FastGetField::generate_fast_get_long_field() { + return generate_fast_get_int_field0(T_LONG); +} + +address JNI_FastGetField::generate_fast_get_float_field() { + return generate_fast_get_int_field0(T_FLOAT); +} + +address JNI_FastGetField::generate_fast_get_double_field() { + return generate_fast_get_int_field0(T_DOUBLE); +} --- /dev/null 2018-09-25 19:25:08.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/jniTypes_aarch32.hpp 2018-09-25 19:25:08.000000000 +0300 @@ -0,0 +1,158 @@ +/* + * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_JNITYPES_AARCH32_HPP +#define CPU_AARCH32_VM_JNITYPES_AARCH32_HPP + +#include "jni.h" +#include "memory/allocation.hpp" +#include "oops/oop.hpp" + +// This file holds platform-dependent routines used to write primitive jni +// types to the array of arguments passed into JavaCalls::call + +class JNITypes : AllStatic { + // These functions write a java primitive type (in native format) + // to a java stack slot array to be passed as an argument to JavaCalls:calls. + // I.e., they are functionally 'push' operations if they have a 'pos' + // formal parameter. Note that jlong's and jdouble's are written + // _in reverse_ of the order in which they appear in the interpreter + // stack. This is because call stubs (see stubGenerator_sparc.cpp) + // reverse the argument list constructed by JavaCallArguments (see + // javaCalls.hpp). + +public: + // Ints are stored in native format in one JavaCallArgument slot at *to. + static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } + static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } + static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } + + // Longs are stored in native format in one JavaCallArgument slot at + // *(to+1). + /*static inline void put_long(jlong from, intptr_t *to) { + *(jlong*) (to + 1) = from; + } + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = from; + pos += 2; + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + *(jlong*) (to + 1 + pos) = *from; + pos += 2; + }*/ + static inline void put_long(jlong from, intptr_t *to) { + uint64_t val = from; + uint64_t mask = (1LL << 32) - 1; + val = (val >> 32) | ((val & mask) << 32); + *(jlong*)to = (jlong)val; + } + + static inline void put_long(jlong from, intptr_t *to, int& pos) { + uint64_t val = from; + uint64_t mask = (1LL << 32) - 1; + val = (val >> 32) | ((val & mask) << 32); + + *(jlong*) (to + pos) = (jlong)val; + pos += 2; + } + + static inline void put_long(jlong *from, intptr_t *to, int& pos) { + uint64_t val = *from; + uint64_t mask = (1LL << 32) - 1; + val = (val >> 32) | ((val & mask) << 32); + + *(jlong*) (to + pos) = (jlong)val; + pos += 2; + } + + + + // Oops are stored in native format in one JavaCallArgument slot at *to. + static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } + static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } + static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } + + // Floats are stored in native format in one JavaCallArgument slot at *to. + static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } + static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } + static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } + +#undef _JNI_SLOT_OFFSET +#define _JNI_SLOT_OFFSET 1 + // Doubles are stored in native word format in one JavaCallArgument + // slot at *(to+1). 
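Aside, not part of the patch: the only transformation the put_long() variants above (and the put_double() variants that follow) apply is exchanging the two 32-bit halves of the 64-bit value before storing it into the JavaCallArguments slots, which cooperates with the argument reversal described in the comment at the top of this class. A tiny standalone check of that bit manipulation:

#include <cassert>
#include <cstdint>

// Reproduces the word swap used by put_long()/put_double(): the high and low
// 32-bit halves are exchanged before being written to the two argument slots.
static inline uint64_t swap_words(uint64_t v) {
  const uint64_t mask = (uint64_t(1) << 32) - 1;
  return (v >> 32) | ((v & mask) << 32);
}

int main() {
  assert(swap_words(0x1122334455667788ULL) == 0x5566778811223344ULL);
  assert(swap_words(swap_words(0xCAFEBABEDEADBEEFULL)) == 0xCAFEBABEDEADBEEFULL);  // involution
  return 0;
}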
+ /*static inline void put_double(jdouble from, intptr_t *to) { + *(jdouble*) (to + 1) = from; + } + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = from; + pos += 2; + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + *(jdouble*) (to + 1 + pos) = *from; + pos += 2; + }*/ + + static inline void put_double(jdouble from, intptr_t *to) { + uint64_t val = *(uint64_t*)&from; + uint64_t mask = (1LL << 32) - 1; + val = (val >> 32) | ((val & mask) << 32); + *(uint64_t*)to = val; + } + + static inline void put_double(jdouble from, intptr_t *to, int& pos) { + uint64_t val = *(uint64_t*)&from; + uint64_t mask = (1LL << 32) - 1; + val = (val >> 32) | ((val & mask) << 32); + *(uint64_t*) (to + pos) = val; + pos += 2; + } + + static inline void put_double(jdouble *from, intptr_t *to, int& pos) { + uint64_t val = *(uint64_t*)from; + uint64_t mask = (1LL << 32) - 1; + val = (val >> 32) | ((val & mask) << 32); + *(uint64_t*) (to + pos) = val; + pos += 2; + } + + + // The get_xxx routines, on the other hand, actually _do_ fetch + // java primitive types from the interpreter stack. + // No need to worry about alignment on Intel. + static inline jint get_int (intptr_t *from) { return *(jint *) from; } + static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } + static inline oop get_obj (intptr_t *from) { return *(oop *) from; } + static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } + static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } +#undef _JNI_SLOT_OFFSET +}; + +#endif // CPU_AARCH32_VM_JNITYPES_AARCH32_HPP --- /dev/null 2018-09-25 19:25:09.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/jni_aarch32.h 2018-09-25 19:25:09.000000000 +0300 @@ -0,0 +1,59 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Oracle designates this + * particular file as subject to the "Classpath" exception as provided + * by Oracle in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ + +#ifndef _JAVASOFT_JNI_MD_H_ +#define _JAVASOFT_JNI_MD_H_ + +#if defined(SOLARIS) || defined(LINUX) || defined(_ALLBSD_SOURCE) + + +// Note: please do not change these without also changing jni_md.h in the JDK +// repository +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif +#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility) + #define JNIEXPORT __attribute__((visibility("default"))) + #define JNIIMPORT __attribute__((visibility("default"))) +#else + #define JNIEXPORT + #define JNIIMPORT +#endif + + #define JNICALL + typedef int jint; + typedef long long jlong; +#else + #define JNIEXPORT __declspec(dllexport) + #define JNIIMPORT __declspec(dllimport) + #define JNICALL __stdcall + + typedef int jint; + typedef __int64 jlong; +#endif + +typedef signed char jbyte; + +#endif /* !_JAVASOFT_JNI_MD_H_ */ --- /dev/null 2018-09-25 19:25:10.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/macroAssembler_aarch32.cpp 2018-09-25 19:25:10.000000000 +0300 @@ -0,0 +1,4941 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include + +#include "precompiled.hpp" +#include "jvm.h" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "interpreter/interpreter.hpp" +#include "compiler/disassembler.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_aarch32.hpp" +#include "oops/accessDecorators.hpp" +//This ifdef was introduced so a core build can be built +#ifdef COMPILER2 +#include "opto/compile.hpp" +#include "opto/node.hpp" +#endif + +#include "runtime/biasedLocking.hpp" +#include "runtime/icache.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/jniHandles.inline.hpp" +#include "runtime/sharedRuntime.hpp" + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// FIXME This is not a nice fix, this constant was in a compiler2 header +#define MAX_stubs_size_div2 (128 / 2) +// FIXME END + +// Note the corrections in the following three instructions for the PC. +// All literal modes that use the PC need to have the offset adjusted +// Patch any kind of instruction; there may be several instructions. +// Return the total length (in bytes) of the instructions. + +int MacroAssembler::pd_patch_instruction_size(address branch, address target) { + // Note the corrections + int instructions = 1; + long offset = target - (branch + 8); // correct for that PC = PC_this + 2 instructions + bool add = offset >= 0; + unsigned insn = *(unsigned*)branch; + int opc = Instruction_aarch32::extract(insn, 27, 24); + + if(0b1010 == opc || 0b1011 == opc) { + // Branch or branch with link + assert(0 == (offset & 3), "not aligned correctly"); + Instruction_aarch32::spatch(branch, 23, 0, offset / 4); + } else if (0b0011 == opc) { + // Movw, Movt or mov, orr, orr, orr + // patch up address load to registers (absolute address). + instructions = patch_oop(branch, target) / NativeInstruction::arm_insn_sz; + } else if (0b010 == (opc >> 1)) { + // LDR, LDRB, STR, STRB + Instruction_aarch32::patch(branch, 11, 0, uabs(offset)); + Instruction_aarch32::patch(branch, 23, 23, add); + } else if (0b000 == (opc >> 1)) { + // LDRH, LDRSH, LDRSB, LDRD, STRH, STRD + offset = uabs(offset); + Instruction_aarch32::patch(branch, 3, 0, offset & 0xf); + Instruction_aarch32::patch(branch, 11, 8, offset >> 4); + Instruction_aarch32::patch(branch, 23, 23, add); + } else if (0b1101 == opc) { + // VLDR, VSTR - NOTE VSTR(lit) is deprecated + offset = uabs(offset); + assert(0 == (offset & 3), "vldr, vstr can't do unaligned access"); + Instruction_aarch32::patch(branch, 7, 0, offset >> 2); + Instruction_aarch32::patch(branch, 23, 23, add); + } else if (0b0010 == opc) { + // ADR + Instruction_aarch32::patch(branch, 11, 0, encode_imm12(uabs(offset))); + Instruction_aarch32::patch(branch, 23, 22, add ? 0b10 : 0b01 ); + } else { + ShouldNotReachHere(); + } + // aarch64 had something for polling page load? 
+ return instructions * NativeInstruction::arm_insn_sz; +} + +int MacroAssembler::patch_oop(address insn_addr, address o) { + unsigned insn = *(unsigned*)insn_addr; + int opc = Instruction_aarch32::extract(insn, 27, 21); + if(0b0011000 == opc) { + //32-bit pointers, formed of a mov and a movt + assert(nativeInstruction_at(insn_addr+4)->is_movt(), "wrong insns in patch"); + + uint32_t btm = (uint32_t)o & 0xffff; + Instruction_aarch32::patch(insn_addr, 19, 16, btm >> 12); + Instruction_aarch32::patch(insn_addr, 11, 0, btm & 0xfff); + uint32_t top = (uint32_t)o >> 16; + Instruction_aarch32::patch(insn_addr + 4, 19, 16, top >> 12); + Instruction_aarch32::patch(insn_addr + 4, 11, 0, top & 0xfff); + return 2 * NativeInstruction::arm_insn_sz; + } else if(0b0011101 == opc) { + //Instead 32bit load sequence uses mov, orr, orr, orr + assert(nativeInstruction_at(insn_addr+4 )->is_orr(), "wrong insns in patch"); + assert(nativeInstruction_at(insn_addr+8 )->is_orr(), "wrong insns in patch"); + assert(nativeInstruction_at(insn_addr+12)->is_orr(), "wrong insns in patch"); + // FIXME this could carry us outside valid memory + + uint32_t addr = (uint32_t)o; + Instruction_aarch32::patch(insn_addr + 0, 11, 0, (0b0000 << 8) | ((addr >> 0) & 0xff)); + Instruction_aarch32::patch(insn_addr + 4, 11, 0, (0b1100 << 8) | ((addr >> 8) & 0xff)); + Instruction_aarch32::patch(insn_addr + 8, 11, 0, (0b1000 << 8) | ((addr >> 16) & 0xff)); + Instruction_aarch32::patch(insn_addr + 12, 11, 0, (0b0100 << 8) | ((addr >> 24) & 0xff)); + return 4 * NativeInstruction::arm_insn_sz; + } else { + ShouldNotReachHere(); + } + return 0; //won't reach here +} + +address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) { + long offset = 0; + int opc = Instruction_aarch32::extract(insn, 27, 24); + + if(0b1010 == opc || 0b1011 == opc) { + // Branch or branch with link + offset = Instruction_aarch32::sextract(insn, 23, 0) * 4; + } else if (0b0011 == opc) { + unsigned *insn_buf = (unsigned*)insn_addr; + int opc2 = Instruction_aarch32::extract(insn, 23, 21); + if(0b000 == opc2) { + // movw, movt (only on newer ARMs) + assert(nativeInstruction_at(&insn_buf[1])->is_movt(), "wrong insns in patch"); + uint32_t addr; + addr = Instruction_aarch32::extract(insn_buf[1], 19, 16) << 28; + addr |= Instruction_aarch32::extract(insn_buf[1], 11, 0) << 16; + addr |= Instruction_aarch32::extract(insn_buf[0], 19, 16) << 12; + addr |= Instruction_aarch32::extract(insn_buf[0], 11, 0); + return address(addr); + } else if(0b101 == opc2) { + // mov, orr, orr, orr + assert(nativeInstruction_at(&insn_buf[1])->is_orr(), "wrong insns in patch"); + assert(nativeInstruction_at(&insn_buf[2])->is_orr(), "wrong insns in patch"); + assert(nativeInstruction_at(&insn_buf[3])->is_orr(), "wrong insns in patch"); + uint32_t addr; + // TODO Check that the rotations are in the expected order. + addr = Assembler::decode_imm12(Instruction_aarch32::extract(insn_buf[0], 11, 0)); + addr |= Assembler::decode_imm12(Instruction_aarch32::extract(insn_buf[1], 11, 0)); + addr |= Assembler::decode_imm12(Instruction_aarch32::extract(insn_buf[2], 11, 0)); + addr |= Assembler::decode_imm12(Instruction_aarch32::extract(insn_buf[3], 11, 0)); + return address(addr); + } else { + ShouldNotReachHere(); + } + } else if (0b010 == (opc >> 1)) { + // LDR, LDRB, STR, STRB + offset = Instruction_aarch32::extract(insn, 11, 0); + bool add = Instruction_aarch32::extract(insn, 23, 23); + offset = add ? 
offset : -offset; + } else if (0b000 == (opc >> 1)) { + // LDRH, LDRSH, LDRSB, LDRD, STRH, STRD + offset = Instruction_aarch32::extract(insn, 3, 0); + offset |= Instruction_aarch32::extract(insn, 11, 8) << 4; + bool add = Instruction_aarch32::extract(insn, 23, 23); + offset = add ? offset : -offset; + } else if (0b1101 == opc) { + // VLDR, VSTR - NOTE VSTR(lit) is deprecated + offset = Instruction_aarch32::extract(insn, 7, 0) << 2; + bool add = Instruction_aarch32::extract(insn, 23, 23); + offset = add ? offset : -offset; + } else if (0b0010 == opc) { + // ADR + offset = decode_imm12(Instruction_aarch32::extract(insn, 11, 0)); + int code = Instruction_aarch32::extract(insn, 23, 22); + switch(code) { + case 0b01: offset = -offset; break; + case 0b10: break; + default: ShouldNotReachHere(); + } + } else { + ShouldNotReachHere(); + } + //Correct offset for PC + offset += 8; + return address(((uint32_t)insn_addr + offset)); +} + + +void MacroAssembler::serialize_memory(Register thread, Register tmp) { + dmb(Assembler::ISH); +} + +void MacroAssembler::safepoint_poll(Label& slow_path) { + if (SafepointMechanism::uses_thread_local_poll()) { + ldr(rscratch1, Address(rthread, Thread::polling_page_offset())); + tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path); + } else { + mov(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state())); + ldr(rscratch1, Address(rscratch1)); + cmp(rscratch1, SafepointSynchronize::_not_synchronized); + b(slow_path, Assembler::NE); + } +} + +// Just like safepoint_poll, but use an acquiring load for thread- +// local polling. +// +// We need an acquire here to ensure that any subsequent load of the +// global SafepointSynchronize::_state flag is ordered after this load +// of the local Thread::_polling page. We don't want this poll to +// return false (i.e. not safepointing) and a later poll of the global +// SafepointSynchronize::_state spuriously to return true. +// +// This is to avoid a race when we're in a native->Java transition +// racing the code which wakes up from a safepoint. +// +void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { + if (SafepointMechanism::uses_thread_local_poll()) { + lea(rscratch1, Address(rthread, Thread::polling_page_offset())); + ldr(rscratch1, rscratch1); + dmb(Assembler::ISH); + tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path); + } else { + safepoint_poll(slow_path); + } +} + +void MacroAssembler::reset_last_Java_frame(bool clear_fp) { + mov(rscratch1, 0); + // we must set sp to zero to clear frame + str(rscratch1, Address(rthread, JavaThread::last_Java_sp_offset())); + // must clear fp, so that compiled frames are not confused; it is + // possible that we need it only for debugging + if (clear_fp) { + str(rscratch1, Address(rthread, JavaThread::last_Java_fp_offset())); + } + + // Always clear the pc because it could have been set by make_walkable() + str(rscratch1, Address(rthread, JavaThread::last_Java_pc_offset())); +} + +// Calls to C land +// +// When entering C land, the rfp & sp of the last Java frame have to be recorded +// in the (thread-local) JavaThread object. When leaving C land, the last Java fp +// has to be reset to 0. This is required to allow proper stack traversal. 
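+// A sketch of the intended pairing (this is how call_VM_base further down in
+// this file uses it): the anchor is recorded immediately before the call into
+// the VM and cleared again once the VM call returns.
+//
+//   Label l;
+//   set_last_Java_frame(sp, rfp, l, rscratch2);                    // record sp/fp/pc in the JavaThread
+//   call_VM_leaf_base(entry_point, number_of_arguments, &l);       // call into C land
+//   reset_last_Java_frame(true);                                   // clear sp, fp and pc again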
+void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Register last_java_pc, + Register scratch) { + + if (last_java_pc->is_valid()) { + str(last_java_pc, Address(rthread, + JavaThread::frame_anchor_offset() + + JavaFrameAnchor::last_Java_pc_offset())); + } + + // determine last_java_sp register + if (last_java_sp == sp) { + mov(scratch, sp); + last_java_sp = scratch; + } else if (!last_java_sp->is_valid()) { + last_java_sp = sp; + } + + str(last_java_sp, Address(rthread, JavaThread::last_Java_sp_offset())); + + // last_java_fp is optional + if (last_java_fp->is_valid()) { + str(last_java_fp, Address(rthread, JavaThread::last_Java_fp_offset())); + } +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc, + Register scratch) { + if (last_java_pc != NULL) { + adr(scratch, last_java_pc); + } else { + // FIXME: This is almost never correct. We should delete all + // cases of set_last_Java_frame with last_java_pc=NULL and use the + // correct return address instead. + adr(scratch, pc()); + } + + str(scratch, Address(rthread, + JavaThread::frame_anchor_offset() + + JavaFrameAnchor::last_Java_pc_offset())); + + set_last_Java_frame(last_java_sp, last_java_fp, noreg, scratch); +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label &L, + Register scratch) { + if (L.is_bound()) { + set_last_Java_frame(last_java_sp, last_java_fp, target(L), scratch); + } else { + InstructionMark im(this); + L.add_patch_at(code(), locator()); + set_last_Java_frame(last_java_sp, last_java_fp, (address)NULL, scratch); + } +} + +void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf) { + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); + if (far_branches()) { + lea(lr, entry); + if (cbuf) cbuf->set_insts_mark(); + bl(lr); + } else { + if (cbuf) cbuf->set_insts_mark(); + bl(entry); + } +} + +void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); + if (far_branches()) { + lea(tmp, entry); + if (cbuf) cbuf->set_insts_mark(); + b(tmp); + } else { + if (cbuf) cbuf->set_insts_mark(); + b(entry); + } +} + +void MacroAssembler::reserved_stack_check() { + // testing if reserved zone needs to be enabled + Label no_reserved_zone_enabling; + + ldr(rscratch1, Address(rthread, JavaThread::reserved_stack_activation_offset())); + cmp(sp, rscratch1); + b(no_reserved_zone_enabling, Assembler::LO); + + enter(); // LR and FP are live. + lea(rscratch1, CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)); + mov(c_rarg0, rthread); + bl(rscratch1); + leave(); + + // We have already removed our own frame. + // throw_delayed_StackOverflowError will think that it's been + // called by our caller. 
+ lea(rscratch1, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry())); + b(rscratch1); + should_not_reach_here(); + + bind(no_reserved_zone_enabling); +} + +int MacroAssembler::biased_locking_enter(Register obj_reg, + Register swap_reg, + Register tmp_reg, + Register tmp_reg2, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + + if (PrintBiasedLockingStatistics && counters == NULL) + counters = BiasedLocking::counters(); + + assert(tmp_reg != noreg, "must be real register"); + assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp_reg2); + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + ldr(swap_reg, mark_addr); + } + andr(tmp_reg, swap_reg, markOopDesc::biased_lock_mask_in_place); + cmp(tmp_reg, markOopDesc::biased_lock_pattern); + b(cas_label, Assembler::NE); + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + load_prototype_header(tmp_reg, obj_reg); + orr(tmp_reg, tmp_reg, rthread); + eor(tmp_reg, swap_reg, tmp_reg); +// andr(tmp_reg, tmp_reg, ~((int) markOopDesc::age_mask_in_place)); + bic(tmp_reg, tmp_reg, markOopDesc::age_mask_in_place); + if (counters != NULL) { + Label around; + cbnz(tmp_reg, around); + atomic_inc(Address((address)counters->biased_lock_entry_count_addr()), tmp_reg, tmp_reg2); + b(done); + bind(around); + } else { + cbz(tmp_reg, done); + } + + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + andr(tmp_reg2, tmp_reg, markOopDesc::biased_lock_mask_in_place); + cbnz(tmp_reg2, try_revoke_bias); + + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. 
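+  // For reference, the 32-bit mark word layout these bit tests rely on is
+  // roughly (see markOop.hpp):
+  //   [ JavaThread*:23 | epoch:2 | age:4 | biased_lock:1 | lock:2 ]   (biased)
+  //   [ hash:25                  | age:4 | biased_lock:1 | lock:2 ]   (unbiased)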
+ andr(tmp_reg2, tmp_reg, markOopDesc::epoch_mask_in_place); + cbnz(tmp_reg2, try_rebias); + + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + { + Label here; + mov(tmp_reg2, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + andr(swap_reg, swap_reg, tmp_reg2); + orr(tmp_reg, swap_reg, rthread); + cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, tmp_reg2, here, slow_case); + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + bind(here); + if (counters != NULL) { + atomic_inc(Address((address)counters->anonymously_biased_lock_entry_count_addr()), + tmp_reg, tmp_reg2); + } + } + b(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + { + Label here; + load_prototype_header(tmp_reg, obj_reg); + orr(tmp_reg, rthread, tmp_reg); + cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, tmp_reg2, here, slow_case); + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + bind(here); + if (counters != NULL) { + atomic_inc(Address((address)counters->rebiased_lock_entry_count_addr()), + tmp_reg, tmp_reg2); + } + } + b(done); + + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + { + Label here, nope; + load_prototype_header(tmp_reg, obj_reg); + cmpxchg_obj_header(swap_reg, tmp_reg, obj_reg, tmp_reg2, here, &nope); + bind(here); + + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. 
+ if (counters != NULL) { + atomic_inc(Address((address)counters->revoked_lock_entry_count_addr()), tmp_reg, + tmp_reg2); + } + bind(nope); + } + + bind(cas_label); + + return null_check_offset; +} + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + ldr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andr(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); + cmp(temp_reg, markOopDesc::biased_lock_pattern); + b(done, Assembler::EQ); +} + + +static void pass_arg0(MacroAssembler* masm, Register arg) { + if (c_rarg0 != arg ) { + masm->mov(c_rarg0, arg); + } +} + +static void pass_arg1(MacroAssembler* masm, Register arg) { + if (c_rarg1 != arg ) { + masm->mov(c_rarg1, arg); + } +} + +static void pass_arg2(MacroAssembler* masm, Register arg) { + if (c_rarg2 != arg ) { + masm->mov(c_rarg2, arg); + } +} + +static void pass_arg3(MacroAssembler* masm, Register arg) { + if (c_rarg3 != arg ) { + masm->mov(c_rarg3, arg); + } +} + +void MacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // determine java_thread register + if (!java_thread->is_valid()) { + java_thread = rthread; + } + + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = sp; + } + + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + assert(java_thread == rthread, "unexpected register"); + + assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); + assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + + // push java thread (becomes first argument of C function) + + mov(c_rarg0, java_thread); + + // set last Java frame before call + assert(last_java_sp != rfp, "can't use rfp"); + + Label l; + set_last_Java_frame(last_java_sp, rfp, l, rscratch2); + + + // FIXME - Can save lr in more elegant way ? 
+  //str(lr, pre(sp, -wordSize));
+
+  // do the call, remove parameters
+  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);
+
+  //ldr(lr, post(sp, wordSize));
+
+  // reset last Java frame
+  // Only interpreter should have to clear fp
+  reset_last_Java_frame(true);
+
+  // C++ interp handles this in the interpreter
+  check_and_handle_popframe(java_thread);
+  check_and_handle_earlyret(java_thread);
+
+  if (check_exceptions) {
+    // check for pending exceptions (java_thread is set upon return)
+    ldr(rscratch2, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
+    Label ok;
+    cbz(rscratch2, ok);
+
+    lea(rscratch2, RuntimeAddress(StubRoutines::forward_exception_entry()));
+    // forward_exception uses LR to choose the exception handler, but LR was trashed by the code above.
+    // Since we got here from interpreted code, BL is an acceptable way to acquire the correct LR
+    // (see StubGenerator::generate_forward_exception).
+    bl(rscratch2);
+    bind(ok);
+  }
+
+  // get oop result if there is one and reset the value in the thread
+  if (oop_result->is_valid()) {
+    get_vm_result(oop_result, java_thread);
+  }
+}
+
+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
+  call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
+}
+
+// Maybe emit a call via a trampoline.  If the code cache is small
+// trampolines won't be emitted.
+
+void MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) {
+  assert(JavaThread::current()->is_Compiler_thread(), "just checking");
+  assert(entry.rspec().type() == relocInfo::runtime_call_type
+         || entry.rspec().type() == relocInfo::opt_virtual_call_type
+         || entry.rspec().type() == relocInfo::static_call_type
+         || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
+
+  if (cbuf) {
+    cbuf->set_insts_mark();
+  }
+
+  if (far_branches()) {
+    // Build the trampoline so that the destination address is a raw 4-byte value;
+    // that way it can be patched atomically.
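+    // Schematically, the sequence emitted below is:
+    //   add  lr, pc, #imm      ; return address = first instruction past this NativeCall
+    //   ldr  pc, [pc, #imm]    ; branch through the raw destination word
+    //   .word entry.target()   ; destination literal, patchable with a single store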
+ relocate(entry.rspec()); + address start = pc(); + add(lr, r15_pc, NativeCall::instruction_size - 2 * NativeInstruction::arm_insn_sz); + ldr(r15_pc, Address(r15_pc, 4)); + emit_int32((uintptr_t) entry.target()); + // possibly pad the call to the NativeCall size to make patching happy + while (pc() - start < NativeCall::instruction_size) { + nop(); + } + assert(pc() - start == NativeCall::instruction_size, "fix NativeTrampolineCall::instruction_size!"); + } else { + bl(entry); + } +} + +void MacroAssembler::c2bool(Register x) { + ands(r0, r0, 0xff); + mov(r0, 1, Assembler::NE); +} + +void MacroAssembler::ic_call(address entry, jint method_index) { + RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); + // address const_ptr = long_constant((jlong)Universe::non_oop_word()); + // unsigned long offset; + // ldr_constant(rscratch2, const_ptr); + movptr(rscratch2, (uintptr_t)Universe::non_oop_word()); + trampoline_call(Address(entry, rh)); +} + +// Implementation of call_VM versions + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + bool check_exceptions) { + call_VM_helper(oop_result, entry_point, 0, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions) { + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + assert(arg_1 != c_rarg3, "smashed arg"); + assert(arg_2 != c_rarg3, "smashed arg"); + pass_arg3(this, arg_3); + + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + call_VM_base(oop_result, rthread, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions) { + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + assert(arg_1 != c_rarg3, "smashed arg"); + assert(arg_2 != c_rarg3, "smashed arg"); + pass_arg3(this, arg_3); + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + + +void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { + ldr(oop_result, 
 Address(java_thread, JavaThread::vm_result_offset()));
+  assert(oop_result != rscratch2, "can't be");
+  mov(rscratch2, 0);
+  str(rscratch2, Address(java_thread, JavaThread::vm_result_offset()));
+  verify_oop(oop_result, "broken oop in call_VM_base");
+}
+
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
+  ldr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
+  assert(metadata_result != rscratch2 &&
+         java_thread != rscratch2, "can't be");
+  mov(rscratch2, 0);
+  str(rscratch2, Address(java_thread, JavaThread::vm_result_2_offset()));
+}
+
+void MacroAssembler::align(int modulus) {
+  while (offset() % modulus != 0) nop();
+}
+
+// these are no-ops overridden by InterpreterMacroAssembler
+
+void MacroAssembler::check_and_handle_earlyret(Register java_thread) { }
+
+void MacroAssembler::check_and_handle_popframe(Register java_thread) { }
+
+
+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
+                                                      Register tmp,
+                                                      int offset) {
+  intptr_t value = *delayed_value_addr;
+  if (value != 0)
+    return RegisterOrConstant(value + offset);
+
+  // load indirectly to solve generation ordering problem
+  ldr(tmp, ExternalAddress((address) delayed_value_addr));
+
+  if (offset != 0)
+    add(tmp, tmp, offset);
+
+  return RegisterOrConstant(tmp);
+}
+
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <intf_klass, itable_index>.
+// The receiver klass is in recv_klass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+void MacroAssembler::lookup_interface_method(Register recv_klass,
+                                             Register intf_klass,
+                                             RegisterOrConstant itable_index,
+                                             Register method_result,
+                                             Register scan_temp,
+                                             Label& L_no_such_interface,
+                                             bool return_method) {
+  assert_different_registers(recv_klass, intf_klass, scan_temp);
+  assert_different_registers(method_result, intf_klass, scan_temp);
+  assert(recv_klass != method_result || !return_method,
+         "recv_klass can be destroyed when method isn't needed");
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+  int vtable_base = in_bytes(InstanceKlass::vtable_start_offset());
+  int itentry_off = itableMethodEntry::method_offset_in_bytes();
+  int scan_step = itableOffsetEntry::size() * wordSize;
+  int vte_size = vtableEntry::size_in_bytes();
+  assert(vte_size == wordSize, "else adjust times_vte_scale");
+
+  ldr(scan_temp, Address(recv_klass, in_bytes(InstanceKlass::vtable_length_offset())));
+
+  // %%% Could store the aligned, prescaled offset in the klassoop.
+  // lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base));
+  lea(scan_temp, Address(recv_klass, scan_temp, lsl(2)));
+  add(scan_temp, scan_temp, vtable_base);
+
+  if (return_method) {
+    // Adjust recv_klass by scaled itable_index, so we can free itable_index.
+    assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+    // lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off));
+    lea(recv_klass, itable_index.is_register() ?
+ Address(recv_klass, itable_index, lsl(2)) : + Address(recv_klass, itable_index.as_constant() << 2)); + if (itentry_off) + add(recv_klass, recv_klass, itentry_off); + } + + // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { + // if (scan->interface() == intf) { + // result = (klass + scan->offset() + itable_index); + // } + // } + Label search, found_method; + + for (int peel = 1; peel >= 0; peel--) { + ldr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); + cmp(intf_klass, method_result); + + if (peel) { + b(found_method, Assembler::EQ); + } else { + b(search, Assembler::NE); + // (invert the test to fall through to found_method...) + } + + if (!peel) break; + + bind(search); + + // Check that the previous entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + cbz(method_result, L_no_such_interface); + add(scan_temp, scan_temp, scan_step); + } + + bind(found_method); + + if (return_method) { + // Got a hit. + ldr(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); + ldr(method_result, Address(recv_klass, scan_temp)); + } +} + +// virtual method calling +void MacroAssembler::lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result) { + const int base = in_bytes(InstanceKlass::vtable_start_offset()); + int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes(); + if (vtable_index.is_register()) { + lea(method_result, Address(recv_klass, + vtable_index.as_register(), + lsl(LogBytesPerWord))); + ldr(method_result, Address(method_result, vtable_offset_in_bytes)); + } else { + vtable_offset_in_bytes += vtable_index.as_constant() * wordSize; + if(is_valid_for_offset_imm(vtable_offset_in_bytes, 12)) { + ldr(method_result, Address(recv_klass, vtable_offset_in_bytes)); + } else { + mov(method_result, vtable_offset_in_bytes); + ldr(method_result, Address(recv_klass, method_result)); + } + } +} + +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success) { + Label L_failure; + check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); + bind(L_failure); +} + + +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset) { + assert_different_registers(sub_klass, super_klass, temp_reg); + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp_reg != noreg, "supply either a temp or a register offset"); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + Address super_check_offset_addr(super_klass, sco_offset); 
+ + // Hacked jmp, which may only be used just before L_fallthrough. +#define final_jmp(label) \ + if (&(label) == &L_fallthrough) { /*do nothing*/ } \ + else b(label) /*omit semi*/ + + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. + // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. + cmp(sub_klass, super_klass); + b(*L_success, Assembler::EQ); + + // Check the supertype display: + if (must_load_sco) { + ldr(temp_reg, super_check_offset_addr); + super_check_offset = RegisterOrConstant(temp_reg); + } + Address super_check_addr(sub_klass, super_check_offset); + ldr(rscratch1, super_check_addr); + cmp(super_klass, rscratch1); // load displayed supertype + + // This check has worked decisively for primary supers. + // Secondary supers are sought in the super_cache ('super_cache_addr'). + // (Secondary supers are interfaces and very deeply nested subtypes.) + // This works in the same check above because of a tricky aliasing + // between the super_cache and the primary super display elements. + // (The 'super_check_addr' can address either, as the case requires.) + // Note that the cache is updated below if it does not help us find + // what we need immediately. + // So if it was a primary super, we can just fail immediately. + // Otherwise, it's the slow path for us (no success at this point). + + if (super_check_offset.is_register()) { + b(*L_success, Assembler::EQ); + cmp(super_check_offset.as_register(), sc_offset); + if (L_failure == &L_fallthrough) { + b(*L_slow_path, Assembler::EQ); + } else { + b(*L_failure, Assembler::NE); + final_jmp(*L_slow_path); + } + } else if (super_check_offset.as_constant() == sc_offset) { + // Need a slow path; fast failure is impossible. + if (L_slow_path == &L_fallthrough) { + b(*L_success, Assembler::EQ); + } else { + b(*L_slow_path, Assembler::NE); + final_jmp(*L_success); + } + } else { + // No slow path; it's a fast decision. + if (L_failure == &L_fallthrough) { + b(*L_success, Assembler::EQ); + } else { + b(*L_failure, Assembler::NE); + final_jmp(*L_success); + } + } + + bind(L_fallthrough); + +#undef final_jmp +} + +// These two are taken from x86, but they look generally useful + +// scans count pointer sized words at [addr] for occurence of value, +// generic +void MacroAssembler::repne_scan(Register addr, Register value, Register count, + Register scratch) { + Label loop, fail, found; + cmp(count, 0); + b(fail, EQ); + + bind(loop); + ldr(scratch, post(addr, wordSize)); + cmp(value, scratch); + b(found, EQ); + subs(count, count, 1); + b(loop, NE); + + bind(fail); + cmp(sp, 0); // sp never zero + bind(found); +} + +// Form an address from base + offset in Rd. Rd may or may +// not actually be used: you must use the Address that is returned. +// It is up to you to ensure that the shift provided matches the size +// of your data. 
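+// Illustrative use (register names and the offset here are arbitrary):
+//   Address a = form_address(rscratch2, r1, 0x12340, 2);  // offset too large for a single ldr immediate
+//   ldr(r0, a);  // 'a' is either (r1, imm) or (rscratch2, small imm) - always use the returned Address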
+Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset, int shift) { + // form_address result should only be used together with ldr/str instructions + // otherwise please provide exact type instead of IDT_INT or apply safe_for() + if (Address::offset_ok_for_immed(byte_offset, Address::IDT_INT)) + // It fits; no need for any heroics + return Address(base, byte_offset); + + // See if we can do this with two 12-bit offsets + { + unsigned long masked_offset = byte_offset & ~0xfff; + if (Address::offset_ok_for_immed(byte_offset - masked_offset, Address::IDT_INT) + && Assembler::operand_valid_for_add_sub_immediate(masked_offset)) { + add(Rd, base, masked_offset); + byte_offset -= masked_offset; + return Address(Rd, byte_offset); + } + } + + // Do it the hard way + mov(Rd, byte_offset); + add(Rd, base, Rd); + return Address(Rd); +} + +// scans count 4 byte words at [addr] for occurence of value, +// generic +/*void MacroAssembler::repne_scanw(Register addr, Register value, Register count, + Register scratch) { + Label Lloop, Lexit; + cbz(count, Lexit); + bind(Lloop); + ldr(scratch, post(addr, wordSize)); + cmp(value, scratch); + b(Lexit, EQ); + sub(count, count, 1); + cbnz(count, Lloop); + bind(Lexit); +}*/ + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + assert_different_registers(sub_klass, super_klass, temp_reg); + if (temp2_reg != noreg) + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, rscratch1); +#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + // a couple of useful fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + Address secondary_supers_addr(sub_klass, ss_offset); + Address super_cache_addr( sub_klass, sc_offset); + + BLOCK_COMMENT("check_klass_subtype_slow_path"); + + // Do a linear scan of the secondary super-klass chain. + // This code is rarely used, so simplicity is a virtue here. + // The repne_scan instruction uses fixed registers, which we must spill. + // Don't worry too much about pre-existing connections with the input regs. + + assert(sub_klass != r0, "killed reg"); // killed by mov(r0, super) + assert(sub_klass != r2, "killed reg"); // killed by lea(r2, &pst_counter) + + RegSet pushed_registers; + if (!IS_A_TEMP(r2)) pushed_registers += r2; + if (!IS_A_TEMP(r14)) pushed_registers += r14; + + if (super_klass != r0) { + if (!IS_A_TEMP(r0)) pushed_registers += r0; + } + + push(pushed_registers, sp); + + // Get super_klass value into r0 (even if it was in r5 or r2). + if (super_klass != r0) { + mov(r0, super_klass); + } + +#ifndef PRODUCT + mov(rscratch2, (address)&SharedRuntime::_partial_subtype_ctr); + Address pst_counter_addr(rscratch2); + ldr(rscratch1, pst_counter_addr); + add(rscratch1, rscratch1, 1); + str(rscratch1, pst_counter_addr); +#endif //PRODUCT + + // We will consult the secondary-super array. + ldr(r14, secondary_supers_addr); + // Load the array length. + ldr(r2, Address(r14, Array::length_offset_in_bytes())); + // Skip to start of data. 
+ add(r14, r14, Array::base_offset_in_bytes()); + + cmp(sp, 0); // Clear Z flag; SP is never zero + // Scan R2 words at [R14] for an occurrence of R0. + // Set NZ/Z based on last compare. + repne_scan(r14, r0, r2, rscratch1); + + // Unspill the temp. registers: + pop(pushed_registers, sp); + + b(*L_failure, Assembler::NE); + + // Success. Cache the super we found and proceed in triumph. + str(super_klass, super_cache_addr); + + if (L_success != &L_fallthrough) { + b(*L_success); + } + +#undef IS_A_TEMP + + bind(L_fallthrough); +} + + +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) return; + + // Pass register number to verify_oop_subroutine + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("verify_oop: %s: %s", reg->name(), s); + b = code_string(ss.as_string()); + } + BLOCK_COMMENT("verify_oop {"); + + stmdb(sp, RegSet::of(r0, r1, rscratch1, rscratch2, lr).bits()); + + mov(r0, reg); + mov(rscratch1, (address)b); + mrs(r1); + + // call indirectly to solve generation ordering problem + reg_printf("Verify oop entry, sp = %p, rfp = %p\n", sp, rfp); + lea(rscratch2, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); + ldr(rscratch2, Address(rscratch2)); + bl(rscratch2); + reg_printf("Verify oop exit, sp = %p, rfp = %p\n", sp, rfp); + + msr(r1); + ldmia(sp, RegSet::of(r0, r1, rscratch1, rscratch2, lr).bits()); + + BLOCK_COMMENT("} verify_oop"); +} + +void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + if (!VerifyOops) return; + + const char* b = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("verify_oop_addr: %s", s); + b = code_string(ss.as_string()); + } + BLOCK_COMMENT("verify_oop_addr {"); + + stmdb(sp, RegSet::of(r0, r1, rscratch1, rscratch2, lr).bits()); + mrs(r1); + + // addr may contain sp so we will have to adjust it based on the + // pushes that we just did. + if (addr.uses(sp)) { + lea(r0, addr); + ldr(r0, Address(r0, 5 * wordSize)); + } else { + ldr(r0, addr); + } + mov(rscratch1, (address)b); + + // call indirectly to solve generation ordering problem + lea(rscratch2, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); + ldr(rscratch2, Address(rscratch2)); + bl(rscratch2); + + msr(r1); + ldmia(sp, RegSet::of(r0, r1, rscratch1, rscratch2, lr).bits()); + + BLOCK_COMMENT("} verify_oop_addr"); +} + +Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, + int extra_slot_offset) { + // cf. TemplateTable::prepare_invoke(), if (load_receiver). + int stackElementSize = Interpreter::stackElementSize; + int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); +#ifdef ASSERT + int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); + assert(offset1 - offset == stackElementSize, "correct arithmetic"); +#endif + if (arg_slot.is_constant()) { + return Address(sp, arg_slot.as_constant() * stackElementSize + + offset); + } else { + add(rscratch1, sp, arg_slot.as_register(), + lsl(exact_log2(stackElementSize))); + return Address(rscratch1, offset); + } +} + +void MacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments, + Label *retaddr) { + Label E, L; + + //FIXME Do this alignment in a more elegant way + mov(rscratch2, sp); + sub(sp, sp, wordSize); + bic(sp, sp, 2 * wordSize - 1); // Align to eight bytes + str(rscratch2, Address(sp)); + + // FIXME Do we need to preserve rscratch2? 
+ //str(rscratch2, Address(pre(sp, -wordSize))); + + mov(rscratch2, entry_point); + reg_printf("\tJust about to call into the VM, rfp = %p\n", rfp); + bl(rscratch2); + if (retaddr) + bind(*retaddr); + reg_printf("\tReturned from call into the VM, rfp = %p\n", rfp); + + //ldr(rscratch2, Address(post(sp, wordSize))); + + //Undo alignment + ldr(sp, Address(sp)); + + maybe_isb(); +} + +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { + call_VM_leaf_base(entry_point, number_of_arguments); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { + pass_arg0(this, arg_0); + call_VM_leaf_base(entry_point, 1); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + pass_arg0(this, arg_0); + pass_arg1(this, arg_1); + call_VM_leaf_base(entry_point, 2); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, + Register arg_1, Register arg_2) { + pass_arg0(this, arg_0); + pass_arg1(this, arg_1); + pass_arg2(this, arg_2); + call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 2); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + assert(arg_0 != c_rarg2, "smashed arg"); + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { + assert(arg_0 != c_rarg3, "smashed arg"); + assert(arg_1 != c_rarg3, "smashed arg"); + assert(arg_2 != c_rarg3, "smashed arg"); + pass_arg3(this, arg_3); + assert(arg_0 != c_rarg2, "smashed arg"); + assert(arg_1 != c_rarg2, "smashed arg"); + pass_arg2(this, arg_2); + assert(arg_0 != c_rarg1, "smashed arg"); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 4); +} + +// Clobbers rscratch1 +void MacroAssembler::null_check(Register reg, int offset) { + if (needs_explicit_null_check(offset)) { + // provoke OS NULL exception if reg = NULL by + // accessing M[reg] w/o changing any registers + // NOTE: this is plenty to provoke a segv + reg_printf("Generating OS check null with ptr = %p\n", reg); + assert(reg != rscratch1, "can't be"); + ldr(rscratch1, Address(reg)); + } else { + // nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL + } +} + +// MacroAssembler protected routines needed to implement +// public methods + +void MacroAssembler::mov(Register r, Address dest, Condition cond) { + code_section()->relocate(pc(), dest.rspec()); + uint32_t imm32 = (uint32_t)dest.target(); + movptr(r, imm32, cond); +} + +// Move a constant pointer into r. In aarch32 address space +// is 32 bits in size and so a pointer can be encoded in two mov +// instructions. 
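+// The two-instruction form referred to above is the ARMv7 movw/movt pair
+// (patch_oop above also handles the longer mov/orr/orr/orr sequence used
+// when movw/movt are not available):
+//   movw r, #(imm32 & 0xffff)   ; low 16 bits
+//   movt r, #(imm32 >> 16)      ; high 16 bits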
+void MacroAssembler::movptr(Register r, uintptr_t imm32, Condition cond) { +#ifndef PRODUCT + { + char buffer[64]; + snprintf(buffer, sizeof(buffer), "0x%"PRIX32, imm32); + block_comment(buffer); + } +#endif + Assembler::mov_immediate32(r, imm32, cond, false); +} + +void MacroAssembler::ret(Register reg) { + assert(reg == lr, "Can do return only to LR"); + b(lr); +} + +void MacroAssembler::atomic_inc(Register counter_addr, Register tmp) { + Label retry_load; + bind(retry_load); + // flush and load exclusive from the memory location + ldrex(tmp, counter_addr); + add(tmp, tmp, 1); + // if we store+flush with no intervening write tmp wil be zero + strex(tmp, tmp, counter_addr); + cmp(tmp, 0); + b(retry_load, Assembler::NE); +} + + +// MacroAssembler routines found actually to be needed + +void MacroAssembler::push(Register src) +{ + str(src, Address(pre(sp, -1 * wordSize))); +} + +void MacroAssembler::pop(Register dst) +{ + ldr(dst, Address(post(sp, 1 * wordSize))); +} + +// Note: load_unsigned_short used to be called load_unsigned_word. +int MacroAssembler::load_unsigned_short(Register dst, Address src) { + int off = offset(); + ldrh(dst, src); + return off; +} + +int MacroAssembler::load_unsigned_byte(Register dst, Address src) { + int off = offset(); + ldrb(dst, src); + return off; +} + +int MacroAssembler::load_signed_short(Register dst, Address src) { + int off = offset(); + ldrsh(dst, src); + return off; +} + +int MacroAssembler::load_signed_byte(Register dst, Address src) { + int off = offset(); + ldrsb(dst, src); + return off; +} + +void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { + switch (size_in_bytes) { + //case 8: ldr(dst, src); break; + case 4: ldr(dst, src); break; + case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; + case 1: is_signed ? 
load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { + switch (size_in_bytes) { + //case 8: str(src, dst); break; + case 4: str(src, dst); break; + case 2: strh(src, dst); break; + case 1: strb(src, dst); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::decrement(Register reg, int value) { + if (value < 0) { + increment(reg, -value); + return; + } + if (value == 0) { + return; + } + if (operand_valid_for_add_sub_immediate(value)) { + sub(reg, reg, value); + return; + } + assert(reg != rscratch2, "invalid register for decrement"); + mov(rscratch2, (unsigned int) value); + sub(reg, reg, rscratch2); +} + +void MacroAssembler::decrement(Address dst, int value) { + assert(!dst.uses(rscratch1), "invalid address for decrement"); + ldr(rscratch1, dst); + decrement(rscratch1, value); + str(rscratch1, dst); +} + +void MacroAssembler::increment(Register reg, int value) { + if (value < 0) { + decrement(reg, -value); + return; + } + if (value == 0) { + return; + } + if (operand_valid_for_add_sub_immediate(value)) { + add(reg, reg, value); + return; + } + assert(reg != rscratch2, "invalid register for increment"); + mov(rscratch2, (unsigned int) value); + add(reg, reg, rscratch2); +} + +void MacroAssembler::increment(Address dst, int value) { + assert(!dst.uses(rscratch1), "invalid address for increment"); + ldr(rscratch1, dst); + increment(rscratch1, value); + str(rscratch1, dst); +} + +// Loads and stores everything except the pc and sp +void MacroAssembler::pusha() { + unsigned regset = 0b0101111111111111; + stmdb(sp, regset); +} +void MacroAssembler::popa() { + unsigned regset = 0b0101111111111111; + ldmia(sp, regset); +} + +static void multiple_reg_check(unsigned int bitset, Register stack) { + const unsigned int pcbit = 1 << r15_pc->encoding(); + const unsigned int lrbit = 1 << lr->encoding(); + const unsigned int spbit = 1 << sp->encoding(); + const unsigned int stackbit = 1 << stack->encoding(); + assert(!(bitset & spbit), "The SP can be in the list. However, " + "ARM deprecates using these instructions with SP in the list."); + assert(!(bitset & pcbit) || !(bitset & lrbit), + "ARM deprecates using these instructions with both " + "the LR and the PC in the list."); + assert(!(bitset & stackbit), "Instructions with the base register " + "in the list and ! specified are only available before ARMv7, " + "and ARM deprecates the use of such instructions. " + "The value of the base register after such an instruction is UNKNOWN"); +} + +// Push lots of registers in the bit set supplied. Don't push sp. +// Return the number of words pushed +int MacroAssembler::push(unsigned int bitset, Register stack) { + multiple_reg_check(bitset, stack); + unsigned bc = bitset, count = 0, i; + for(i = 0; i <= 15; i++) { + if (1 & bc) count++; + bc >>= 1; + } + // TODO Also why did it only do even quantities before? + stmdb(stack, bitset); + return count; +} + +int MacroAssembler::pop(unsigned int bitset, Register stack) { + multiple_reg_check(bitset, stack); + unsigned bc = bitset, count = 0, i; + for(i = 0; i <= 15; i++) { + if (1 & bc) count++; + bc >>= 1; + } + // TODO Also why did it only do even quantities before? + ldmia(stack, bitset); + return count; +} + +void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) { + Label done, not_weak; + cbz(value, done); // Use NULL as-is. 
+ + STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u); + tbz(value, 0, not_weak); // Test for jweak tag. + + // Resolve jweak. + + access_load_word_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, + value, Address(value, -JNIHandles::weak_tag_value), tmp, noreg); + verify_oop(value); + b(done); + + + bind(not_weak); + // Resolve (untagged) jobject. + access_load_word_at(T_OBJECT, IN_NATIVE, value, Address(value), tmp, noreg); + verify_oop(value); + bind(done); +} + +void MacroAssembler::stop(const char* msg) { + pusha(); + // Save old sp value + add(rscratch2, sp, 14 * wordSize); + str(rscratch2, Address(pre(sp, -4))); + mov(c_rarg0, (address)msg); + mov(c_rarg1, r15_pc); + sub(c_rarg1, c_rarg1, 8); // Restore to actual value + mov(c_rarg2, sp); + mov(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug32)); + bl(c_rarg3); + hlt(0); +} + +void MacroAssembler::unimplemented(const char* what) { + const char* buf = NULL; + { + ResourceMark rm; + stringStream ss; + ss.print("unimplemented: %s", what); + buf = code_string(ss.as_string()); + } + stop(buf); +} + +// this simulates the behaviour of the x86 cmpxchg instruction using a +// load linked/store conditional pair. we use the acquire/release +// versions of these instructions so that we flush pending writes as +// per Java semantics. + +// n.b the x86 version assumes the old value to be compared against is +// in rax and updates rax with the value located in memory if the +// cmpxchg fails. we supply a register for the old value explicitly + +// the aarch32 load linked/store conditional instructions do not +// accept an offset. so, unlike x86, we must provide a plain register +// to identify the memory word to be compared/exchanged rather than a +// register+offset Address. + +void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, + Label &succeed, Label *fail) { + // oldv holds comparison value + // newv holds value to write in exchange + // addr identifies memory word to compare against/update + // tmp returns 0/1 for success/failure + Label retry_load, nope; + + bind(retry_load); + // flush and load exclusive from the memory location + // and fail if it is not what we expect + ldrex(tmp, addr); + cmp(tmp, oldv); + b(nope, Assembler::NE); + // if we store+flush with no intervening write tmp wil be zero + strex(tmp, newv, addr); + cmp(tmp, 0); + b(succeed, Assembler::EQ); + // retry so we only ever return after a load fails to compare + // ensures we don't return a stale value after a failed write. 
+ b(retry_load); + // if the memory word differs we return it in oldv and signal a fail + bind(nope); + membar(AnyAny); + mov(oldv, tmp); + if (fail) + b(*fail); +} + +void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, + Label &succeed, Label *fail) { + assert(oopDesc::mark_offset_in_bytes() == 0, "assumption"); + cmpxchgptr(oldv, newv, obj, tmp, succeed, fail); +} + +void MacroAssembler::cmpxchgw(Register oldv, Register newv, Register addr, Register tmp, + Label &succeed, Label *fail) { + // oldv holds comparison value + // newv holds value to write in exchange + // addr identifies memory word to compare against/update + // tmp returns 0/1 for success/failure + Label retry_load, nope; + + bind(retry_load); + // flush and load exclusive from the memory location + // and fail if it is not what we expect + ldrex(tmp, addr); + cmp(tmp, oldv); + b(nope, Assembler::NE); + // if we store+flush with no intervening write tmp wil be zero + strex(tmp, newv, addr); + cmp(tmp, 0); + b(succeed, Assembler::EQ); + // retry so we only ever return after a load fails to compare + // ensures we don't return a stale value after a failed write. + b(retry_load); + // if the memory word differs we return it in oldv and signal a fail + bind(nope); + membar(AnyAny); + mov(oldv, tmp); + if (fail) + b(*fail); +} + +#ifndef PRODUCT +extern "C" void findpc(intptr_t x); +#endif + +void MacroAssembler::debug32(char* msg, int32_t pc, int32_t regs[]) +{ + print_unseen_bytecodes(); + // In order to get locks to work, we need to fake a in_VM state + if (ShowMessageBoxOnError) { + JavaThread* thread = JavaThread::current(); + JavaThreadState saved_state = thread->thread_state(); + thread->set_thread_state(_thread_in_vm); +#ifndef PRODUCT + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + ttyLocker ttyl; + BytecodeCounter::print(); + } +#endif + if (os::message_box(msg, "Execution stopped, print registers?")) { + ttyLocker ttyl; + tty->print_cr(" pc = 0x%016x", pc); +#ifndef PRODUCT + tty->cr(); + findpc(pc); + tty->cr(); +#endif + tty->print_cr("THIS IS WRONG!"); + tty->print_cr(" r0 = 0x%016x", regs[0]); + tty->print_cr(" r1 = 0x%016x", regs[1]); + tty->print_cr(" r2 = 0x%016x", regs[2]); + tty->print_cr(" r3 = 0x%016x", regs[3]); + tty->print_cr(" r4 = 0x%016x", regs[4]); + tty->print_cr(" r5 = 0x%016x", regs[5]); + tty->print_cr(" r6 = 0x%016x", regs[6]); + tty->print_cr(" r7 = 0x%016x", regs[7]); + tty->print_cr(" r8 = 0x%016x", regs[8]); + tty->print_cr(" r9 = 0x%016x", regs[9]); + tty->print_cr("r10 = 0x%016x", regs[10]); + tty->print_cr("r11 = 0x%016x", regs[11]); + tty->print_cr("r12 = 0x%016x", regs[12]); + tty->print_cr("r13 = 0x%016x", regs[13]); + tty->print_cr("r14 = 0x%016x", regs[14]); + tty->print_cr("r15 = 0x%016x", regs[15]); + BREAKPOINT; + } + ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); + } else { + { + ttyLocker ttyl; + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================", msg); + ::tty->print_cr(" r0 [ arg0 ] = 0x%08x", regs[1]); + ::tty->print_cr(" r1 [ arg1 ] = 0x%08x", regs[2]); + ::tty->print_cr(" r2 [ arg2 ] = 0x%08x", regs[3]); + ::tty->print_cr(" r3 [ arg3 ] = 0x%08x", regs[4]); + ::tty->print_cr(" r4 [ rdispatch ] = 0x%08x", regs[5]); + ::tty->print_cr(" r5 [ rbcp ] = 0x%08x", regs[6]); + ::tty->print_cr(" r6 [ rlocals ] = 0x%08x", regs[7]); + ::tty->print_cr(" r7 [ rcpool ] = 0x%08x", regs[8]); + ::tty->print_cr(" r8 [ rmethod ] = 0x%08x", regs[9]); + ::tty->print_cr(" r9 [ rscratch1 ] = 
0x%08x", regs[10]); + ::tty->print_cr("r10 [ rthread ] = 0x%08x", regs[11]); + ::tty->print_cr("r11 [ rfp ] = 0x%08x", regs[12]); + ::tty->print_cr("r12 [ rscratch2 ] = 0x%08x", regs[13]); + ::tty->print_cr("r13 [ sp ] = 0x%08x", regs[0]); + ::tty->print_cr("r14 [ lr ] = 0x%08x", regs[14]); + ::tty->print_cr("r15 [ pc ] = 0x%08x", pc); + } + assert(false, "DEBUG MESSAGE: %s", msg); + } +} + +void MacroAssembler::push_call_clobbered_registers() { + push(RegSet::range(r0, r3), sp); + if(hasFPU()) { + const int nfloat = 16; // number of callee-saved 32-bit float registers + vstmdb_f64(sp, (1 << nfloat/2) - 1); + } +} + +void MacroAssembler::pop_call_clobbered_registers() { + if(hasFPU()) { + const int nfloat = 16; // number of callee-saved 32-bit float registers + vldmia_f64(sp, (1 << nfloat/2) - 1); + } + pop(RegSet::range(r0, r3), sp); +} + +void MacroAssembler::push_CPU_state() { + // if fix this, update also RegisterSaved::save_live_registers and it's map + push(0x5fff, sp); // integer registers except sp & (aarch32 pc) + + if(hasFPU()) { + const int nfloat = FPUStateSizeInWords / 2; // saved by pairs + vstmdb_f64(sp, (1 << nfloat) - 1); + } else { + sub(sp, sp, FPUStateSizeInWords * wordSize); + } +} + +void MacroAssembler::pop_CPU_state() { + if(hasFPU()) { + const int nfloat = FloatRegisterImpl::number_of_registers / 2; + vldmia_f64(sp, (1 << nfloat) - 1); + } else { + add(sp, sp, FPUStateSizeInWords * wordSize); + } + + pop(0x5fff, sp); // integer registers except sp & (aarch32 pc) +} + +// appears this needs to round up! +void MacroAssembler::round_to(Register reg, int modulus) { + // from x86 + add(reg, reg, modulus - 1); + bic(reg, reg, modulus - 1); // and( reg, -modulus) +} + +SkipIfEqual::SkipIfEqual( + MacroAssembler* masm, const bool* flag_addr, bool value) { + _masm = masm; + _masm->mov(rscratch1, ExternalAddress((address)flag_addr)); + _masm->ldrb(rscratch1, rscratch1); + _masm->cmp(rscratch1, 0); + _masm->b(_label, value ? Assembler::NE : Assembler::EQ); +} + +SkipIfEqual::~SkipIfEqual() { + _masm->bind(_label); +} + +void MacroAssembler::cmpptr(Register src1, Address src2) { + mov(rscratch1, src2); + ldr(rscratch1, Address(rscratch1)); + cmp(src1, rscratch1); +} + +void MacroAssembler::cmpoop(Register obj1, Register obj2) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->obj_equals(this, obj1, obj2); +} + +void MacroAssembler::load_klass(Register dst, Register src) { + ldr(dst, Address(src, oopDesc::klass_offset_in_bytes())); +} + +// ((OopHandle)result).resolve(); +void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { + // OopHandle::resolve is an indirection. 
+ access_load_word_at(T_OBJECT, IN_NATIVE, result, Address(result), tmp, noreg); +} + +void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) { + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + ldr(dst, Address(rmethod, Method::const_offset())); + ldr(dst, Address(dst, ConstMethod::constants_offset())); + ldr(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes())); + ldr(dst, Address(dst, mirror_offset)); + resolve_oop_handle(dst, tmp); +} + +void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) { + ldr(tmp, Address(oop, oopDesc::klass_offset_in_bytes())); + cmp(trial_klass, tmp); +} + +void MacroAssembler::load_prototype_header(Register dst, Register src) { + load_klass(dst, src); + ldr(dst, Address(dst, Klass::prototype_header_offset())); +} + +void MacroAssembler::store_klass(Register dst, Register src) { + str(src, Address(dst, oopDesc::klass_offset_in_bytes())); +} + +void MacroAssembler::store_klass_gap(Register dst, Register src) { } + +void MacroAssembler::access_load_word_at(BasicType type, DecoratorSet decorators, + Register dst, Address src, + Register tmp1, Register thread_tmp) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::load_word_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } else { + bs->load_word_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } +} + +void MacroAssembler::access_store_word_at(BasicType type, DecoratorSet decorators, + Address dst, Register src, + Register tmp1, Register thread_tmp) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::store_word_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } else { + bs->store_word_at(this, decorators, type, dst, src, tmp1, thread_tmp); + } +} + +void MacroAssembler::access_load_tos_at(BasicType type, DecoratorSet decorators, + Address src, + Register tmp1, Register thread_tmp) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::load_tos_at(this, decorators, type, src, tmp1, thread_tmp); + } else { + bs->load_tos_at(this, decorators, type, src, tmp1, thread_tmp); + } +} + +void MacroAssembler::access_store_tos_at(BasicType type, DecoratorSet decorators, + Address dst, + Register tmp1, Register thread_tmp) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + decorators = AccessInternal::decorator_fixup(decorators); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::store_tos_at(this, decorators, type, dst, tmp1, thread_tmp); + } else { + bs->store_tos_at(this, decorators, type, dst, tmp1, thread_tmp); + } +} + +void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, + Register thread_tmp, DecoratorSet decorators) { + access_load_word_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); +} + +void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, + Register thread_tmp, DecoratorSet decorators) { + access_load_word_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, 
dst, src, tmp1, thread_tmp); +} + +void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, + Register thread_tmp, DecoratorSet decorators) { + access_store_word_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); +} + +// Used for storing NULLs. +void MacroAssembler::store_heap_oop_null(Address dst, Register tmp) { + access_store_word_at(T_OBJECT, IN_HEAP, dst, noreg, tmp, noreg); +} + +Address MacroAssembler::allocate_metadata_address(Metadata* obj) { + assert(oop_recorder() != NULL, "this assembler needs a Recorder"); + int index = oop_recorder()->allocate_metadata_index(obj); + RelocationHolder rspec = metadata_Relocation::spec(index); + return Address((address)obj, rspec); +} + +// Move an oop into a register. immediate is true if we want +// immediate instrcutions, i.e. we are not going to patch this +// instruction while the code is being executed by another thread. In +// that case we can use move immediates rather than the constant pool. +void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) { + int oop_index; + if (obj == NULL) { + oop_index = oop_recorder()->allocate_oop_index(obj); + } else { +#ifdef ASSERT + { + ThreadInVMfromUnknown tiv; + assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop"); + } +#endif + oop_index = oop_recorder()->find_index(obj); + } + if (! immediate) { + far_load_oop(dst, oop_index); + } else { + RelocationHolder rspec = oop_Relocation::spec(oop_index); + mov(dst, Address((address)obj, rspec)); + } +} + +// Move a metadata address into a register. +void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { + int oop_index; + if (obj == NULL) { + oop_index = oop_recorder()->allocate_metadata_index(obj); + } else { + oop_index = oop_recorder()->find_index(obj); + } + RelocationHolder rspec = metadata_Relocation::spec(oop_index); + mov(dst, Address((address)obj, rspec)); +} + +void MacroAssembler::far_load(Register dst, address addr) { + address far_load_addr = pc(); + add(dst, r15_pc, 0); + ldr(dst, Address(dst)); + + NativeFarLdr* far_load = (NativeFarLdr*) far_load_addr; + far_load->set_data_addr((intptr_t*) addr); +} + +void MacroAssembler::far_load_oop(Register dst, int oop_index) { + relocate(oop_Relocation::spec(oop_index)); + // can't provide meaningful addr, give far_load addr itself + far_load(dst, pc()); +} + +void MacroAssembler::far_load_metadata(Register dst, int metadata_index) { + relocate(metadata_Relocation::spec(metadata_index)); + // can't provide meaningful addr, give far_load addr itself + far_load(dst, pc()); +} + +void MacroAssembler::far_load_const(Register dst, address const_addr) { + relocate(section_word_Relocation::spec(const_addr, CodeBuffer::SECT_CONSTS)); + far_load(dst, const_addr); +} + +Address MacroAssembler::constant_oop_address(jobject obj) { +#ifdef ASSERT + { + ThreadInVMfromUnknown tiv; + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "not an oop"); + } +#endif + int oop_index = oop_recorder()->find_index(obj); + return Address((address)obj, oop_Relocation::spec(oop_index)); +} + +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
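The two allocation helpers that follow, tlab_allocate and eden_allocate, simply delegate to the BarrierSetAssembler, so the fast path itself is not visible in this file. For orientation, here is a minimal C++ sketch of the bump-the-pointer fast path that TLAB allocation boils down to; the Tlab struct and all names are illustrative stand-ins, not HotSpot types:

    #include <cstddef>
    #include <vector>

    // Illustrative stand-in for a thread-local allocation buffer (TLAB).
    struct Tlab {
      char* top;   // next free byte
      char* end;   // first byte past the buffer
    };

    // Bump-the-pointer fast path: return nullptr where the generated code
    // would branch to slow_case and call into the runtime instead.
    void* tlab_allocate(Tlab& tlab, size_t size_in_bytes) {
      char* obj = tlab.top;
      if (size_in_bytes > static_cast<size_t>(tlab.end - obj)) {
        return nullptr;                    // buffer exhausted: take the slow path
      }
      tlab.top = obj + size_in_bytes;      // defines obj, bumps top
      return obj;
    }

    int main() {
      std::vector<char> buf(1024);
      Tlab tlab = { buf.data(), buf.data() + buf.size() };
      return tlab_allocate(tlab, 16) == buf.data() ? 0 : 1;
    }

eden_allocate is the same pointer bump against a heap-shared top, which is why that variant typically has to update the top atomically rather than with a plain store.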
+void MacroAssembler::tlab_allocate(Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Register t2, + Label& slow_case) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); +} + +// Defines obj, preserves var_size_in_bytes +void MacroAssembler::eden_allocate(Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Label& slow_case) { + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); +} + +// Zero words; len is in bytes +// Destroys all registers except addr +// len must be a nonzero multiple of wordSize +void MacroAssembler::zero_memory(Register addr, Register len, Register t1) { + assert_different_registers(addr, len, t1, rscratch1, rscratch2); + +#ifdef ASSERT + { Label L; + tst(len, BytesPerWord - 1); + b(L, Assembler::EQ); + stop("len is not a multiple of BytesPerWord"); + bind(L); + } +#endif + +#ifndef PRODUCT + block_comment("zero memory"); +#endif + + Label loop; + Label entry; + +// Algorithm: +// +// scratch1 = cnt & 7; +// cnt -= scratch1; +// p += scratch1; +// switch (scratch1) { +// do { +// cnt -= 8; +// p[-8] = 0; +// case 7: +// p[-7] = 0; +// case 6: +// p[-6] = 0; +// // ... +// case 1: +// p[-1] = 0; +// case 0: +// p += 8; +// } while (cnt); +// } + + const int unroll = 8; // Number of str instructions we'll unroll + + lsr(len, len, LogBytesPerWord); + andr(rscratch1, len, unroll - 1); // tmp1 = cnt % unroll + sub(len, len, rscratch1); // cnt -= unroll + // t1 always points to the end of the region we're about to zero + add(t1, addr, rscratch1, lsl(LogBytesPerWord)); + adr(rscratch2, entry); + sub(rscratch2, rscratch2, rscratch1, lsl(2)); + mov(rscratch1, 0); + b(rscratch2); + bind(loop); + sub(len, len, unroll); + for (int i = -unroll; i < 0; i++) + str(rscratch1, Address(t1, i * wordSize)); + bind(entry); + add(t1, t1, unroll * wordSize); + cbnz(len, loop); +} + +void MacroAssembler::verify_tlab() { +#ifdef ASSERT + if (UseTLAB && VerifyOops) { + Label next, ok; + + strd(rscratch2, rscratch1, Address(pre(sp, -16))); + + ldr(rscratch2, Address(rthread, in_bytes(JavaThread::tlab_top_offset()))); + ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_start_offset()))); + cmp(rscratch2, rscratch1); + b(next, Assembler::HS); + STOP("assert(top >= start)"); + should_not_reach_here(); + + bind(next); + ldr(rscratch2, Address(rthread, in_bytes(JavaThread::tlab_end_offset()))); + ldr(rscratch1, Address(rthread, in_bytes(JavaThread::tlab_top_offset()))); + cmp(rscratch2, rscratch1); + b(ok, Assembler::HS); + STOP("assert(top <= end)"); + should_not_reach_here(); + + bind(ok); + ldrd(rscratch2, rscratch1, Address(post(sp, 16))); + } +#endif +} + +// Writes to stack successive pages until offset reached to check for +// stack overflow + shadow pages. This clobbers tmp. +void MacroAssembler::bang_stack_size(Register size, Register tmp) { + assert_different_registers(tmp, size, rscratch1); + mov(tmp, sp); + // Bang stack for total size given plus shadow page size. + // Bang one page at a time because large size can bang beyond yellow and + // red zones. + Label loop; + mov(rscratch1, os::vm_page_size()); + bind(loop); + lea(tmp, Address(tmp, -os::vm_page_size())); + subs(size, size, rscratch1); + str(size, Address(tmp)); + b(loop, Assembler::GT); + + // Bang down shadow pages too. 
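For the page-at-a-time probing above, a small C++ sketch of the access pattern may help; the region and page size stand in for the real stack and os::vm_page_size(), since probing below the C++ stack pointer is not portably expressible:

    #include <cstddef>
    #include <vector>

    // Touch one byte per page, walking down through `total` bytes with the
    // same stride the banging loop uses, so a guard page cannot be stepped
    // over by a single large allocation.
    void bang_region(char* base, size_t total, size_t page_size) {
      for (size_t off = page_size; off <= total; off += page_size) {
        volatile char* probe = base + total - off;   // highest page first
        *probe = 0;
      }
    }

    int main() {
      const size_t page = 4096;                      // stand-in for os::vm_page_size()
      std::vector<char> pretend_stack(16 * page);
      bang_region(pretend_stack.data(), pretend_stack.size(), page);
    }

The shadow-page loop that follows uses the same one-word-per-page stride, just with a fixed trip count.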
+ // At this point, (tmp-0) is the last address touched, so don't + // touch it again. (It was touched as (tmp-pagesize) but then tmp + // was post-decremented.) Skip this address by starting at i=1, and + // touch a few more pages below. N.B. It is important to touch all + // the way down to and including i=StackShadowPages. + for (int i = 0; i < (int)(JavaThread::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) { + // this could be any sized move but this is can be a debugging crumb + // so the bigger the better. + lea(tmp, Address(tmp, -os::vm_page_size())); + str(size, Address(tmp)); + } +} + + +// Move the address of the polling page into dest. +void MacroAssembler::get_polling_page(Register dest, address page, relocInfo::relocType rtype) { + if (SafepointMechanism::uses_thread_local_poll()) { + ldr(dest, Address(rthread, Thread::polling_page_offset())); + } else { + mov(dest, Address(page, rtype)); + } +} + +// Move the address of the polling page into r, then read the polling +// page. +address MacroAssembler::read_polling_page(Register r, address page, relocInfo::relocType rtype) { + get_polling_page(r, page, rtype); + return read_polling_page(r, rtype); +} + +address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) { + InstructionMark im(this); + code_section()->relocate(inst_mark(), rtype); + // It's ok to load to reg from reg + off (without write-back) + ldr(r, Address(r, 0)); + return inst_mark(); +} + +// Helper functions for 64-bit multipliction, division and remainder +// does = * +void MacroAssembler::mult_long(Register Rd, Register Rn, Register Rm) { + Register Rdh = (Register)(Rd->encoding_nocheck() + 1); + Register Rnh = (Register)(Rn->encoding_nocheck() + 1); + Register Rmh = (Register)(Rm->encoding_nocheck() + 1); + + mult_long(Rd, Rdh, Rn, Rnh, Rm, Rmh); +} + +// does = * +void MacroAssembler::mult_long(Register Rd, Register Rdh, Register Rn, Register Rnh, Register Rm, Register Rmh) { + assert_different_registers(Rn, Rnh); + assert_different_registers(Rm, Rmh); + assert_different_registers(Rd, Rdh); // umull restriction + const Register t = rscratch1; + + mul(t, Rm, Rnh); + mla(t, Rn, Rmh, t); + umull(Rd, Rdh, Rm, Rn); + add(Rdh, t, Rdh); +} + + +int64_t internal_ldiv(int64_t a, int64_t b) { + return a / b; +} + +int64_t internal_lmod(int64_t a, int64_t b) { + return a % b; +} + +void MacroAssembler::divide32(Register res, Register num, Register den, bool want_mod) { + Register cnt = rscratch1; + Register mod = rscratch2; + Register sign = r14; + assert_different_registers(num, den, rscratch1, rscratch2, r14); + + // FIXME This works by first converting any negative values to positive ones, however + // it is not possible to express |INT_MIN|. Need to fix this + + //Convert to positive values + mov(sign, 0); + + cmp(num, 0); + mov(sign, 1, MI); + rsb(num, num, 0, MI); + + cmp(den, 0); + if(!want_mod) eor(sign, sign, 1, MI); + rsb(den, den, 0, MI); + + // Algorithm from + // http://www.chiark.greenend.org.uk/~theom/riscos/docs/ultimate/a252div.txt + // Graeme Williams + mov(cnt, 28); + mov(mod, num, lsr(4)); + cmp(den, mod, lsr(12)); + sub(cnt, cnt, 16, Assembler::LE); + mov(mod, mod, lsr(16), Assembler::LE); + cmp(den, mod, lsr(4)); + sub(cnt, cnt, 8, Assembler::LE); + mov(mod, mod, lsr(8), Assembler::LE); + cmp(den, mod); + sub(cnt, cnt, 4, Assembler::LE); + mov(mod, mod, lsr(4), Assembler::LE); + mov(num, num, lsl(cnt)); + rsb(den, den, 0); + + adds(num, num, num); + //Now skip over cnt copies of the 3 instr. loop. 
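Before the computed jump into the unrolled loop, which continues directly below, it may help to see the whole shift-and-subtract scheme in plain C++. This is only a sketch of the algorithm divide32 implements when no hardware divider is available; the early exit that skips leading zero bits is omitted, and the helper names are not from the patch:

    #include <cstdint>
    #include <cstdio>

    // One quotient bit per iteration: shift the dividend into a remainder
    // from the top and subtract the divisor whenever it fits. The unrolled
    // 32-step loop below does the same thing using the carry flag.
    static void udiv32(uint32_t num, uint32_t den, uint32_t* quot, uint32_t* rem) {
      uint32_t q = 0, r = 0;
      for (int i = 31; i >= 0; i--) {
        r = (r << 1) | ((num >> i) & 1u);
        if (r >= den) { r -= den; q |= 1u << i; }
      }
      *quot = q; *rem = r;                 // den == 0 is not handled here
    }

    // Signed wrapper: divide magnitudes, then patch the sign back in, just
    // as the code above does with the `sign` register. Holding the magnitude
    // as unsigned sidesteps the |INT_MIN| issue noted in the FIXME.
    static int32_t sdiv32(int32_t a, int32_t b, bool want_mod) {
      uint32_t ua = a < 0 ? 0u - (uint32_t)a : (uint32_t)a;
      uint32_t ub = b < 0 ? 0u - (uint32_t)b : (uint32_t)b;
      uint32_t q, r;
      udiv32(ua, ub, &q, &r);
      if (want_mod) return a < 0 ? -(int32_t)r : (int32_t)r;
      return ((a < 0) != (b < 0)) ? -(int32_t)q : (int32_t)q;
    }

    int main() {
      printf("%d %d\n", sdiv32(-7, 2, false), sdiv32(-7, 2, true));  // -3 -1
    }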
+ add(cnt, cnt, cnt, lsl(1)); + add(r15_pc, r15_pc, cnt, lsl(2)); + mov(r0, r0); + + for(int i = 0; i < 32; i++) { + adcs(mod, den, mod, lsl(1)); + sub(mod, mod, den, Assembler::LO); + adcs(num, num, num); + } + + cmp(sign, 0); + rsb(res, want_mod? mod : num, 0, NE); + mov(res, want_mod? mod : num, EQ); +} + + +// = / +// = % +// = / +// = % +void MacroAssembler::divide(Register Rd, Register Rn, Register Rm, int width, bool want_remainder) { + //Dispatch to best possible + Register Rdh = (Register)(Rd->encoding_nocheck() + 1); + Register Rnh = (Register)(Rn->encoding_nocheck() + 1); + Register Rmh = (Register)(Rm->encoding_nocheck() + 1); + + assert(32 == width || 64 == width, "Invalid width"); + bool is64b = 64 == width; + + if(is64b) { + assert_different_registers(Rn, Rnh, Rm, Rmh, rscratch1, rscratch2); + } + + if(!is64b && VM_Version::features() & FT_HW_DIVIDE) { + // Emit a hw instruction sequnce. + if(want_remainder) { + sdiv(rscratch1, Rn, Rm); + mls(Rd, rscratch1, Rm, Rn); + } else { + sdiv(Rd, Rn, Rm); + } + } else if(!is64b) { + // Fall back to assembly software routine + divide32(Rd, Rn, Rm, want_remainder); + } else { + // Fall back to C software routine for + // 64 bit divide/mod + if(Rn != r0) { + mov(rscratch1, Rm); + mov(rscratch2, Rmh); + + mov(r0, Rn); + mov(r1, Rnh); + + mov(r2, rscratch1); + mov(r3, rscratch2); + } else if(Rm != r2) { + mov(r2, Rm); + mov(r3, Rmh); + } + address function; + if(want_remainder) function = (address)internal_lmod; + else function = (address)internal_ldiv; + + mov(rscratch1, function); + bl(rscratch1); + if(Rd != r0) { + mov(Rd, r0); + if(is64b) mov(Rdh, r1); + } + } +} + +void MacroAssembler::extract_bits(Register dest, Register source, int lsb, int width) { + assert(lsb >= 0 && lsb + width <= 32 && width != 0, "Invalid lsb/width"); + // Dispatch to the best sequence + if(0 == (lsb & 7) && (width == 8 || width == 16 || width == 32)) { + // Can use extend X + switch(width){ + case 8: uxtb(dest, source, ror(lsb)); break; + case 16: uxth(dest, source, ror(lsb)); break; + default: break; + } + } else if(VM_Version::features() & (FT_ARMV7 | FT_ARMV6T2)) { + ubfx(dest, source, lsb, width); + } else { + // Do two shifts + lsl(dest, source, 32 - (width + lsb)); + lsr(dest, dest, 32 - width); + } +} + + +void MacroAssembler::atomic_ldrd(Register Rt, Register Rt2, Register Rbase) { + assert(Rt->encoding_nocheck() % 2 == 0, "Must be an even register"); + assert((Register) (Rt + 1) == Rt2, "Must be contiguous"); + if(VM_Version::features() & FT_SINGLE_CORE) { + ldrd(Rt, Rbase); + } else if (VM_Version::features() & (FT_ARMV7 | FT_ARMV6K)) { +#ifdef ASSERT + Label lbl; + tst(Rbase, 7); + b(lbl, EQ); + stop("atomic_ldrd is not doubleword aligned!"); + bind(lbl); +#endif // ASSERT + + ldrexd(Rt, Rbase); + } else { + // TODO: Find Java way of logging + static bool warning_printed = false; + if(!warning_printed) { + fprintf(stderr, "Unable to provide atomic doubleword load.\n"); + warning_printed = true; + } + ldrd(Rt, Rbase); + } +} + +void MacroAssembler::atomic_strd(Register Rt, Register Rt2, Register Rbase, + Register temp, Register temp2) { + assert(Rt->encoding_nocheck() % 2 == 0, "Must be an even register"); + assert((Register) (Rt + 1) == Rt2, "Must be contiguous"); + assert((Register) (temp + 1) == temp2, "Must be contiguous"); + assert_different_registers(temp, Rt, Rbase, temp2); + if(VM_Version::features() & FT_SINGLE_CORE) { + strd(Rt, Rbase); + } else if (VM_Version::features() & (FT_ARMV7 | FT_ARMV6K)) { + // First need to gain exclusive 
access + Label retry; + +#ifdef ASSERT + tst(Rbase, 7); + b(retry, EQ); + stop("atomic_strd is not doubleword aligned!"); +#endif // ASSERT + + bind(retry); + ldrexd(temp, Rbase); + strexd(temp, Rt, Rbase); + cmp(temp, 0); + b(retry, NE); + } else { + // TODO: Find Java way of logging + static bool warning_printed = false; + if(!warning_printed) { + fprintf(stderr, "Unable to provide atomic doubleword store.\n"); + warning_printed = true; + } + strd(Rt, Rbase); + } +} + + +#define ENABLE_DEBUGGING 0 +// Helloworld is 2,482,397 +uint32_t MacroAssembler::bytecodes_until_print = 2400000; //13795328; //6888000L; //6881772L; + +uint32_t MacroAssembler::bytecodes_executed = 0; + +int MacroAssembler::enable_debug = 0; +int MacroAssembler::enable_method_debug = 0; +int MacroAssembler::enable_debugging_static = ENABLE_DEBUGGING; + +#define N_J_BYTECODES 238 +const char* j_bytecodes[N_J_BYTECODES] = {"nop", "aconstnull", "iconstm1", "iconst0", "iconst1", "iconst2", "iconst3", "iconst4", "iconst5", "lconst0", +"lconst1", "fconst0", "fconst1", "fconst2", "dconst0", "dconst1", "bipush", "sipush", "ldc", "ldcw", "ldc2w", +"iload", "lload", "fload", "dload", "aload", "iload0", "iload1", "iload2", "iload3", "lload0", "lload1", "lload2", +"lload3", "fload0", "fload1", "fload2", "fload3", "dload0", "dload1", "dload2", "dload3", "aload0", "aload1", "aload2", +"aload3", "iaload", "laload", "faload", "daload", "aaload", "baload", "caload", "saload", "istore", "lstore", "fstore", +"dstore", "astore", "istore0", "istore1", "istore2", "istore3", "lstore0", "lstore1", "lstore2", "lstore3", "fstore0", +"fstore1", "fstore2", "fstore3", "dstore0", "dstore1", "dstore2", "dstore3", "astore0", "astore1", "astore2", "astore3", +"iastore", "lastore", "fastore", "dastore", "aastore", "bastore", "castore", "sastore", "pop", "pop2", "dup", "dupx1", +"dupx2", "dup2", "dup2x1", "dup2x2", "swap", "iadd", "ladd", "fadd", "dadd", "isub", "lsub", "fsub", "dsub", "imul", +"lmul", "fmul", "dmul", "idiv", "ldiv", "fdiv", "ddiv", "irem", "lrem", "frem", "drem", "ineg", "lneg", "fneg", "dneg", +"ishl", "lshl", "ishr", "lshr", "iushr", "lushr", "iand", "land", "ior", "lor", "ixor", "lxor", "iinc", "i2l", "i2f", +"i2d", "l2i", "l2f", "l2d", "f2i", "f2l", "f2d", "d2i", "d2l", "d2f", "i2b", "i2c", "i2s", "lcmp", "fcmpl", "fcmpg", +"dcmpl", "dcmpg", "ifeq", "ifne", "iflt", "ifge", "ifgt", "ifle", "ificmpeq", "ificmpne", "ificmplt", "ificmpge", +"ificmpgt", "ificmple", "ifacmpeq", "ifacmpne", "goto", "jsr", "ret", "tableswitch", "lookupswitch", "ireturn", +"lreturn", "freturn", "dreturn", "areturn", "return", "getstatic", "putstatic", "getfield", "putfield", +"invokevirtual", "invokespecial", "invokestatic", "invokeinterface", "invokedynamic", "new", "newarray", +"anewarray", "arraylength", "athrow", "checkcast", "instanceof", "monitorenter", "monitorexit", "wide", +"multianewarray", "ifnull", "ifnonnull", "gotow", "jsrw", "breakpoint", "fast_agetfield", "fast_bgetfield", +"fast_cgetfield", "fast_dgetfield", "fast_fgetfield", "fast_igetfield", "fast_lgetfield", "fast_sgetfield", +"fast_aputfield", "fast_bputfield", "fast_cputfield", "fast_dputfield", "fast_fputfield", "fast_iputfield", +"fast_lputfield", "fast_sputfield", "fast_aload_0", "fast_iaccess_0", "fast_aaccess_0", "fast_faccess_0", +"fast_iload", "fast_iload2", "fast_icaload", "fast_invokevfinal", "fast_linearswitch", "fast_binaryswitch", +"fast_aldc", "fast_aldc_w", "return_register_finalizer", "invokehandle", "nofast_getfield", "nofast_putfield", +"nofast_aload_0", "nofast_iload", 
"INVALID"}; + +int bytecodes_seen[256]; + +void MacroAssembler::init_unseen_bytecodes() { + for(int i = 0; i < 256; i++ ) { + bytecodes_seen[i] = 0; + } +} + +void MacroAssembler::bytecode_seen(Register bc_reg, Register scratch) { + if(ENABLE_DEBUGGING) { + mov(scratch, (address)bytecodes_seen); + add(scratch, scratch, bc_reg, lsl(2)); + add(bc_reg, bc_reg, 1); + str(bc_reg, Address(scratch)); + sub(bc_reg, bc_reg, 1); + } +} + +void MacroAssembler::print_unseen_bytecodes() { + if(ENABLE_DEBUGGING) { + printf("=== Unseen bytecodes ===\n"); + for(int i = 0; i < N_J_BYTECODES; i++) { + if(0 == bytecodes_seen[i]) { + printf("\t%s\n", j_bytecodes[i]); + } + } + printf("=== End unseen ===\n"); + } else { + printf("Not kept track, enable debugging to view info\n"); + } + fflush(stdout); +} + +int machine_state_regset = 0b0101111111111111; +int machine_state_float_regset = 0b11; + +void MacroAssembler::save_machine_state() { + stmdb(sp, machine_state_regset); + if(hasFPU()) { + vstmdb_f64(sp, machine_state_float_regset); + } + enter(); +} + +void MacroAssembler::restore_machine_state() { + leave(); + if(hasFPU()) { + vldmia_f64(sp, machine_state_float_regset); + } + ldmia(sp, machine_state_regset); +} + +void internal_internal_printf(const char *fmt, ...) { + va_list args; + va_start (args, fmt); + vprintf (fmt, args); + fflush(stdout); + va_end(args); +} + +void internal_printf(const char *format, uint32_t a, uint32_t b, uint32_t c) { + char buf[2048]; + char fmt[2048]; + buf[0] = '\0'; + const char *thread_str = "THREAD 0x%08x : "; + int id = pthread_self(); + strcpy(fmt, format); + + char *str = strtok(fmt, "\n"); + int nreplace = 0; + while(str) { + strcpy(buf, thread_str); + strcat(buf, str); + strcat(buf, "\n"); + internal_internal_printf((const char*)buf, id, a, b, c); + str = strtok(NULL, "\n"); + } +} + +void MacroAssembler::get_bytecode(Register dst, Register bc) { + if(ENABLE_DEBUGGING) { + int nbytecodes = N_J_BYTECODES; + mov(dst, (address)j_bytecodes); + cmp(bc, nbytecodes); + + ldr(dst, Address(dst, bc, lsl(2)), Assembler::LT); + ldr(dst, Address(dst, wordSize * nbytecodes), Assembler::GE); + } +} + +int invocation_depth_count = -1; //TODO remove this with debugging info + +#define MAX_FCALL_DEPTH 4096 +struct thread_method_record{ + int thread_id; + char names[MAX_FCALL_DEPTH][512]; + int invocation_depth_count; +}; +int ntmrs = 0; +#define MAX_TMRS 10 +thread_method_record tmr_list[MAX_TMRS]; + +void push_tmr(Method *meth, int *thread_id, int *invocation_depth_count, char **name) { + int id = pthread_self(); + *thread_id = id; + for(int i = 0; i < ntmrs; i++) { + thread_method_record *tmr = &tmr_list[i]; + if(id == tmr->thread_id) { + // Add a new frame + if(tmr->invocation_depth_count >= -1 && + tmr->invocation_depth_count < (MAX_FCALL_DEPTH - 1)) { + *invocation_depth_count = ++(tmr->invocation_depth_count); + *name = tmr->names[tmr->invocation_depth_count]; + meth->name_and_sig_as_C_string(tmr->names[tmr->invocation_depth_count], 512); + return; + } else { + fprintf(stderr, "%s : Invalid fcall depth index, %d\n", __FUNCTION__, tmr->invocation_depth_count); + exit(1); + } + } + } + // Add a new thread + if(ntmrs >= MAX_TMRS) { + fprintf(stderr, "Too many tmrs\n"); + exit(1); + } + //Create a new tmr + tmr_list[ntmrs].thread_id = id; + tmr_list[ntmrs].invocation_depth_count = 0; + meth->name_and_sig_as_C_string(tmr_list[ntmrs].names[0], 512); + *invocation_depth_count = 0; + *name = tmr_list[ntmrs].names[0]; + ntmrs++; +} + +void pop_tmr(int *thread_id, int 
*invocation_depth_count, char **name) { + int id = pthread_self(); + *thread_id = id; + for(int i = 0; i < ntmrs; i++) { + thread_method_record *tmr = &tmr_list[i]; + if(id == tmr->thread_id) { + if(tmr->invocation_depth_count >= 0 && + tmr->invocation_depth_count < MAX_FCALL_DEPTH) { + // Pop frame + *name = tmr->names[tmr->invocation_depth_count]; + *invocation_depth_count = (tmr->invocation_depth_count)--; + return; + } else if ( -1 == tmr->invocation_depth_count) { + *name = (char*)"JVM-EXCEPTION-EXIT:(NOT-REALLY-A-FRAME)"; + *invocation_depth_count = 0; + return; + } else { + fprintf(stderr, "%s : Invalid fcall depth index, %d\n", __FUNCTION__, tmr->invocation_depth_count); + exit(1); + } + } + } + fprintf(stderr, "Unable to find suitable tmr\n"); + exit(1); +} + +void prepare_entry_exit_prefix(char *buf, int id, int invocation_depth_count) { + sprintf(buf, "THREAD 0x%08x : ", id); + for(int i = 0; i < invocation_depth_count; i++) { + strcat(buf, " "); + } +} + + +void print_entry(Method *meth, int native) { + char *name; + int invocation_depth_count, id; + push_tmr(meth, &id, &invocation_depth_count, &name); + + if(MacroAssembler::enable_method_debug) { + char buf[4096], buf_b[2048]; + prepare_entry_exit_prefix(buf, id, invocation_depth_count); + if(native) { + sprintf(buf_b, "CALL NATIVE : %s\n", name); + } else { + sprintf(buf_b, "CALL JAVA : %s\n", name); + } + strcat(buf, buf_b); + printf("%s", buf); + fflush(stdout); + } +} + +void print_exit(bool normal) { + char *name; + int invocation_depth_count, id; + pop_tmr(&id, &invocation_depth_count, &name); + + if(MacroAssembler::enable_method_debug) { + char buf[4096], buf_b[2048]; + prepare_entry_exit_prefix(buf, id, invocation_depth_count); + sprintf(buf_b, normal ? "EXIT : %s\n" : "EXCPN EXIT : %s\n", name); + strcat(buf, buf_b); + printf("%s", buf); + fflush(stdout); + } +} + +void MacroAssembler::print_method_entry(Register rmethod, bool native) { + if(ENABLE_DEBUGGING) { + save_machine_state(); + + bic(sp, sp, 7); // 8-byte align stack + mov(rscratch2, (address)print_entry); + mov(r0, rmethod); + mov(r1, native); + bl(rscratch2); + + restore_machine_state(); + } +} + +void MacroAssembler::print_method_exit(bool normal) { + if(ENABLE_DEBUGGING) { + save_machine_state(); + + bic(sp, sp, 7); // 8-byte align stack + mov(rscratch2, (address)print_exit); + mov(r0, normal); + bl(rscratch2); + + restore_machine_state(); + } +} + +void MacroAssembler::reg_printf_internal(bool important, const char *fmt, Register ra, Register rb, Register rc) { + if(ENABLE_DEBUGGING) { + Label skip; + save_machine_state(); + + mov(rscratch1, ra); + str(rscratch1, Address(pre(sp, -wordSize))); + mov(rscratch1, rb); + str(rscratch1, Address(pre(sp, -wordSize))); + mov(rscratch1, rc); + str(rscratch1, Address(pre(sp, -wordSize))); + + if(!important) { + mov(r0, (address)&enable_debug); + ldr(r0, Address(r0)); + cmp(r0, 0); + b(skip, Assembler::EQ); + } + + int sp_difference = wordSize * (count_bits(machine_state_regset) + + 2 * count_bits(machine_state_float_regset) + + 2 + 3); //Frame entry and saved + + mov(r0, (address)fmt); + if(ra != sp) ldr(r1, Address(sp, 2 * wordSize)); + else add(r1, sp, sp_difference); + + if(rb != sp) ldr(r2, Address(sp, wordSize)); + else add(r2, sp, sp_difference); + + if(rc != sp) ldr(r3, Address(sp)); + else add(r3, sp, sp_difference); + + bic(sp, sp, 7); // 8-byte align stack + + mov(rscratch2, (address)internal_printf); + bl(rscratch2); + + bind(skip); + restore_machine_state(); + } +} + +void 
MacroAssembler::reg_printf(const char *fmt, Register ra, Register rb, Register rc) { + reg_printf_internal(false, fmt, ra, rb, rc); +} + +void MacroAssembler::reg_printf_important(const char *fmt, Register ra, Register rb, Register rc) { + reg_printf_internal(true, fmt, ra, rb, rc); +} + +// When debugging, set the break on bkpnt +void bkpnt() { return; } +void MacroAssembler::create_breakpoint() { + if(ENABLE_DEBUGGING) { + save_machine_state(); + bic(sp, sp, 7); // 8-byte align stack + + mov(rscratch2, (address) bkpnt); + bl(rscratch2); + + restore_machine_state(); + } +} + + +void MacroAssembler::print_cpool(InstanceKlass *klass) { + ttyLocker ttyl; + klass->constants()->print_on(tty); +} + +int MacroAssembler::ldrd(Register Rt, Register Rt2, const Address& adr, Register Rtmp, Condition cond) { + if((0 == Rt->encoding_nocheck() % 2 && + (Rt->encoding_nocheck() + 1 == Rt2->encoding_nocheck())) && + (uabs(adr.offset()) < (1 << 8))) { + /* Good to go with a ldrd */ + ldrd(Rt, adr, cond); + return 0x0; + } else { + return double_ld_failed_dispatch(Rt, Rt2, adr, &Assembler::ldm, + &Assembler::ldr, Rtmp, cond); + } +} + +int MacroAssembler::strd(Register Rt, Register Rt2, const Address& adr, Condition cond) { + if((0 == Rt->encoding_nocheck() % 2 && + (Rt->encoding_nocheck() + 1 == Rt2->encoding_nocheck())) && + (uabs(adr.offset()) < (1 << 8))) { + /* Good to go with a strd */ + strd(Rt, adr, cond); + } else { + double_ldst_failed_dispatch(Rt, Rt2, adr, &Assembler::stm, &Assembler::str, cond); + } + return 0x0; +} + +int MacroAssembler::double_ld_failed_dispatch(Register Rt, Register Rt2, const Address& adr, + void (Assembler::* mul)(unsigned, const Address&, Condition), + void (Assembler::* sgl)(Register, const Address&, Condition), + Register Rtmp, Condition cond) { + if (can_ldst_multiple(RegSet::of(Rt, Rt2).bits(), adr) && + (Rt->encoding_nocheck() < Rt2->encoding_nocheck())) { + /* Do a load or store multiple instruction */ + (this->*mul)(RegSet::of(Rt, Rt2).bits(), adr, cond); + } else if (!adr.uses(Rt)) { + double_ldst_failed_dispatch(Rt, Rt2, adr, mul, sgl, cond); + } else { + // need to reshuffle operation, otherwise write to Rt destroys adr + if (adr.get_mode() != Address::reg) { + // offset-based addressing. hence Rt2 could not be by adr + if (adr.get_wb_mode() == Address::pre) { + (this->*sgl)(Rt2, Address(pre(adr.base(), adr.offset() + wordSize)), cond); + (this->*sgl)(Rt, Address(pre(adr.base(), -wordSize)), cond); + } else if (adr.get_wb_mode() == Address::post) { + (this->*sgl)(Rt2, Address(adr.base(), adr.offset() + wordSize), cond); + (this->*sgl)(Rt, adr, cond); + } else if (adr.get_wb_mode() == Address::off) { + (this->*sgl)(Rt2, Address(adr.base(), adr.offset() + wordSize), cond); + (this->*sgl)(Rt, adr, cond); + } else { + ShouldNotReachHere(); + } + } else { + // index-based addressing. both Rt and Rt2 could be used by adr + // hence temp register is necessary + adr.lea(this, Rtmp); + double_ldst_failed_dispatch(Rt, Rt2, Address(Rtmp), mul, sgl, cond); + // adr.lea have only address manipulation and cannot cause trap. 
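As an aside on the selection logic spread across ldrd(), strd() and the two dispatch helpers here, a compact decision function may make the cases easier to follow. It is only a model: the enum and parameter names are invented for illustration, and the offset limit mirrors the |offset| < 256 check above:

    #include <cstdlib>

    enum class PairLoadKind { Ldrd, Ldm, TwoSingles, ViaTemp };

    // Which sequence can load a pair of registers from memory, modelled on
    // the checks in ldrd() and double_ld_failed_dispatch().
    PairLoadKind choose_pair_load(int rt, int rt2, int offset,
                                  bool multiple_ok, bool addr_uses_rt) {
      if (rt % 2 == 0 && rt2 == rt + 1 && std::abs(offset) < (1 << 8)) {
        return PairLoadKind::Ldrd;         // encodable as a single LDRD
      }
      if (multiple_ok && rt < rt2) {
        return PairLoadKind::Ldm;          // ascending register list, use LDM
      }
      if (!addr_uses_rt) {
        return PairLoadKind::TwoSingles;   // two single loads, ordered as needed
      }
      return PairLoadKind::ViaTemp;        // base aliases Rt: lea into a temp first
    }

    int main() {
      return choose_pair_load(0, 1, 4, false, false) == PairLoadKind::Ldrd ? 0 : 1;
    }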
+ // first instruction when NPE can occur is in double_ldst_failed_dispatch + // so shift offset appropriately + return 0x4; + } + } + return 0x0; +} + +void MacroAssembler::double_ldst_failed_dispatch(Register Rt, Register Rt2, const Address& adr, + void (Assembler::* mul)(unsigned, const Address&, Condition), + void (Assembler::* sgl)(Register, const Address&, Condition), + Condition cond) { + if (can_ldst_multiple(RegSet::of(Rt, Rt2).bits(), adr) && + (Rt->encoding_nocheck() < Rt2->encoding_nocheck())) { + /* Do a store multiple instruction */ + (this->*mul)(RegSet::of(Rt, Rt2).bits(), adr, cond); + } else { + if (adr.get_mode() != Address::reg) { + // offset-based addressing + if (adr.get_wb_mode() == Address::pre) { + (this->*sgl)(Rt, adr, cond); + (this->*sgl)(Rt2, Address(adr.base(), wordSize), cond); + } else if (adr.get_wb_mode() == Address::post) { + (this->*sgl)(Rt, adr, cond); + (this->*sgl)(Rt2, Address(adr.base(), wordSize - adr.offset()), cond); + } else if (adr.get_wb_mode() == Address::off) { + (this->*sgl)(Rt, adr, cond); + (this->*sgl)(Rt2, Address(adr.base(), adr.offset() + wordSize), cond); + } else { + ShouldNotReachHere(); + } + } else { + // index-based addressing + if (adr.get_wb_mode() == Address::pre) { + // current implementation does not use Address::pre for indexed access + ShouldNotReachHere(); + } else if (adr.get_wb_mode() == Address::post) { + // current implementation does not use Address:post for indexed access + // enable the code below and implement proper post() method if it is required +#if 0 + (this->*sgl)(Rt, Address(post(adr.base(), wordSize)), cond); + (this->*sgl)(Rt2, Address(post(adr.base(), adr.index(), adr.shift())), cond); + sub(adr.base(), wordSize, cond); +#endif + ShouldNotReachHere(); + } else if (adr.get_wb_mode() == Address::off) { + (this->*sgl)(Rt, Address(pre(adr.base(), adr.index(), adr.shift(), adr.op())), cond); + (this->*sgl)(Rt2, Address(adr.base(), wordSize), cond); + compensate_addr_offset(adr, cond); + } else { + ShouldNotReachHere(); + } + } + } +} + +#ifdef ASSERT +void MacroAssembler::verify_stack_alignment() { + if (StackAlignmentInBytes > 4) { + Label x; + tst(sp, StackAlignmentInBytes-1); + b(x, EQ); + stop("stack unaligned"); + bind(x); + } +} +#endif + +/** + * Code for BigInteger::multiplyToLen() instrinsic. + * + * r0: x + * r1: xlen + * r2: y + * r3: ylen + * r4: z + * r5: zlen + * + */ +void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, + Register z, Register zlen, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, + Register tmp5, Register tmp6) { + + assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6); + + const Register xc = xlen; + const Register yc = tmp1; + const Register zc = tmp2; + + const Register vz = tmp3; + const Register carry = tmp4; + const Register vx = tmp5; + const Register vy = tmp6; + + // ensure y (inner cycle) is shorter than x (outer cycle), this in theory uses CPU caches more effectively + Label L_x_longer; + cmp(xlen, ylen); + b(L_x_longer, Assembler::GE); +#define SWP(X, Y) \ + mov(tmp1, Y); \ + mov(Y, X); \ + mov(X, tmp1) + SWP(x, y); + SWP(xlen, ylen); + bind(L_x_longer); + + lea(xc, Address(x, xlen, lsl(LogBytesPerInt))); // x[xstart] + lea(yc, Address(y, ylen, lsl(LogBytesPerInt))); // y[idx] + lea(zc, Address(z, zlen, lsl(LogBytesPerInt))); // z[kdx] + + // First Loop. 
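The Java-level pseudocode quoted in the comments just below maps onto this C++ sketch of the word-by-word schoolbook multiply (array layout is BigInteger's big-endian int[]; the function name here is only illustrative). The per-word update, a 32x32 product plus a previous word plus a carry, is what a single UMAAL computes and can never overflow 64 bits:

    #include <cstdint>
    #include <vector>

    // Schoolbook multiply of big-endian 32-bit word arrays; z gets
    // x.size() + y.size() words, most significant first.
    void multiply_to_len(const std::vector<uint32_t>& x,
                         const std::vector<uint32_t>& y,
                         std::vector<uint32_t>& z) {
      const size_t xlen = x.size(), ylen = y.size();
      z.assign(xlen + ylen, 0);

      // First loop: the last word of x times all of y.
      uint64_t carry = 0;
      for (size_t idx = ylen, kdx = ylen + xlen; idx-- > 0; ) {
        uint64_t product = (uint64_t)y[idx] * x[xlen - 1] + carry;
        z[--kdx] = (uint32_t)product;
        carry = product >> 32;
      }
      z[xlen - 1] = (uint32_t)carry;

      // Second and third loops: remaining rows, now also adding the
      // partial result already in z.
      for (size_t i = xlen - 1; i-- > 0; ) {
        carry = 0;
        for (size_t jdx = ylen, k = ylen + i + 1; jdx-- > 0; ) {
          uint64_t product = (uint64_t)y[jdx] * x[i] + z[--k] + carry;
          z[k] = (uint32_t)product;
          carry = product >> 32;
        }
        z[i] = (uint32_t)carry;
      }
    }

    int main() {
      std::vector<uint32_t> x{0xffffffffu}, y{0xffffffffu}, z;
      multiply_to_len(x, y, z);            // z = { 0xfffffffe, 0x00000001 }
    }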
+ // + // final static long LONG_MASK = 0xffffffffL; + // int xstart = xlen - 1; + // int ystart = ylen - 1; + // long carry = 0; + // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) { + // long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry; + // z[kdx] = (int)product; + // carry = product >>> 32; + // } + // z[xstart] = (int)carry; + // + + ldr(vx, Assembler::pre(xc, -BytesPerInt)); + mov(carry, 0); + + Label L_loop_1; + bind(L_loop_1); + ldr(vy, Assembler::pre(yc, -BytesPerInt)); + mov(vz, 0); + umaal(vz, carry, vx, vy); + str(vz, Assembler::pre(zc, -BytesPerInt)); + cmp(yc, y); + b(L_loop_1, Assembler::GT); + + str(carry, Address(zc, -BytesPerInt)); + + // Second and third (nested) loops. + // + // for (int i = xstart-1; i >= 0; i--) { // Second loop + // carry = 0; + // for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop + // long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) + + // (z[k] & LONG_MASK) + carry; + // z[k] = (int)product; + // carry = product >>> 32; + // } + // z[i] = (int)carry; + // } + // + Label L_loop_2, L_loop_3; + bind(L_loop_2); + + sub(zlen, zlen, 1); + lea(yc, Address(y, ylen, lsl(LogBytesPerInt))); // y[jdx] + lea(zc, Address(z, zlen, lsl(LogBytesPerInt))); // z[k] + + ldr(vx, Assembler::pre(xc, -BytesPerInt)); + mov(carry, 0); + + bind(L_loop_3); + ldr(vy, Assembler::pre(yc, -BytesPerInt)); + ldr(vz, Assembler::pre(zc, -BytesPerInt)); // r1 is vz, r2 is carry + umaal(vz, carry, vx, vy); + str(vz, Address(zc)); + cmp(yc, y); + b(L_loop_3, Assembler::GT); + + str(carry, Address(zc, -BytesPerInt)); + cmp(xc, x); + b(L_loop_2, Assembler::GT); +} + +/** + * Code for BigInteger::mulAdd() instrinsic. + * + * r0: out + * r1: in + * r2: offset + * r3: len + * r4: k + */ +void MacroAssembler::mul_add(Register out, Register in, Register offset, Register len, Register k, + Register tmp1, Register tmp2, Register tmp3) { + + assert_different_registers(out, in, offset, len, k, tmp1, tmp2, tmp3); + + Register vin = tmp1; + Register vout = tmp2; + Register carry = tmp3; + Register result = r0; + +// long kLong = k & LONG_MASK; +// long carry = 0; +// +// offset = out.length-offset - 1; +// for (int j=len-1; j >= 0; j--) { +// long product = (in[j] & LONG_MASK) * kLong + +// (out[offset] & LONG_MASK) + carry; +// out[offset--] = (int)product; +// carry = product >>> 32; +// } +// return (int)carry; + + lea(in, Address(in, len, lsl(LogBytesPerInt))); + lea(out, Address(out, offset, lsl(LogBytesPerInt))); + mov(carry, 0); + + Label L_loop; + bind(L_loop); + ldr(vin, Assembler::pre(in, -BytesPerInt)); + ldr(vout, Assembler::pre(out, -BytesPerInt)); + umaal(vout, carry, vin, k); + str(vout, Address(out)); + subs(len, len, 1); + b(L_loop, Assembler::GT); + + mov(result, carry); +} + +/** + * Emits code to update CRC-32 with a byte value according to constants in table + * + * @param [in,out]crc Register containing the crc. + * @param [in]val Register containing the byte to fold into the CRC. + * @param [in]table Register containing the table of crc constants. + * + * uint32_t crc; + * val = crc_table[(val ^ crc) & 0xFF]; + * crc = val ^ (crc >> 8); + * + */ +void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { + eor(val, val, crc); + andr(val, val, 0xff); + ldr(val, Address(table, val, lsl(2))); + eor(crc, val, crc, Assembler::lsr(8)); +} + +/** + * Emits code to update CRC-32 with a 32-bit value according to tables 0 to 3 + * + * @param [in,out]crc Register containing the crc. 
+ * @param [in]v Register containing the 32-bit to fold into the CRC. + * @param [in]table0 Register containing table 0 of crc constants. + * @param [in]table1 Register containing table 1 of crc constants. + * @param [in]table2 Register containing table 2 of crc constants. + * @param [in]table3 Register containing table 3 of crc constants. + * + * uint32_t crc; + * v = crc ^ v + * crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24] + * + */ +void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp, + Register tmp2, Register table0, Register table1, Register table2, Register table3) { + eor(v, crc, v); + uxtb(tmp, v); + uxtb(tmp2, v, ror(8)); + ldr(crc, Address(table3, tmp, lsl(2))); + ldr(tmp2, Address(table2, tmp2, lsl(2))); + uxtb(tmp, v, ror(16)); + eor(crc, crc, tmp2); + uxtb(tmp2, v, ror(24)); + ldr(tmp, Address(table1, tmp, lsl(2))); + ldr(tmp2, Address(table0, tmp2, lsl(2))); + eor(crc, crc, tmp); + eor(crc, crc, tmp2); +} + +/** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) + * @param len register containing number of bytes + * @param table register that will contain address of CRC table + * @param tmp scratch register + */ +void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, + Register table0, Register table1, Register table2, Register table3, + Register tmp, Register tmp2, Register tmp3, int is_crc32c) { + Label L_cpu, L_by8_loop, L_by1, L_by1_loop, L_align_by1_loop, L_align_exit, L_exit; + + if (!is_crc32c) + inv(crc, crc); + if (UseCRC32) { + Label CRC_by4_loop, CRC_by1_loop; + + subs(len, len, 4); + b(CRC_by4_loop, Assembler::GE); + adds(len, len, 4); + b(CRC_by1_loop, Assembler::GT); + b(L_exit); + + BIND(CRC_by4_loop); + ldr(tmp, Address(post(buf, 4))); + subs(len, len, 4); + if (!is_crc32c) + crc32w(crc, crc, tmp); + else // is_crc32c + crc32cw(crc, crc, tmp); + b(CRC_by4_loop, Assembler::GE); + adds(len, len, 4); + b(L_exit, Assembler::LE); + BIND(CRC_by1_loop); + ldrb(tmp, Address(post(buf, 1))); + subs(len, len, 1); + if (!is_crc32c) + crc32b(crc, crc, tmp); + else // is_crc32c + crc32cb(crc, crc, tmp); + b(CRC_by1_loop, Assembler::GT); + BIND(L_exit); + if (!is_crc32c) + inv(crc, crc); + return; + } + lea(table0, ExternalAddress( + !is_crc32c ? 
+ StubRoutines::crc_table_addr() : + StubRoutines::crc32c_table_addr() )); + add(table1, table0, 1*256*sizeof(juint)); + add(table2, table0, 2*256*sizeof(juint)); + add(table3, table0, 3*256*sizeof(juint)); + + BIND(L_align_by1_loop); + tst(buf, 3); + b(L_align_exit, Assembler::EQ); + cmp(len, 0); + b(L_exit, Assembler::EQ); + sub(len, len, 1); + ldrb(tmp, Address(post(buf, 1))); + update_byte_crc32(crc, tmp, table0); + b(L_align_by1_loop); + + BIND(L_align_exit); + + if(VM_Version::features() & FT_AdvSIMD) { + if (UseNeon) { + cmp(len, 32+12); // account for possible need for alignment + b(L_cpu, Assembler::LT); + + Label L_fold, L_align_by4_loop, L_align_by4_exit; + + BIND(L_align_by4_loop); + tst(buf, 0xf); + b(L_align_by4_exit, Assembler::EQ); + ldr(tmp, Address(post(buf, 4))); + update_word_crc32(crc, tmp, tmp2, tmp3, table0, table1, table2, table3); + sub(len, len, 4); + b(L_align_by4_loop); + + BIND(L_align_by4_exit); + + add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants + + vld1_64(d0, d1, post(buf, 16), Assembler::ALIGN_128); + vld1_64(d4, post(tmp, 8), Assembler::ALIGN_64); + vld1_64(d5, post(tmp, 8), Assembler::ALIGN_64); + vld1_64(d6, post(tmp, 8), Assembler::ALIGN_64); + vld1_64(d7, post(tmp, 8), Assembler::ALIGN_64); + veor_64(d16, d16, d16); + vmov_32(d16, 0, crc); + + veor_64(d0, d0, d16); + sub(len, len, 32); + + BIND(L_fold); + vmullp_8(q8, d0, d5); + vmullp_8(q9, d0, d7); + vmullp_8(q10, d0, d4); + vmullp_8(q11, d0, d6); + + vmullp_8(q12, d1, d5); + vmullp_8(q13, d1, d7); + vmullp_8(q14, d1, d4); + vmullp_8(q15, d1, d6); + + vuzp_128_16(q9, q8); + veor_128(q8, q8, q9); + + vuzp_128_16(q13, q12); + veor_128(q12, q12, q13); + + vshll_16u(q9, d16, 8); + vshll_16u(q8, d17, 8); + + vshll_16u(q13, d24, 8); + vshll_16u(q12, d25, 8); + + veor_128(q8, q8, q10); + veor_128(q12, q12, q14); + veor_128(q9, q9, q11); + veor_128(q13, q13, q15); + + veor_64(d19, d19, d18); + veor_64(d18, d27, d26); + + vshll_32u(q13, d18, 16); + vshll_32u(q9, d19, 16); + + veor_128(q9, q8, q9); + veor_128(q13, q12, q13); + + veor_64(d31, d26, d27); + veor_64(d30, d18, d19); + + vshl_128_64(q15, q15, 1); + vld1_64(d0, d1, post(buf, 16), Assembler::ALIGN_128); + veor_128(q0, q0, q15); + + subs(len, len, 16); + b(L_fold, Assembler::GE); + + vmov_32(tmp, d0, 0); + mov(crc, 0); + update_word_crc32(crc, tmp, tmp2, tmp3, table0, table1, table2, table3); + vmov_32(tmp, d0, 1); + update_word_crc32(crc, tmp, tmp2, tmp3, table0, table1, table2, table3); + vmov_32(tmp, d1, 0); + update_word_crc32(crc, tmp, tmp2, tmp3, table0, table1, table2, table3); + vmov_32(tmp, d1, 1); + update_word_crc32(crc, tmp, tmp2, tmp3, table0, table1, table2, table3); + + add(len, len, 16); + } + } // if FT_AdvSIMD + + BIND(L_cpu); + subs(len, len, 8); + b(L_by8_loop, Assembler::GE); + adds(len, len, 8); + b(L_by1_loop, Assembler::GT); + b(L_exit); + + BIND(L_by8_loop); + ldr(tmp, Address(post(buf, 4))); + update_word_crc32(crc, tmp, tmp2, tmp3, table0, table1, table2, table3); + ldr(tmp, Address(post(buf, 4))); + update_word_crc32(crc, tmp, tmp2, tmp3, table0, table1, table2, table3); + subs(len, len, 8); + b(L_by8_loop, Assembler::GE); + adds(len, len, 8); + b(L_exit, Assembler::LE); + BIND(L_by1_loop); + subs(len, len, 1); + ldrb(tmp, Address(post(buf, 1))); + update_byte_crc32(crc, tmp, table0); + b(L_by1_loop, Assembler::GT); + + BIND(L_exit); + if (!is_crc32c) + inv(crc, crc); +} + +/** + * First round Key (cpu implementation) + * @param in register containing address of input data (plain or cipher text) + * 
@param key register containing address of the key data + * @param t0 output register t0 + * @param t1 output register t1 + * @param t2 output register t2 + * @param t3 output register t3 + * @param t4 temporary register + * @param t5 temporary register + * @param t6 temporary register + * @param t7 temporary register + */ +void MacroAssembler::kernel_aescrypt_firstRound(Register in, Register key, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6, Register t7) { + + ldr(t4, Address(post(key, 4))); + ldr(t5, Address(post(key, 4))); + ldr(t6, Address(post(key, 4))); + ldr(t7, Address(post(key, 4))); + ldr(t0, Address(post(in, 4))); + ldr(t1, Address(post(in, 4))); + ldr(t2, Address(post(in, 4))); + ldr(t3, Address(post(in, 4))); + rev(t0, t0); + rev(t1, t1); + rev(t2, t2); + rev(t3, t3); + eor(t0, t0, t4); + eor(t1, t1, t5); + eor(t2, t2, t6); + eor(t3, t3, t7); +} + +/** + * AES ECB Round + * @param table_te Register contains address of AES replacement table + * @param key register containing address of the key data + * @param t0 Register for input value t0 + * @param t1 Register for input value t1 + * @param t2 Register for input value t2 + * @param t3 Register for input value t3 + * @param a Register for output value + * @param tmp1 Temporary register 1 + * @param tmp2 Temporary register 2 + */ +void MacroAssembler::kernel_aescrypt_round(Register table_te, Register key, + Register t0, Register t1, Register t2, Register t3, + Register a, Register tmp1, Register tmp2) { + + ldr(a, Address(post(key, 4))); // K + uxtb(tmp1, t0, ror(24)); + ldr(tmp1, Address(table_te, tmp1, lsl(2))); // T1 + uxtb(tmp2, t1, ror(16)); + eor(a, a, tmp1); + ldr(tmp2, Address(table_te, tmp2, lsl(2))); // T2 + uxtb(tmp1, t2, ror(8)); + eor(a, a, tmp2, ror(8)); + ldr(tmp1, Address(table_te, tmp1, lsl(2))); // T3 + uxtb(tmp2, t3); + eor(a, a, tmp1, ror(16)); + ldr(tmp2, Address(table_te, tmp2, lsl(2))); // T4 + eor(a, a, tmp2, ror(24)); // a0 +}; + +/** + * + * Last AES encryption round ( 4 bytes ) + * @param table_te + * @param key + * @param to + * @param t0 + * @param t1 + * @param t2 + * @param t3 + * @param t4 + * @param t5 + * @param t6 + * @param t7 + * + * int tt = K[keyOffset++]; + * out[outOffset++] = (byte)(S[(t0 >>> 24) ] ^ (tt >>> 24)); + * out[outOffset++] = (byte)(S[(t1 >>> 16) & 0xFF] ^ (tt >>> 16)); + * out[outOffset++] = (byte)(S[(t2 >>> 8) & 0xFF] ^ (tt >>> 8)); + * out[outOffset++] = (byte)(S[(t3 ) & 0xFF] ^ (tt )); + */ +void MacroAssembler::kernel_aescrypt_lastRound( + Register table_te, Register key, Register to, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6, Register t7) { + + ldr(t7, Address(post(key, 4))); // tt + + uxtb(t5, t0, ror(24)); + ldr(t4, Address(table_te, t5, lsl(2))); // S[] + uxtb(t6, t1, ror(16)); + eor(t4, t4, t7, lsr(24)); + ldr(t6, Address(table_te, t6, lsl(2))); // S[] + uxtb(t5, t2, ror(8)); + eor(t6, t6, t7, lsr(16)); + uxtb(t6, t6); + add(t4, t4, t6, lsl(8)); + ldr(t5, Address(table_te, t5, lsl(2))); // S[] + uxtb(t6, t3); + eor(t5, t5, t7, lsr(8)); + uxtb(t5, t5); + add(t4, t4, t5, lsl(16)); + ldr(t6, Address(table_te, t6, lsl(2))); // S[] + eor(t6, t6, t7); + uxtb(t6, t6); + add(t4, t4, t6, lsl(24)); + + str(t4, Address(post(to, 4))); + +} + +/** + * + * Last AES encryption round ( 4 bytes ) + * @param table_te + * @param key + * @param to + * @param t0 + * @param t1 + * @param t2 + * @param t3 + * @param t4 + * @param t5 + * @param t6 + * @param t7 + * + * int tt = 
K[keyOffset++]; + * out[outOffset++] = (byte)(S[(t0 >>> 24) ] ^ (tt >>> 24)); + * out[outOffset++] = (byte)(S[(t1 >>> 16) & 0xFF] ^ (tt >>> 16)); + * out[outOffset++] = (byte)(S[(t2 >>> 8) & 0xFF] ^ (tt >>> 8)); + * out[outOffset++] = (byte)(S[(t3 ) & 0xFF] ^ (tt )); + */ +void MacroAssembler::kernel_aescrypt_lastRound_cbc( + Register table_te, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6) { + + uxtb(t5, t0, ror(24)); + ldr(t4, Address(table_te, t5, lsl(2))); // S[] + uxtb(t6, t1, ror(16)); + ldr(t6, Address(table_te, t6, lsl(2))); // S[] + uxtb(t5, t2, ror(8)); + add(t4, t4, t6, lsl(8)); + ldr(t5, Address(table_te, t5, lsl(2))); // S[] + uxtb(t6, t3); + add(t4, t4, t5, lsl(16)); + ldr(t6, Address(table_te, t6, lsl(2))); // S[] + add(t4, t4, t6, lsl(24)); +} + +/** + * AES ECB encryption + * + * @param from register pointing to source array address + * @param to register pointing to destination array address + * @param key register pointing to key + * @param keylen register containing key len in bytes + */ +void MacroAssembler::kernel_aescrypt_encryptBlock(Register from, Register to, + Register key, Register keylen, Register table_te, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6, Register t7) { + Label L_loop; + lea(table_te, ExternalAddress(StubRoutines::aes_table_te_addr())); + + ldr(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - + arrayOopDesc::base_offset_in_bytes(T_INT))); + + + kernel_aescrypt_firstRound(from, key, + t0, t1, t2, t3, t4, t5, t6, t7); + + sub(keylen, keylen, 8); + BIND(L_loop); + + kernel_aescrypt_round(table_te, key, + t0, t1, t2, t3, t4, t7, from); + kernel_aescrypt_round(table_te, key, + t1, t2, t3, t0, t5, t7, from); + kernel_aescrypt_round(table_te, key, + t2, t3, t0, t1, t6, t7, from); + + uxtb(t7, t3, ror(24)); + ldr(t3, Address(table_te, t7, lsl(2))); // T1 + uxtb(t7, t0, ror(16)); + ldr(t7, Address(table_te, t7, lsl(2))); // T2 + mov(t0, t4); // t0=a0 + eor(t3, t3, t7, ror(8)); + uxtb(t7, t1, ror(8)); + ldr(t7, Address(table_te, t7, lsl(2))); // T3 + mov(t1, t5); // t1=a1 + eor(t3, t3, t7, ror(16)); + uxtb(t7, t2); + ldr(t7, Address(table_te, t7, lsl(2))); // T4 + mov(t2, t6); // t2=a2 + eor(t3, t3, t7, ror(24)); + ldr(t7, Address(post(key, 4))); // K + eor(t3, t3, t7); // t3 = a3 + + subs(keylen, keylen, 4); + b(L_loop, Assembler::NE); + + // last round is special + add(table_te, table_te, 4 * 256); //S + + kernel_aescrypt_lastRound( + table_te, key, to, + t0, t1, t2, t3, + t4, t5, t6, t7); + + kernel_aescrypt_lastRound( + table_te, key, to, + t1, t2, t3, t0, + t4, t5, t6, t7); + + kernel_aescrypt_lastRound( + table_te, key, to, + t2, t3, t0, t1, + t4, t5, t6, t7); + + kernel_aescrypt_lastRound( + table_te, key, to, + t3, t0, t1, t2, + t4, t5, t6, t7); +} + +/** + * AES ECB decryption + * @param from register pointing to source array address + * @param to register pointing to destination array address + * @param key register pointing to key + * @param keylen register containing key len in bytes + */ +void MacroAssembler::kernel_aescrypt_decryptBlock(Register from, Register to, + Register key, Register keylen, Register table_te, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6, Register t7) { + Label L_loop; + lea(table_te, ExternalAddress(StubRoutines::aes_table_td_addr())); + + ldr(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - + arrayOopDesc::base_offset_in_bytes(T_INT))); + + push(key, 
sp); + + add(key, key, 16); + kernel_aescrypt_firstRound(from, key, + t0, t1, t2, t3, t4, t5, t6, t7); + + sub(keylen, keylen, 8); + BIND(L_loop); + + kernel_aescrypt_round(table_te, key, + t0, t3, t2, t1, t4, t7, from); + kernel_aescrypt_round(table_te, key, + t1, t0, t3, t2, t5, t7, from); + kernel_aescrypt_round(table_te, key, + t2, t1, t0, t3, t6, t7, from); + + uxtb(t7, t3, ror(24)); + ldr(t3, Address(table_te, t7, lsl(2))); // T1 + uxtb(t7, t2, ror(16)); + ldr(t7, Address(table_te, t7, lsl(2))); // T2 + mov(t2, t6); // t2=a2 + eor(t3, t3, t7, ror(8)); + uxtb(t7, t1, ror(8)); + ldr(t7, Address(table_te, t7, lsl(2))); // T3 + mov(t1, t5); // t1=a1 + eor(t3, t3, t7, ror(16)); + uxtb(t7, t0); + ldr(t7, Address(table_te, t7, lsl(2))); // T4 + mov(t0, t4); // t0=a0 + eor(t3, t3, t7, ror(24)); + ldr(t7, Address(post(key, 4))); // K + eor(t3, t3, t7); // t3 = a3 + + subs(keylen, keylen, 4); + b(L_loop, Assembler::NE); + + pop(key, sp); + // last round is special + add(table_te, table_te, 4 * 256); //S + + kernel_aescrypt_lastRound( + table_te, key, to, + t0, t3, t2, t1, + t4, t5, t6, t7); + + kernel_aescrypt_lastRound( + table_te, key, to, + t1, t0, t3, t2, + t4, t5, t6, t7); + + kernel_aescrypt_lastRound( + table_te, key, to, + t2, t1, t0, t3, + t4, t5, t6, t7); + + kernel_aescrypt_lastRound( + table_te, key, to, + t3, t2, t1, t0, + t4, t5, t6, t7); +} + +/** + * AES CBC encryption + * + * @param from register pointing to source array address + * @param to register pointing to destination array address + * @param key register pointing to key + * @param rvec register pointing to roundkey vector + * @param len register containing source len in bytes + */ +void MacroAssembler::kernel_aescrypt_encrypt(Register from, Register to, + Register key, Register rvec, Register len, Register keylen, Register table_te, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6) { + Label L_loop, L_loop2; + lea(table_te, ExternalAddress(StubRoutines::aes_table_te_addr())); + ldr(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - + arrayOopDesc::base_offset_in_bytes(T_INT))); + + vld1_64(d4, d5, Address(rvec), Assembler::ALIGN_STD); // read rvec bytes to q2 + vld1_64(d2, d3, Address(post(key, 16)), Assembler::ALIGN_STD); // read key to q1 + sub(keylen, keylen, 8); + + add(t4, key, keylen, lsl(2)); + vld1_64(d8, d9, Address(t4), Assembler::ALIGN_STD); // read last key bytes to q4 + vrev32_128_8(q4, q4); + + push(to, sp); + BIND(L_loop2); + // get round key and first round + vld1_64(d0, d1, Address(post(from, 16)), Assembler::ALIGN_STD); // read 16 bytes to q0 + veor_128(q0, q0, q2); + vrev32_128_8(q0, q0); + veor_128(q0, q0, q1); + vmov_f64(t0, t1, d0); + vmov_f64(t2, t3, d1); + + push(RegSet::of(key, from), sp); + push(RegSet::of(to, keylen), sp); + + BIND(L_loop); + + kernel_aescrypt_round(table_te, key, + t0, t1, t2, t3, t4, to, from); + kernel_aescrypt_round(table_te, key, + t1, t2, t3, t0, t5, to, from); + kernel_aescrypt_round(table_te, key, + t2, t3, t0, t1, t6, to, from); + + uxtb(to, t3, ror(24)); + ldr(t3, Address(table_te, to, lsl(2))); // T1 + uxtb(to, t0, ror(16)); + ldr(to, Address(table_te, to, lsl(2))); // T2 + mov(t0, t4); // t0=a0 + eor(t3, t3, to, ror(8)); + uxtb(to, t1, ror(8)); + ldr(to, Address(table_te, to, lsl(2))); // T3 + mov(t1, t5); // t1=a1 + eor(t3, t3, to, ror(16)); + uxtb(to, t2); + ldr(to, Address(table_te, to, lsl(2))); // T4 + mov(t2, t6); // t2=a2 + eor(t3, t3, to, ror(24)); + ldr(to, Address(post(key, 4))); // K + eor(t3, t3, 
to); // t3 = a3 + + subs(keylen, keylen, 4); + b(L_loop, Assembler::NE); + + // last round is special + add(table_te, table_te, 4 * 256); //S + kernel_aescrypt_lastRound_cbc( + table_te, + t0, t1, t2, t3, + t4, t5, t6); + + kernel_aescrypt_lastRound_cbc( + table_te, + t1, t2, t3, t0, + t5, t6, from); + vmov_f64(d6, t4, t5); + + kernel_aescrypt_lastRound_cbc( + table_te, + t2, t3, t0, t1, + t4, t5, t6); + + kernel_aescrypt_lastRound_cbc( + table_te, + t3, t0, t1, t2, + t5, t6, from); + vmov_f64(d7, t4, t5); + veor_128(q2, q4, q3); + + pop(RegSet::of(to, keylen), sp); + sub(table_te, table_te, 4 * 256); //Te + vst1_64(d4, Address(post(to, 8)), Assembler::ALIGN_STD); + pop(RegSet::of(key, from), sp); + vst1_64(d5, Address(post(to, 8)), Assembler::ALIGN_STD); + + subs(len, len, 16); + b(L_loop2, Assembler::NE); + vstr_f64(d4, Address(rvec)); + vstr_f64(d5, Address(rvec, 8)); + mov(r0, to); + pop(to, sp); + sub(r0, r0, to); +}; + +/** + * AES CBC decryption + * + * @param from register pointing to source array address + * @param to register pointing to destination array address + * @param key register pointing to key + * @param rvec register pointing to roundkey vector + * @param len register containing source len in bytes + */ +void MacroAssembler::kernel_aescrypt_decrypt(Register from, Register to, + Register key, Register rvec, Register len, Register keylen, Register table_te, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6) { + Label L_loop, L_loop2; + lea(table_te, ExternalAddress(StubRoutines::aes_table_td_addr())); + + ldr(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - + arrayOopDesc::base_offset_in_bytes(T_INT))); + + vld1_64(d2, d3, Address(post(key, 16)), Assembler::ALIGN_STD); // read key to q1 + vld1_64(d4, d5, Address(rvec), Assembler::ALIGN_STD); // read rvec bytes to q2 + vld1_64(d10, d11, Address(post(key, 16)), Assembler::ALIGN_STD); // read key to q5 + vrev32_128_8(q1, q1); + sub(keylen, keylen, 8); + + push(to, sp); + BIND(L_loop2); + // get round key and first round + vld1_64(d8, d9, Address(post(from, 16)), Assembler::ALIGN_STD); // read 16 bytes to q4 + + push(RegSet::of(to, key, from, keylen), sp); + vrev32_128_8(q0, q4); + veor_128(q0, q0, q5); + vmov_f64(t0, t1, d0); + vmov_f64(t2, t3, d1); + + BIND(L_loop); + + kernel_aescrypt_round(table_te, key, + t0, t3, t2, t1, t4, to, from); + kernel_aescrypt_round(table_te, key, + t1, t0, t3, t2, t5, to, from); + kernel_aescrypt_round(table_te, key, + t2, t1, t0, t3, t6, to, from); + + uxtb(to, t3, ror(24)); + ldr(t3, Address(table_te, to, lsl(2))); // T1 + uxtb(to, t2, ror(16)); + ldr(to, Address(table_te, to, lsl(2))); // T2 + mov(t2, t6); // t2=a2 + eor(t3, t3, to, ror(8)); + uxtb(to, t1, ror(8)); + ldr(to, Address(table_te, to, lsl(2))); // T3 + mov(t1, t5); // t1=a1 + eor(t3, t3, to, ror(16)); + uxtb(to, t0); + ldr(to, Address(table_te, to, lsl(2))); // T4 + mov(t0, t4); // t0=a0 + eor(t3, t3, to, ror(24)); + ldr(to, Address(post(key, 4))); // K + eor(t3, t3, to); // t3 = a3 + + subs(keylen, keylen, 4); + b(L_loop, Assembler::NE); + + // last round is special + add(table_te, table_te, 4 * 256); //S + + kernel_aescrypt_lastRound_cbc( + table_te, + t0, t3, t2, t1, + t4, t5, t6); + + kernel_aescrypt_lastRound_cbc( + table_te, + t1, t0, t3, t2, + t5, t6, to); + vmov_f64(d6, t4, t5); //q3 + + kernel_aescrypt_lastRound_cbc( + table_te, + t2, t1, t0, t3, + t4, t5, t6); + + kernel_aescrypt_lastRound_cbc( + table_te, + t3, t2, t1, t0, + t5, t6, to); + vmov_f64(d7, t4, t5); 
//q3 + pop(RegSet::of(to, key, from, keylen), sp); + veor_128(q3, q1, q3); + veor_128(q3, q3, q2); + vshl_128_64(q2, q4, 0); + + sub(table_te, table_te, 4 * 256); //Te + + vst1_64(d6, Address(post(to, 8)), Assembler::ALIGN_STD); + subs(len, len, 16); + vst1_64(d7, Address(post(to, 8)), Assembler::ALIGN_STD); + + b(L_loop2, Assembler::NE); + + vstr_f64(d4, Address(rvec)); + vstr_f64(d5, Address(rvec, 8)); + mov(r0, to); + pop(to, sp); + sub(r0, r0, to); +}; + +/* + * First round of SHA1 algorithm + */ +void MacroAssembler::sha_round1(Register st_b, Register st_c, Register st_d, + Register tmp, Register st_f, int sh) { + if (sh) { + eor(st_f, st_d, st_c, ror(32-sh)); + } else { + eor(st_f, st_d, st_c); + } + andr(st_f, st_f, st_b); + eor(st_f, st_f, st_d); +} + +/* + * Second and forth round of SHA1 algorithm + */ +void MacroAssembler::sha_round2(Register st_b, Register st_c, Register st_d, + Register tmp, Register st_f, int sh) { + if (sh) { + eor(st_f, st_b, st_c, ror(32-sh)); + } else { + eor(st_f, st_b, st_c); + } + eor(st_f, st_f, st_d); +} + +/* + * Third round of SHA1 algorithm + */ +void MacroAssembler::sha_round3(Register st_b, Register st_c, Register st_d, + Register tmp, Register st_f, int sh) { + if (sh) { + andr(st_f, st_b, st_c, ror(32-sh)); + orr(tmp, st_b, st_c, ror(32-sh)); + } else { + andr(st_f, st_b, st_c); + orr(tmp, st_b, st_c); + } + andr(tmp, st_d, tmp); + orr(st_f, st_f, tmp); +} + +/* + * Calculate Deltas w[i] and w[i+1] + * w[i] = (w[i-3] xor w[i-8] xor w[i-14] xor w[i-16]) rotl 1 + */ +void MacroAssembler::sha_w0(FloatRegister w16, FloatRegister w14, + FloatRegister w8, FloatRegister w4, FloatRegister w2, + FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4, + FloatRegister st_k, FloatRegister st_kw, bool update) { + vadd_64_32(st_kw, st_k, w16); + if(update) { + veor_64(tmp1, w16, w14); + vext_64(tmp2, w2, w4, 4); + veor_64(tmp3, tmp1, w8); + veor_64(tmp4, tmp3, tmp2); + + vshr_64_u32(tmp1, tmp4, 31); + vshl_64_32(tmp2, tmp4, 1); + vorr_64(w16, tmp1, tmp2); + } +} +/* + * Calculate Deltas w[i] and w[i+1] + */ +void MacroAssembler::sha_w(FloatRegister w16, FloatRegister w14, + FloatRegister w12, FloatRegister w10, FloatRegister w8, + FloatRegister w6, FloatRegister w4, FloatRegister w2, + FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4, + FloatRegister st_k, FloatRegister st_kw, Register counter, Register rtmp, + bool update) { + Label L_7, L_6, L_5, L_4, L_3, L_2, L_1, L_done; + andr(rtmp, counter, 0x7); + add(counter, counter, 1); + cmp(rtmp, 7); + b(L_7, Assembler::EQ); + cmp(rtmp, 6); + b(L_6, Assembler::EQ); + cmp(rtmp, 5); + b(L_5, Assembler::EQ); + cmp(rtmp, 4); + b(L_4, Assembler::EQ); + cmp(rtmp, 3); + b(L_3, Assembler::EQ); + cmp(rtmp, 2); + b(L_2, Assembler::EQ); + cmp(rtmp, 1); + b(L_1, Assembler::EQ); + sha_w0(w16, w14, w8, w4, w2, tmp1, tmp2, tmp3, tmp4, st_k, st_kw, update); + b(L_done); + BIND(L_1); { + sha_w0(w14, w12, w6, w2, w16, tmp1, tmp2, tmp3, tmp4, st_k, st_kw, update); + b(L_done); + } + BIND(L_2); { + sha_w0(w12, w10, w4, w16, w14, tmp1, tmp2, tmp3, tmp4, st_k, st_kw, update); + b(L_done); + } + BIND(L_3); { + sha_w0(w10, w8, w2, w14, w12, tmp1, tmp2, tmp3, tmp4, st_k, st_kw, update); + b(L_done); + } + BIND(L_4); { + sha_w0(w8, w6, w16, w12, w10, tmp1, tmp2, tmp3, tmp4, st_k, st_kw, update); + b(L_done); + } + BIND(L_5); { + sha_w0(w6, w4, w14, w10, w8, tmp1, tmp2, tmp3, tmp4, st_k, st_kw, update); + b(L_done); + } + BIND(L_6); { + sha_w0(w4, w2, w12, w8, w6, tmp1, tmp2, tmp3, 
tmp4, st_k, st_kw, update); + b(L_done); + } + BIND(L_7); { + sha_w0(w2, w16, w10, w6, w4, tmp1, tmp2, tmp3, tmp4, st_k, st_kw, update); + } + BIND(L_done); +} + +/** + * SHA1 digest + * + * @param from register pointing to source array address + * @param state register pointing to state array address + */ +void MacroAssembler::kernel_sha_implCompress(Register from, Register state, + Register counter, Register table_k, + Register st_a, Register st_b, + Register st_c, Register st_d, Register st_e, + Register tmp, Register counter2, Register st_new_a, Register st_w) { + Label L_round_1, L_round_2, L_round_3, L_round_4, L_round_4_cont, L_hash_no_w; + + FloatRegister w16 = d0; //q0-q7 + FloatRegister w14 = w16->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w12 = w14->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w10 = w12->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w8 = w10->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w6 = w8->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w4 = w6->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w2 = w4->successor(FloatRegisterImpl::DOUBLE); + FloatRegister wtmp1 = w2->successor(FloatRegisterImpl::DOUBLE); + FloatRegister wtmp2 = wtmp1->successor(FloatRegisterImpl::DOUBLE); + FloatRegister wtmp3 = wtmp2->successor(FloatRegisterImpl::DOUBLE); + FloatRegister wtmp4 = wtmp3->successor(FloatRegisterImpl::DOUBLE); + FloatRegister st_k1 = wtmp4->successor(FloatRegisterImpl::DOUBLE); + FloatRegister st_k2 = st_k1->successor(FloatRegisterImpl::DOUBLE); + FloatRegister st_k = st_k2->successor(FloatRegisterImpl::DOUBLE); + FloatRegister st_kw = st_k->successor(FloatRegisterImpl::DOUBLE); + + + assert_different_registers(st_a,st_b,st_c,st_d,st_e,tmp,counter2, st_new_a, st_w); + assert_different_registers(w2,w4,w6,w8,w10,w12,w14,w16); + + lea(table_k, ExternalAddress(StubRoutines::sha1_table_addr())); + + // read initial 16 W elements + vld1_64(w16, w14, w12, w10, Address(post(from, 32)), Assembler::ALIGN_STD); + vld1_64(w8, w6, w4, w2, Address(from), Assembler::ALIGN_STD); + + // revert W + vrev64_128_8(w16, w16); + vrev64_128_8(w12, w12); + vrev64_128_8(w8, w8); + vrev64_128_8(w4, w4); + // load state + ldr(st_a, Address(post(state, 4))); + ldr(st_b, Address(post(state, 4))); + ldr(st_c, Address(post(state, 4))); + ldr(st_d, Address(post(state, 4))); + ldr(st_e, Address(state)); + sub(state, state, 16); + + mov(counter2, 0); + mov(counter, 10); + // first round + vld1_64(st_k1, st_k2, Address(table_k), Assembler::ALIGN_128); + vdup_64_32(st_k, st_k1, 0); + + BIND(L_round_1); { + sha_w(w16, w14, w12, w10, w8, w6, w4, w2, wtmp1, wtmp2, wtmp3, wtmp4, st_k, st_kw, counter2, tmp); + + sha_round1(st_b, st_c, st_d, tmp, st_new_a, 0); + vmov_32(st_w, st_kw, 1); + add(st_new_a, st_new_a, st_a, ror(32-5)); + add(st_new_a, st_new_a, st_e); + add(st_new_a, st_new_a, st_w); + + vmov_32(st_w, st_kw, 0); + sha_round1(st_a, st_b, st_c, tmp, st_e, 30); + + add(tmp, st_e, st_new_a, ror(32-5)); + add(tmp, tmp, st_d); + + mov(st_e, st_c); + mov(st_d, st_b, ror(32-30)); + mov(st_c, st_a, ror(32-30)); + mov(st_b, st_new_a); + add(st_a, tmp, st_w); + + sub(counter, counter, 1); + }cbnz(counter, L_round_1); + + mov(counter, 10); + // second round + vdup_64_32(st_k, st_k1, 1); + + BIND(L_round_2); { + sha_w(w16, w14, w12, w10, w8, w6, w4, w2, wtmp1, wtmp2, wtmp3, wtmp4, st_k, st_kw, counter2, tmp); + + sha_round2(st_b, st_c, st_d, tmp, st_new_a, 0); + vmov_32(st_w, st_kw, 1); + add(st_new_a, st_new_a, st_a, ror(32-5)); + add(st_new_a, st_new_a, 
st_e); + add(st_new_a, st_new_a, st_w); + + vmov_32(st_w, st_kw, 0); + sha_round2(st_a, st_b, st_c, tmp, st_e, 30); + + add(tmp, st_e, st_new_a, ror(32-5)); + add(tmp, tmp, st_d); + + mov(st_e, st_c); + mov(st_d, st_b, ror(32-30)); + mov(st_c, st_a, ror(32-30)); + mov(st_b, st_new_a); + add(st_a, tmp, st_w); + + sub(counter, counter, 1); + }cbnz(counter, L_round_2); + + mov(counter, 10); + vdup_64_32(st_k, st_k2, 0); + // third round + + BIND(L_round_3); { + sha_w(w16, w14, w12, w10, w8, w6, w4, w2, wtmp1, wtmp2, wtmp3, wtmp4, st_k, st_kw, counter2, tmp); + + sha_round3(st_b, st_c, st_d, tmp, st_new_a, 0); + vmov_32(st_w, st_kw, 1); + add(st_new_a, st_new_a, st_a, ror(32-5)); + add(st_new_a, st_new_a, st_e); + add(st_new_a, st_new_a, st_w); + + vmov_32(st_w, st_kw, 0); + sha_round3(st_a, st_b, st_c, tmp, st_e, 30); + + add(tmp, st_e, st_new_a, ror(32-5)); + add(tmp, tmp, st_d); + + mov(st_e, st_c); + mov(st_d, st_b, ror(32-30)); + mov(st_c, st_a, ror(32-30)); + mov(st_b, st_new_a); + add(st_a, tmp, st_w); + + sub(counter, counter, 1); + }cbnz(counter, L_round_3); + + mov(counter, 10); + // forth round + vdup_64_32(st_k, st_k2, 1); + + BIND(L_round_4); { + sub(counter, counter, 1); + cmp(counter, 8); + b(L_hash_no_w, Assembler::LO); + sha_w(w16, w14, w12, w10, w8, w6, w4, w2, wtmp1, wtmp2, wtmp3, wtmp4, st_k, st_kw, counter2, tmp); + b(L_round_4_cont); + BIND(L_hash_no_w); + sha_w(w16, w14, w12, w10, w8, w6, w4, w2, wtmp1, wtmp2, wtmp3, wtmp4, st_k, st_kw, counter2, tmp, false); + BIND(L_round_4_cont); + + sha_round2(st_b, st_c, st_d, tmp, st_new_a, 0); + vmov_32(st_w, st_kw, 1); + add(st_new_a, st_new_a, st_a, ror(32-5)); + add(st_new_a, st_new_a, st_e); + add(st_new_a, st_new_a, st_w); + + vmov_32(st_w, st_kw, 0); + sha_round2(st_a, st_b, st_c, tmp, st_e, 30); + + add(tmp, st_e, st_new_a, ror(32-5)); + add(tmp, tmp, st_d); + + mov(st_e, st_c); + mov(st_d, st_b, ror(32-30)); + mov(st_c, st_a, ror(32-30)); + mov(st_b, st_new_a); + add(st_a, tmp, st_w); + + }cbnz(counter, L_round_4); + + // load state + ldr(tmp, Address(post(state, 4))); + add(st_a, st_a, tmp); + ldr(tmp, Address(post(state, 4))); + add(st_b, st_b, tmp); + ldr(tmp, Address(post(state, 4))); + add(st_c, st_c, tmp); + ldr(tmp, Address(post(state, 4))); + add(st_d, st_d, tmp); + ldr(tmp, Address(state)); + add(st_e, st_e, tmp); + sub(state, state, 16); + + // save state + str(st_a, Address(post(state, 4))); + str(st_b, Address(post(state, 4))); + str(st_c, Address(post(state, 4))); + str(st_d, Address(post(state, 4))); + str(st_e, Address(state)); +} +/** + * One iteration of SHA256 algorithm + * Σ0 := (a rotr 2) xor (a rotr 13) xor (a rotr 22) + * Ma := (a and b) xor (a and c) xor (b and c) + * t2 := Σ0 + Ma + * Σ1 := (e rotr 6) xor (e rotr 11) xor (e rotr 25) + * Ch := (e and f) xor ((not e) and g) + * t1 := h + Σ1 + Ch + k[i] + w[i] + * h := g + * g := f + * f := e + * e := d + t1 + * d := c + * c := b + * b := a + * a := t1 + t2 + */ +void MacroAssembler::sha256_implCompress_iter0( + Register Da, Register Db, Register Dc, Register Dd, + Register De, Register Df, Register Dg, Register Dh, + FloatRegister Dkw, int index, + Register Dtmp, + Register Dnew_a, Register Dnew_e + ) { + assert_different_registers(Da, Db, Dc, Dd, De, Df, Dg, Dh); + + // Σ0 := (a rotr 2) xor (a rotr 13) xor (a rotr 22) + // Σ1 := (e rotr 6) xor (e rotr 11) xor (e rotr 25) + andr(Dnew_a, Da, Db); + andr(Dnew_e, Da, Dc); + eor(Dnew_a, Dnew_a, Dnew_e); + andr(Dnew_e, Db, Dc); + eor(Dnew_e, Dnew_a, Dnew_e); //Ma + + mov(Dnew_a, Da, ror(2)); + eor(Dnew_a, 
Dnew_a, Da, ror(13)); + eor(Dnew_a, Dnew_a, Da, ror(22)); //Σ0 + + add(Dnew_a, Dnew_a, Dnew_e); //t2 + + andr(Dnew_e, De, Df); + mvn(Dtmp, De); + andr(Dtmp, Dtmp, Dg); + eor(Dtmp, Dnew_e, Dtmp); //Ch + + mov(Dnew_e, De, ror(6)); + eor(Dnew_e, Dnew_e, De, ror(11)); + eor(Dnew_e, Dnew_e, De, ror(25)); //Σ1 + + add(Dnew_e, Dnew_e, Dtmp); + vmov_32(Dtmp, Dkw, index); + add(Dnew_e, Dnew_e, Dh); + + add(Dtmp, Dnew_e, Dtmp); //t1 + + add(Dnew_e, Dtmp, Dd); //new_e + add(Dnew_a, Dtmp, Dnew_a); //new_a +}; +/** + * Four iterations of SHA256 algorithm + */ +void MacroAssembler::sha256_implCompress_iter( + Register ra, Register rb, Register rc, Register rd, + Register re, Register rf, Register rg, Register rh, + FloatRegister Dkw1, FloatRegister Dkw2, + Register step, + Register tmp, + Register ra2, Register re2 + ) { + Label L_4, L_3, L_2, L_1, L_done; + cmp(step, 4); + b(L_4, Assembler::EQ); + cmp(step, 3); + b(L_3, Assembler::EQ); + cmp(step, 2); + b(L_2, Assembler::EQ); + cmp(step, 1); + b(L_1, Assembler::EQ); + sha256_implCompress_iter0(ra, rb, rc, rd, re, rf, rg, rh, Dkw1, 0, tmp, ra2, re2); + sha256_implCompress_iter0(ra2, ra, rb, rc, re2, re, rf, rg, Dkw1, 1, tmp, rd, rh); + sha256_implCompress_iter0(rd, ra2, ra, rb, rh, re2, re, rf, Dkw2, 0, tmp, rc, rg); + sha256_implCompress_iter0(rc, rd, ra2, ra, rg, rh, re2, re, Dkw2, 1, tmp, rb, rf); + mov(step, 4); + b(L_done); + BIND(L_1); { + sha256_implCompress_iter0(ra2, ra, rb, rc, re2, re, rf, rg, Dkw1, 0, tmp, rd, rh); + sha256_implCompress_iter0(rd, ra2, ra, rb, rh, re2, re, rf, Dkw1, 1, tmp, rc, rg); + sha256_implCompress_iter0(rc, rd, ra2, ra, rg, rh, re2, re, Dkw2, 0, tmp, rb, rf); + sha256_implCompress_iter0(rb, rc, rd, ra2, rf, rg, rh, re2, Dkw2, 1, tmp, ra, re); + mov(step, 0); + b(L_done); + } + BIND(L_2); { + sha256_implCompress_iter0(rd, ra2, ra, rb, rh, re2, re, rf, Dkw1, 0, tmp, rc, rg); + sha256_implCompress_iter0(rc, rd, ra2, ra, rg, rh, re2, re, Dkw1, 1, tmp, rb, rf); + sha256_implCompress_iter0(rb, rc, rd, ra2, rf, rg, rh, re2, Dkw2, 0, tmp, ra, re); + sha256_implCompress_iter0(ra, rb, rc, rd, re, rf, rg, rh, Dkw2, 1, tmp, ra2, re2); + mov(step, 1); + b(L_done); + } + BIND(L_3); { + sha256_implCompress_iter0(rc, rd, ra2, ra, rg, rh, re2, re, Dkw1, 0, tmp, rb, rf); + sha256_implCompress_iter0(rb, rc, rd, ra2, rf, rg, rh, re2, Dkw1, 1, tmp, ra, re); + sha256_implCompress_iter0(ra, rb, rc, rd, re, rf, rg, rh, Dkw2, 0, tmp, ra2, re2); + sha256_implCompress_iter0(ra2, ra, rb, rc, re2, re, rf, rg, Dkw2, 1, tmp, rd, rh); + mov(step, 2); + b(L_done); + } + BIND(L_4); { + sha256_implCompress_iter0(rb, rc, rd, ra2, rf, rg, rh, re2, Dkw1, 0, tmp, ra, re); + sha256_implCompress_iter0(ra, rb, rc, rd, re, rf, rg, rh, Dkw1, 1, tmp, ra2, re2); + sha256_implCompress_iter0(ra2, ra, rb, rc, re2, re, rf, rg, Dkw2, 0, tmp, rd, rh); + sha256_implCompress_iter0(rd, ra2, ra, rb, rh, re2, re, rf, Dkw2, 1, tmp, rc, rg); + mov(step, 3); + } + BIND(L_done); +}; + + /* + * Calculate Deltas w[i] and w[i+1] + * s0 := (w[i-15] rotr 7) xor (w[i-15] rotr 18) xor (w[i-15] shr 3) + * s1 := (w[i-2] rotr 17) xor (w[i-2] rotr 19) xor (w[i-2] shr 10) + * w[i] := w[i-16] + s0 + w[i-7] + s1 + */ +void MacroAssembler::sha256_w0( + FloatRegister w_m16, FloatRegister w_m15, FloatRegister w_m14, + FloatRegister w_m7, FloatRegister w_m6, + FloatRegister w_m2, + FloatRegister Qtmp_S0, FloatRegister Qtmp_S1, + FloatRegister Qtmp1){ + + vmov_64(Qtmp1, w_m15); + vmov_64(Qtmp1->successor(FloatRegisterImpl::DOUBLE), w_m14); + vshr_128_u64(Qtmp_S0, Qtmp1, 7); + vshr_128_u64(Qtmp_S1, 
Qtmp1, 18); + veor_128(Qtmp_S0, Qtmp_S0, Qtmp_S1); + vshr_128_u64(Qtmp_S1, Qtmp1, 35); + veor_128(Qtmp_S0, Qtmp_S0, Qtmp_S1); //S0 + + vshr_128_u64(Qtmp_S1, w_m2, 17); + vshr_128_u64(Qtmp1, w_m2, 19); + veor_128(Qtmp_S1, Qtmp_S1, Qtmp1); + vshr_128_u64(Qtmp1, w_m2, 42); + veor_128(Qtmp_S1, Qtmp_S1, Qtmp1); //S1 + + vmov_64(Qtmp1, w_m7); + vmov_64(Qtmp1->successor(FloatRegisterImpl::DOUBLE), w_m6); + vadd_128_32(Qtmp1, Qtmp1, w_m16); + vadd_128_32(Qtmp1, Qtmp1, Qtmp_S0); + vadd_128_32(w_m16, Qtmp1, Qtmp_S1); // w[i/i+1] + + vdup_64_32(w_m16, w_m16, 0); + vdup_64_32(w_m15, w_m15, 0); +} + +/* + * Calculate Deltas w[i] ... w[i+3] + */ +void MacroAssembler::sha256_w(FloatRegister w16, FloatRegister w14, + FloatRegister w12, FloatRegister w10, FloatRegister w8, + FloatRegister w6, FloatRegister w4, FloatRegister w2, + FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, + FloatRegister st_kw, Register counter, Register rtmp) { + FloatRegister w15 = w16->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w13 = w14->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w11 = w12->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w9 = w10->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w7 = w8->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w5 = w6->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w3 = w4->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w1 = w2->successor(FloatRegisterImpl::DOUBLE); + + FloatRegister Dtmp1 = as_FloatRegister(tmp1->encoding()); + FloatRegister Dtmp2 = Dtmp1->successor(FloatRegisterImpl::DOUBLE); + Label L_3, L_2, L_1, L_done; + + andr(rtmp, counter, 0x3); + cmp(rtmp, 3); + b(L_3, Assembler::EQ); + cmp(rtmp, 2); + b(L_2, Assembler::EQ); + cmp(rtmp, 1); + b(L_1, Assembler::EQ); + vext_64(Dtmp1, w16, w15, 4); + vext_64(Dtmp2, w14, w13, 4); + vadd_128_32(st_kw, st_kw, tmp1); + cmp(counter, 3); + b(L_done, Assembler::LO); + sha256_w0(w16, w15, w14, w7, w6, w2, tmp1, tmp2, tmp3); + sha256_w0(w14, w13, w12, w5, w4, w16, tmp1, tmp2, tmp3); + b(L_done); + BIND(L_3); { + vext_64(Dtmp1, w12, w11, 4); + vext_64(Dtmp2, w10, w9, 4); + vadd_128_32(st_kw, st_kw, tmp1); + cmp(counter, 3); + b(L_done, Assembler::LO); + sha256_w0(w12, w11, w10, w3, w2, w14, tmp1, tmp2, tmp3); + sha256_w0(w10, w9, w8, w1, w16, w12, tmp1, tmp2, tmp3); + b(L_done); + } + BIND(L_2); { + vext_64(Dtmp1, w8, w7, 4); + vext_64(Dtmp2, w6, w5, 4); + vadd_128_32(st_kw, st_kw, tmp1); + cmp(counter, 3); + b(L_done, Assembler::LO); + sha256_w0(w8, w7, w6, w15, w14, w10, tmp1, tmp2, tmp3); + sha256_w0(w6, w5, w4, w13, w12, w8, tmp1, tmp2, tmp3); + b(L_done); + } + BIND(L_1); { + vext_64(Dtmp1, w4, w3, 4); + vext_64(Dtmp2, w2, w1, 4); + vadd_128_32(st_kw, st_kw, tmp1); + cmp(counter, 3); + b(L_done, Assembler::LO); + sha256_w0(w4, w3, w2, w11, w10, w6, tmp1, tmp2, tmp3); + sha256_w0(w2, w1, w16, w9, w8, w4, tmp1, tmp2, tmp3); + } + BIND(L_done); +} + +/** + * SHA256 digest + * + * @param from register pointing to source array address + * @param state register pointing to state array address + */ +void MacroAssembler::kernel_sha256_implCompress(Register from, Register state, + Register counter, Register table_k, + Register ra, Register rb, Register rc, Register rd, Register re, + Register rf, Register rg, Register rh, + Register ra2, Register re2) { + + Label L_hash_loop, L_hash_loop_done, L_hash_no_w; + lea(table_k, ExternalAddress(StubRoutines::sha256_table_addr())); + + // read next k + vld1_64(d14, d15, Address(post(table_k, 16)), Assembler::ALIGN_128); + // read initial 16 
W elements in q8-q11 + vld1_64(d16, d17, d18, d19, Address(post(from, 32)), Assembler::ALIGN_STD); // read from + vld1_64(d20, d21, d22, d23, Address(post(from, 32)), Assembler::ALIGN_STD); // read from + // revert W + vrev32_128_8(q8, q8); + vrev32_128_8(q9, q9); + vrev32_128_8(q10, q10); + vrev32_128_8(q11, q11); + + vadd_128_32(q7, q7, q8); // k + w + + vdup_64_32(d31, d23, 1); //w1 + vdup_64_32(d30, d23, 0); //w2 + vdup_64_32(d29, d22, 1); //w3 + vdup_64_32(d28, d22, 0); //w4 + vdup_64_32(d27, d21, 1); //w5 + vdup_64_32(d26, d21, 0); //w6 + vdup_64_32(d25, d20, 1); //w7 + vdup_64_32(d24, d20, 0); //w8 + vdup_64_32(d23, d19, 1); //w9 + vdup_64_32(d22, d19, 0); //w10 + vdup_64_32(d21, d18, 1); //w11 + vdup_64_32(d20, d18, 0); //w12 + vdup_64_32(d19, d17, 1); //w13 + vdup_64_32(d18, d17, 0); //w14 + vdup_64_32(d17, d16, 1); //w15 + vdup_64_32(d16, d16, 0); //w16 + + mov(counter, 16); + // load state + push(state, sp); + ldr(ra, Address(post(state, 4))); + ldr(rb, Address(post(state, 4))); + ldr(rc, Address(post(state, 4))); + ldr(rd, Address(post(state, 4))); + ldr(re, Address(post(state, 4))); + ldr(rf, Address(post(state, 4))); + ldr(rg, Address(post(state, 4))); + ldr(rh, Address(state)); + + const Register tmp = from; + const Register step = state; + + // calculate deltas + sha256_w0(d16, d17, d18, d25, d26, d30, q0, q1, q2); + sha256_w0(d18, d19, d20, d27, d28, d16, q0, q1, q2); + + mov(step, 0); // use state for internal counter + sub(counter, counter, 1); + + sha256_implCompress_iter(ra, rb, rc, rd, re, rf, rg, rh, d14, d15, + step, + tmp, ra2, re2); + + BIND(L_hash_loop); { + // read next k + vld1_64(d14, d15, Address(post(table_k, 16)), Assembler::ALIGN_128); + //calculate deltas + sha256_w(q8, q9, q10, q11, q12, q13, q14, q15, + q0, q1, q2, + q7, + counter, tmp); + + //calculate state + sha256_implCompress_iter(ra, rb, rc, rd, re, rf, rg, rh, d14, d15, + step, + tmp, ra2, re2); + sub(counter, counter, 1); + } cbnz(counter, L_hash_loop); + + pop(state, sp); + + // load initial state and add to current state + ldr(tmp, Address(post(state, 4))); + add(rb, rb, tmp); + ldr(tmp, Address(post(state, 4))); + add(rc, rc, tmp); + ldr(tmp, Address(post(state, 4))); + add(rd, rd, tmp); + ldr(tmp, Address(post(state, 4))); + add(ra2, ra2, tmp); + ldr(tmp, Address(post(state, 4))); + add(rf, rf, tmp); + ldr(tmp, Address(post(state, 4))); + add(rg, rg, tmp); + ldr(tmp, Address(post(state, 4))); + add(rh, rh, tmp); + ldr(tmp, Address(state)); + add(re2, re2, tmp); + sub(state, state, 28); + + // save state + str(rb, Address(post(state, 4))); + str(rc, Address(post(state, 4))); + str(rd, Address(post(state, 4))); + str(ra2, Address(post(state, 4))); + str(rf, Address(post(state, 4))); + str(rg, Address(post(state, 4))); + str(rh, Address(post(state, 4))); + str(re2, Address(post(state, 4))); +} + +/** + * SHA512 Sigma + * Sigma(x) = ROTR(x, sh1) XOR ROTR(x, sh2) XOR ROTR(x, sh3) + */ +void MacroAssembler::sha512_sigma(FloatRegister x, + FloatRegister Qtmp, FloatRegister Dsigma, int sh1, int sh2, int sh3) { + FloatRegister Dtmp0 = as_FloatRegister(Qtmp->encoding()); + FloatRegister Dtmp1 = Dtmp0->successor(FloatRegisterImpl::DOUBLE); + assert_different_registers(x, Dtmp0, Dtmp1, Dsigma); + + vshr_64_u64(Dtmp0, x, sh1); + vshl_64_64(Dtmp1, x, 64-sh1); + vorr_64(Dsigma, Dtmp0, Dtmp1); + + vshr_64_u64(Dtmp0, x, sh2); + vshl_64_64(Dtmp1, x, 64-sh2); + vorr_64(Dtmp0, Dtmp0, Dtmp1); + + veor_64(Dsigma, Dsigma, Dtmp0); + + vshr_64_u64(Dtmp0, x, sh3); + vshl_64_64(Dtmp1, x, 64-sh3); + vorr_64(Dtmp0, 
Dtmp0, Dtmp1); + + veor_64(Dsigma, Dsigma, Dtmp0); +} + +/** + * SHA512 Delta + * Delta(x) = ROTR(x, sh1) XOR ROTR(x, sh2) XOR SHR(x, sh3) + */ +void MacroAssembler::sha512_delta(FloatRegister x, + FloatRegister Qtmp, FloatRegister Ddelta, int sh1, int sh2, int sh3) { + FloatRegister Dtmp0 = as_FloatRegister(Qtmp->encoding()); + FloatRegister Dtmp1 = Dtmp0->successor(FloatRegisterImpl::DOUBLE); + assert_different_registers(x, Dtmp0, Dtmp1, Ddelta); + + vshr_64_u64(Dtmp0, x, sh1); + vshl_64_64(Dtmp1, x, 64-sh1); + vorr_64(Ddelta, Dtmp0, Dtmp1); + + vshr_64_u64(Dtmp0, x, sh2); + vshl_64_64(Dtmp1, x, 64-sh2); + vorr_64(Dtmp0, Dtmp0, Dtmp1); + + veor_64(Ddelta, Ddelta, Dtmp0); + + vshr_64_u64(Dtmp0, x, sh3); + + veor_64(Ddelta, Ddelta, Dtmp0); +} + +/** + * SHA512 Ch + * Ch(x, y, z) = (x AND y) XOR ( NOT x AND z) + */ +void MacroAssembler::sha512_ch(FloatRegister x, FloatRegister y, FloatRegister z, + FloatRegister Dtmp, FloatRegister Dch) { + assert_different_registers(x, Dtmp, Dch); + + vmvn_64(Dtmp, x); + vand_64(Dtmp, Dtmp, z); + + vand_64(Dch, x, y); + veor_64(Dch, Dtmp, Dch); +} + +/** + * SHA512 Maj + * Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z) + */ +void MacroAssembler::sha512_maj(FloatRegister x, FloatRegister y, FloatRegister z, + FloatRegister Dtmp, FloatRegister Dmaj) { + assert_different_registers(x, Dtmp, Dmaj); + + vand_64(Dmaj, x, y); + vand_64(Dtmp, x, z); + veor_64(Dmaj, Dmaj, Dtmp); + vand_64(Dtmp, y, z); + veor_64(Dmaj, Dmaj, Dtmp); +} + +/** + * SHA512 digest + * + * @param from register pointing to source array address + * @param state register pointing to state array address + */ +void MacroAssembler::kernel_sha512_implCompress(Register from, Register state, + Register counter, Register table_k) { + Label L_hash_loop, L_hash_no_w; + FloatRegister st_a = d18; //q9-q12 + FloatRegister st_b = st_a->successor(FloatRegisterImpl::DOUBLE); + FloatRegister st_c = st_b->successor(FloatRegisterImpl::DOUBLE); + FloatRegister st_d = st_c->successor(FloatRegisterImpl::DOUBLE); + FloatRegister st_e = st_d->successor(FloatRegisterImpl::DOUBLE); + FloatRegister st_f = st_e->successor(FloatRegisterImpl::DOUBLE); + FloatRegister st_g = st_f->successor(FloatRegisterImpl::DOUBLE); + FloatRegister st_h = st_g->successor(FloatRegisterImpl::DOUBLE); + + FloatRegister w16 = d0; //q0-q7 + FloatRegister w15 = w16->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w14 = w15->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w13 = w14->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w12 = w13->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w11 = w12->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w10 = w11->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w9 = w10->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w8 = w9->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w7 = w8->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w6 = w7->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w5 = w6->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w4 = w5->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w3 = w4->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w2 = w3->successor(FloatRegisterImpl::DOUBLE); + FloatRegister w1 = w2->successor(FloatRegisterImpl::DOUBLE); + + FloatRegister t1 = d26; + FloatRegister t2 = d27; + FloatRegister new_a = st_h; + FloatRegister new_e = st_d; + FloatRegister new_new_a = st_g; + FloatRegister new_new_e = st_c; + + FloatRegister w0 = w1->successor(FloatRegisterImpl::DOUBLE); + 
assert_different_registers(st_a,st_b,st_c,st_d,st_e,st_f,st_g,st_h); + assert_different_registers(w0,w1,w2,w3,w4,w5,w6,w7); + assert_different_registers(w8,w9,w10,w11,w12,w13,w14,w15,w16); + + lea(table_k, ExternalAddress(StubRoutines::sha512_table_addr())); + + // read initial 16 W elements + vld1_64(w16, w15, w14, w13, Address(post(from, 32)), Assembler::ALIGN_STD); + vld1_64(w12, w11, w10, w9, Address(post(from, 32)), Assembler::ALIGN_STD); + vld1_64(w8, w7, w6, w5, Address(post(from, 32)), Assembler::ALIGN_STD); + vld1_64(w4, w3, w2, w1, Address(from), Assembler::ALIGN_STD); + // read initial state to a,b,c,d,e,f,g,h + vld1_64(st_a, st_b, st_c, st_d, Address(post(state, 32)), Assembler::ALIGN_STD); + vld1_64(st_e, st_f, st_g, st_h, Address(state), Assembler::ALIGN_STD); + sub(state, state, 32); + + // revert W + vrev64_128_8(w16, w16); + vrev64_128_8(w14, w14); + vrev64_128_8(w12, w12); + vrev64_128_8(w10, w10); + vrev64_128_8(w8, w8); + vrev64_128_8(w6, w6); + vrev64_128_8(w4, w4); + vrev64_128_8(w2, w2); + + + mov(counter, 40); + BIND(L_hash_loop); { + sub(counter, counter, 1); + // first iteration + // calculate T1 + // read K + vld1_64(d30, Address(post(table_k, 8)), Assembler::ALIGN_64); + vadd_64_64(d31, st_h, w16); + sha512_ch(st_e, st_f, st_g, t2, t1); + sha512_sigma(st_e, q14, t2, 14, 18, 41); + vadd_128_64(q13, q13, q15); + vadd_64_64(t1, t1, t2); + + // calculate T2 + sha512_maj(st_a, st_b, st_c, d30, d31); + sha512_sigma(st_a, q14, t2, 28, 34, 39); + vadd_64_64(t2, t2, d31); + + vadd_64_64(new_a, t1, t2); + vadd_64_64(new_e, st_d, t1); + + // second iteration + // calculate T1 + // read K + vld1_64(d30, Address(post(table_k, 8)), Assembler::ALIGN_64); + vadd_64_64(d31, st_g, w15); + sha512_ch(new_e, st_e, st_f, t2, t1); + sha512_sigma(new_e, q14, t2, 14, 18, 41); + vadd_128_64(q13, q13, q15); + vadd_64_64(t1, t1, t2); + + // calculate T2 + sha512_maj(new_a, st_a, st_b, d30, d31); + sha512_sigma(new_a, q14, t2, 28, 34, 39); + vadd_64_64(t2, t2, d31); + + vadd_64_64(new_new_a, t1, t2); + vadd_64_64(new_new_e, st_c, t1); + + // restore a,b,c,d,e,f,g,h sequence + vswp_128(st_g, st_a); + vswp_128(st_g, st_c); + vswp_128(st_g, st_e); + + cmp(counter, 8); + b(L_hash_no_w, Assembler::LO); + + // calculate W[+1], W[+2] + sha512_delta(w15, q14, t1, 1, 8, 7); + sha512_delta(w2, q14, d30, 19, 61, 6); + sha512_delta(w14, q14, t2, 1, 8, 7); + sha512_delta(w1, q14, d31, 19, 61, 6); + + vadd_128_64(w16, w16, t1); + vadd_128_64(w16, w16, q15); + vadd_64_64(w16, w16, w7); + vadd_64_64(w15, w15, w6); + + BIND(L_hash_no_w); + + vswp_128(w16, w14); + vswp_128(w14, w12); + vswp_128(w12, w10); + vswp_128(w10, w8); + vswp_128(w8, w6); + vswp_128(w6, w4); + vswp_128(w4, w2); + } cbnz(counter, L_hash_loop); + // read initial state to w16 - w9 + vld1_64(w16, w15, w14, w13, Address(post(state, 32)), Assembler::ALIGN_STD); + vld1_64(w12, w11, w10, w9, Address(state), Assembler::ALIGN_STD); + sub(state, state, 32); + + // update state + vadd_128_64(st_a, st_a, w16); + vadd_128_64(st_c, st_c, w14); + vadd_128_64(st_e, st_e, w12); + vadd_128_64(st_g, st_g, w10); + + // store state + vst1_64(st_a, st_b, st_c, st_d, Address(post(state, 32)), Assembler::ALIGN_STD); + vst1_64(st_e, st_f, st_g, st_h, Address(state), Assembler::ALIGN_STD); +} + +void MacroAssembler::bfc_impl(Register Rd, int lsb, int width, Condition cond) { + if (width > 15 && lsb == 0) { + lsr(Rd, Rd, width); + lsl(Rd, Rd, width); + } else if (width > 15 && lsb + width == 32) { + lsl(Rd, Rd, 32 - lsb); + lsr(Rd, Rd, 32 - lsb); + } else { + 
const int lsb1 = (lsb & 1); + int w1 = width <= 8 - lsb1 ? width : 8 - lsb1; + while (width) { + bic(Rd, Rd, ((1 << w1) - 1) << lsb); + width -= w1; + lsb += w1; + w1 = width > 8 ? 8 : width; + } + } +} + +// get_thread can be called anywhere inside generated code so we need +// to save whatever non-callee save context might get clobbered by the +// call to the C thread_local lookup call or, indeed, the call setup +// code. x86 appears to save C arg registers. + +void MacroAssembler::get_thread(Register dst) { + // call pthread_getspecific + // void * pthread_getspecific(pthread_key_t key); + + // Save all call-clobbered regs except dst, plus rscratch1 and rscratch2. + RegSet saved_regs = RegSet::range(r0, r3) + rscratch1 + rscratch2 + lr - dst; + push(saved_regs, sp); + + // Align stack and save value for return + mov(c_rarg1, sp); + sub(sp, sp, wordSize); + bic(sp, sp, 7); + str(c_rarg1, Address(sp)); + + mov(rscratch2, CAST_FROM_FN_PTR(address, Thread::current)); + + bl(rscratch2); + //undo alignment + ldr(sp, Address(sp)); + + if (dst != c_rarg0) { + mov(dst, c_rarg0); + } + + // restore pushed registers + pop(saved_regs, sp); +} + +#ifdef COMPILER2 +// 24-bit word range == 26-bit byte range +bool check26(int offset) { + // this could be simplified, but it mimics encoding and decoding + // an actual branch insrtuction + int off1 = offset << 6 >> 8; + int encoded = off1 & ((1<<24)-1); + int decoded = encoded << 8 >> 6; + return offset == decoded; +} + +// Perform some slight adjustments so the default 32MB code cache +// is fully reachable. +static inline address first_cache_address() { + return CodeCache::low_bound() + sizeof(HeapBlock::Header); +} +static inline address last_cache_address() { + return CodeCache::high_bound() - NativeInstruction::arm_insn_sz; +} + +// Can we reach target using unconditional branch or call from anywhere +// in the code cache (because code can be relocated)? +bool MacroAssembler::_reachable_from_cache(address target) { +#ifdef __thumb__ + if ((1 & (intptr_t)target) != 0) { + // Return false to avoid 'b' if we need switching to THUMB mode. + return false; + } +#endif + + address cl = first_cache_address(); + address ch = last_cache_address(); + + if (ForceUnreachable) { + // Only addresses from CodeCache can be treated as reachable. + if (target < CodeCache::low_bound() || CodeCache::high_bound() <= target) { + return false; + } + } + + intptr_t loffset = (intptr_t)target - (intptr_t)cl; + intptr_t hoffset = (intptr_t)target - (intptr_t)ch; + + return check26(loffset - 8) && check26(hoffset - 8); +} + +bool MacroAssembler::_cache_fully_reachable() { + address cl = first_cache_address(); + address ch = last_cache_address(); + return _reachable_from_cache(cl) && _reachable_from_cache(ch); +} + +bool MacroAssembler::reachable_from_cache(address target) { + assert(CodeCache::contains(pc()), "not supported"); + return _reachable_from_cache(target); +} + +bool MacroAssembler::cache_fully_reachable() { + return _cache_fully_reachable(); +} + +// IMPORTANT: does not generate mt-safe patchable code +void MacroAssembler::call(address target, RelocationHolder rspec, Condition cond) { + Register scratch = lr; + assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported"); + if (reachable_from_cache(target)) { + relocate(rspec); + bl(target, cond); + return; + } + + mov(scratch, (intptr_t)target, cond); + bl(scratch, cond); +} + +// IMPORTANT: does not generate mt-safe patchable code. 
C2 only uses this method +// for calls into runtime which do not need mt-safe patching +void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch, Condition cond) { + assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported"); + if (reachable_from_cache(target)) { + relocate(rtype); + b(target, cond); + return; + } + + mov(scratch, (intptr_t)target, cond); + b(scratch, cond); +} + +void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) { + // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM + if (UseStackBanging) { + const int page_size = os::vm_page_size(); + + sub(tmp, sp, StackShadowPages*page_size); + strb(r0, Address(tmp)); + for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) { + strb(r0, pre(tmp, -0xff0)); + } + } +} + +void MacroAssembler::floating_cmp(Register dst) { + vmrs(dst); + orr(dst, dst, 0x08000000); + eor(dst, dst, dst, lsl(3)); + mov(dst, dst, asr(30)); +} + +void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rmark, Register Rscratch, Register Rscratch2) { + assert(Roop != Rscratch, ""); + assert(Roop != Rmark, ""); + assert(Rbox != Rscratch, ""); + assert(Rbox != Rmark, ""); + + Label fast_lock, done; + + if (UseBiasedLocking && !UseOptoBiasInlining) { + Label failed; + biased_locking_enter(Roop, Rmark, Rscratch, Rscratch2, false, done, &failed); + bind(failed); + } + + ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes())); + tst(Rmark, markOopDesc::unlocked_value); + b(fast_lock, Assembler::NE); + + // Check for recursive lock + // See comments in InterpreterMacroAssembler::lock_object for + // explanations on the fast recursive locking check. + // -1- test low 2 bits + movs(Rscratch, Rmark, lsl(30)); + // -2- test (hdr - SP) if the low two bits are 0 + sub(Rscratch, Rmark, sp, Assembler::EQ); + movs(Rscratch, Rscratch, lsr(exact_log2(os::vm_page_size())), Assembler::EQ); + // If still 'eq' then recursive locking OK + // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107) + str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); + b(done); + + bind(fast_lock); + str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); + + membar(StoreStore); + ldrex(Rscratch, Address(Roop, oopDesc::mark_offset_in_bytes())); + cmp(Rscratch, Rmark); + strex(Rscratch, Rbox, Address(Roop, oopDesc::mark_offset_in_bytes()), Assembler::EQ); + cmp(Rscratch, 0, Assembler::EQ); + membar(AnyAny); + + bind(done); +} + +void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2) { + Register Rmark = Rscratch2; + + assert(Roop != Rscratch, ""); + assert(Roop != Rmark, ""); + assert(Rbox != Rscratch, ""); + assert(Rbox != Rmark, ""); + + Label done; + + if (UseBiasedLocking && !UseOptoBiasInlining) { + biased_locking_exit(Roop, Rscratch, done); + } + + ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); + // If hdr is NULL, we've got recursive locking and there's nothing more to do + cmp(Rmark, 0); + b(done, Assembler::EQ); + + // Restore the object header + membar(AnyAny); + ldrex(Rscratch, Address(Roop, oopDesc::mark_offset_in_bytes())); + cmp(Rscratch, Rmark); + strex(Rscratch, Rbox, Address(Roop, oopDesc::mark_offset_in_bytes()), Assembler::EQ); + cmp(Rscratch, 0, Assembler::EQ); + + membar(StoreLoad); + + bind(done); +} + +#endif --- /dev/null 2018-09-25 19:25:11.000000000 +0300 +++ 
new/src/hotspot/cpu/aarch32/macroAssembler_aarch32.hpp 2018-09-25 19:25:11.000000000 +0300 @@ -0,0 +1,1115 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_MACROASSEMBLER_AARCH32_HPP +#define CPU_AARCH32_VM_MACROASSEMBLER_AARCH32_HPP + +#include "asm/assembler.hpp" +#include "nativeInst_aarch32.hpp" + +// MacroAssembler extends Assembler by frequently used macros. +// +// Instructions for which a 'better' code sequence exists depending +// on arguments should also go in here. + +class MacroAssembler: public Assembler { + friend class LIR_Assembler; + friend class G1BarrierSetAssembler; + + using Assembler::mov; + + protected: + + // Support for VM calls + // + // This is the base routine called by the different versions of call_VM_leaf. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + virtual void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments, // the number of arguments to pop after the call + Label *retaddr = NULL + ); + + virtual void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments, // the number of arguments to pop after the call + Label &retaddr) { + call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); + } + + // This is the base routine called by the different versions of call_VM. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + // + // If no java_thread register is specified (noreg) than rthread will be used instead. call_VM_base + // returns the register which contains the thread upon return. If a thread register has been + // specified, the return value will correspond to that register. If no last_java_sp is specified + // (noreg) than rsp will be used instead. 
+ virtual void call_VM_base( // returns the register containing the thread upon return + Register oop_result, // where an oop-result ends up if any; use noreg otherwise + Register java_thread, // the thread if computed before ; use noreg otherwise + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise + address entry_point, // the entry point + int number_of_arguments, // the number of arguments (w/o thread) to pop after the call + bool check_exceptions // whether to check for pending exceptions after return + ); + + public: + void init_unseen_bytecodes(); + MacroAssembler(CodeBuffer* code) : Assembler(code) { init_unseen_bytecodes();} + + // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. + // The implementation is only non-empty for the InterpreterMacroAssembler, + // as only the interpreter handles PopFrame and ForceEarlyReturn requests. + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + void safepoint_poll(Label& slow_path); + void safepoint_poll_acquire(Label& slow_path); + + void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); + + // Biased locking support + // obj_reg must be loaded up with the appropriate values. + // swap_reg is killed. + // tmp_reg and tmp_reg2 shall be supplied. + // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. + // Returns offset of first potentially-faulting instruction for null + // check info (currently consumed only by C1). If + // swap_reg_contains_mark is true then returns -1 as it is assumed + // the calling code has already passed any potential faults. + int biased_locking_enter(Register obj_reg, + Register swap_reg, Register tmp_reg, Register tmp_reg2, + bool swap_reg_contains_mark, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL); + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); + + + // Helper functions for statistics gathering. + // Unconditional atomic increment. + void atomic_inc(Register counter_addr, Register tmp); + void atomic_inc(Address counter_addr, Register tmp1, Register tmp2) { + lea(tmp1, counter_addr); + atomic_inc(tmp1, tmp2); + } + // Load Effective Address + void lea(Register r, const Address &a) { + InstructionMark im(this); + code_section()->relocate(inst_mark(), a.rspec()); + a.lea(this, r); + } + + virtual void _call_Unimplemented(address call_site) { + mov(rscratch2, call_site); + stop("HALT"); + } + +#define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) + +// macro assembly operations needed for aarch32 + +private: + + int push(unsigned int bitset, Register stack); + int pop(unsigned int bitset, Register stack); + +public: + + void mov(Register dst, Address a, Condition cond = C_DFLT); + + void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); } + void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); } + + // now mov instructions for loading absolute addresses and 32bit immediates + + inline void mov(Register dst, address addr, Condition cond = C_DFLT) { + // TODO: Do Address end up as address and then passing through this method, after + // being marked for relocation elsewhere? 
If not (as I suspect) then this can + // be relaxed to mov_immediate to potentially produce shorter code sequences. + mov_immediate32(dst, (uint32_t)addr, cond, false); + } + + inline void mov(Register dst, long l, Condition cond = C_DFLT) { + mov(dst, (uint32_t)l, cond); + } + inline void mov(Register dst, unsigned long l, Condition cond = C_DFLT) { + mov(dst, (uint32_t)l, cond); + } + inline void mov(Register dst, int i, Condition cond = C_DFLT) { + mov(dst, (uint32_t)i, cond); + } +#ifdef COMPILER2 + inline void mov(Register dst, jlong i, Condition cond = C_DFLT) { + assert(!(i >> 32), "must be 32-bit"); // really a 32-bit value contained in jlong. not sign extended! + mov(dst, (uint32_t)i, cond); + } + inline void mov(Register dst, julong i, Condition cond = C_DFLT) { + assert(!(i >> 32), "must be 32-bit"); + mov(dst, (uint32_t)i, cond); + } +#endif + inline void mov(Register dst, uint32_t i, Condition cond = C_DFLT) { + mov_immediate(dst, i, cond, false); + } + + inline void mov(Register dst, Register src, Condition cond = C_DFLT) { + Assembler::mov(dst, src, cond); + } + inline void mov(Register dst, Register src, shift_op shift, + Condition cond = C_DFLT) { + Assembler::mov(dst, src, shift, cond); + } + // TODO add sflag compatibility + void movptr(Register r, uintptr_t imm32, Condition cond = C_DFLT); + + // to reduce the chance for mistake these shall overload the mvn(Register, Register) variant + using Assembler::mvn; + using Assembler::mvns; + inline void mvn(Register dst, uint32_t i, Condition cond = C_DFLT) { + mov_immediate(dst, ~i, cond, false); + } + inline void mvns(Register dst, uint32_t i, Condition cond = C_DFLT) { + mov_immediate(dst, ~i, cond, true); + } + + void ret(Register reg); + + // Both of these are aarch64 instructions that can easily be emulated + // Note that this does not quite have the same semantics as aarch64 + // version as this updates the s flag. + void cbz(Register r, Label& l) { + cmp(r, 0); + b(l, EQ); + } + void cbnz(Register r, Label& l) { + cmp(r, 0); + b(l, NE); + } + void tbz(Register r, unsigned bit, Label& l) { + tst(r, 1 << bit); + b(l, EQ); + } + void tbnz(Register r, unsigned bit, Label& l) { + tst(r, 1 << bit); + b(l, NE); + } + + void addmw(Address a, Register incr, Register scratch) { + ldr(scratch, a); + add(scratch, scratch, incr); + str(scratch, a); + } + + // Add constant to memory word + void addmw(Address a, int imm, Register scratch) { + ldr(scratch, a); + if (imm > 0) + add(scratch, scratch, (unsigned)imm); + else + sub(scratch, scratch, (unsigned)-imm); + str(scratch, a); + } + +// XXX stubs + + // macro instructions for accessing and updating floating point + // status register + // + // FPSR : op1 == 011 + // CRn == 0100 + // CRm == 0100 + // op2 == 001 + + inline void get_fpsr(Register reg = as_Register(0xf)) { + vmrs(reg); + } + + inline void set_fpsr(Register reg) { + vmsr(reg); + } + + inline void clear_fpsr() { + mov(rscratch1, 0); + set_fpsr(rscratch1); + } + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). 
+ + virtual void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); + + static address target_addr_for_insn(address insn_addr, unsigned insn); + static address target_addr_for_insn(address insn_addr) { + unsigned insn = *(unsigned*)insn_addr; + return target_addr_for_insn(insn_addr, insn); + } + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. + static int pd_patch_instruction_size(address branch, address target); + static void pd_patch_instruction(address branch, address target) { + pd_patch_instruction_size(branch, target); + } + +#ifndef PRODUCT + static void pd_print_patched_instruction(address branch); +#endif + + static int patch_oop(address insn_addr, address o); + + // The following 4 methods return the offset of the appropriate move instruction + + // Support for fast byte/short loading with zero extension (depending on particular CPU) + int load_unsigned_byte(Register dst, Address src); + int load_unsigned_short(Register dst, Address src); + + // Support for fast byte/short loading with sign extension (depending on particular CPU) + int load_signed_byte(Register dst, Address src); + int load_signed_short(Register dst, Address src); + + // Support for sign-extension (hi:lo = extend_sign(lo)) + void extend_sign(Register hi, Register lo); + + // Load and store values by size and signed-ness + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + // Support for inc/dec with optimal instruction selection depending on value. + // increment()/decrement() calls with an address destination will need to use + // rscratch1 to load the value to be incremented. increment()/decrement() + // calls which add or subtract a constant value greater than 2^12 will need + // to use rscratch2 to hold the constant. So, a register increment()/ + // decrement() may trash rscratch2, and an address increment()/decrement() + // may trash rscratch1 and rscratch2. + void decrement(Register reg, int value = 1); + void decrement(Address dst, int value = 1); + void increment(Register reg, int value = 1); + void increment(Address dst, int value = 1); + + // Alignment + void align(int modulus); + + // Stack frame creation/removal + // + // VM and intepreter code may have different stack layouts. enter/leave default layout + // is selected by FrameAPCS option. One can make enter/leave to use VMFrameAPCS instead. + void enter(bool as_apcs = FrameAPCS) { + if (as_apcs) { + mov(rscratch2, sp); + stmdb(sp, RegSet::of(rfp, rscratch2, lr, r15_pc).bits()); + sub(rfp, rscratch2, 4); + } else { + stmdb(sp, RegSet::of(rfp, lr).bits()); + add(rfp, sp, wordSize); + } + } + + void leave(bool as_apcs = FrameAPCS) { + if (as_apcs) { + ldmea(rfp, RegSet::of(rfp, sp, lr).bits(), false/*wb*/); + } else { + sub(sp, rfp, wordSize); + ldmia(sp, RegSet::of(rfp, lr).bits()); + } + } + + // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) + // The pointer will be loaded into the thread register. + void get_thread(Register thread); + + enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double}; + // Support for VM calls + // + // It is imperative that all calls into the VM are handled via the call_VM macros. + // They make sure that the stack linkage is setup correctly. 
call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. + + + void call_VM(Register oop_result, + address entry_point, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + // Overloadings with last_Java_sp + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments = 0, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, bool + check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + void get_vm_result (Register oop_result, Register thread); + void get_vm_result_2(Register metadata_result, Register thread); + + // These always tightly bind to MacroAssembler::call_VM_base + // bypassing the virtual implementation + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true); + + void call_VM_leaf(address entry_point, + int number_of_arguments = 0); + void call_VM_leaf(address entry_point, + Register arg_1); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2, Register arg_3); + + // These always tightly bind to MacroAssembler::call_VM_leaf_base + // bypassing the virtual implementation + void super_call_VM_leaf(address entry_point); + void super_call_VM_leaf(address entry_point, Register arg_1); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); + + // last Java Frame (fills frame anchor) + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc, + Register scratch); + + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Label &last_java_pc, + Register scratch); + + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + Register last_java_pc, + Register scratch); + + void reset_last_Java_frame(Register thread); + + // thread in the default location (rthread) 
+ void reset_last_Java_frame(bool clear_fp); + + // Stores + void store_check(Register obj); // store check for obj - register is destroyed afterwards + void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) + + void resolve_jobject(Register value, Register thread, Register tmp); + + // C 'boolean' to Java boolean: x == 0 ? 0 : 1 + void c2bool(Register x); + + // oop manipulations + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + void cmp_klass(Register oop, Register trial_klass, Register tmp); + + void resolve_oop_handle(Register result, Register tmp); + void load_mirror(Register dst, Register method, Register tmp); + + void access_load_word_at(BasicType type, DecoratorSet decorators, Register dst, Address src, + Register tmp1, Register tmp_thread); + + void access_store_word_at(BasicType type, DecoratorSet decorators, Address dst, Register src, + Register tmp1, Register tmp_thread); + + void access_load_tos_at(BasicType type, DecoratorSet decorators, Address src, + Register tmp1, Register tmp_thread); + + void access_store_tos_at(BasicType type, DecoratorSet decorators, Address dst, + Register tmp1, Register tmp_thread); + + void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + + void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, + Register thread_tmp = noreg, DecoratorSet decorators = 0); + void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, + Register tmp_thread = noreg, DecoratorSet decorators = 0); + + // Used for storing NULL. All other oop constants should be + // stored using routines that take a jobject. + void store_heap_oop_null(Address dst, Register tmp); + + void load_prototype_header(Register dst, Register src); + + void store_klass_gap(Register dst, Register src); + + // This dummy is to prevent a call to store_heap_oop from + // converting a zero (like NULL) into a Register by giving + // the compiler two choices it can't resolve + + void store_heap_oop(Address dst, void* dummy); + + // Push and pop everything that might be clobbered by a native + // runtime call except rscratch1 and rscratch2. (They are always + // scratch, so we don't have to protect them.) Only save the f0-f15 + // and do not save f32-f63 even if present. 
+ void push_call_clobbered_registers(); + void pop_call_clobbered_registers(); + + void push_CPU_state(); + void pop_CPU_state() ; + + // Round up to a power of two + void round_to(Register reg, int modulus); + + // allocation + void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + + void zero_memory(Register addr, Register len, Register t1); + void verify_tlab(); + + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& no_such_interface, + bool return_method = true); + + // virtual method calling + // n.b. x86 allows RegisterOrConstant for vtable_index + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg. + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg and temp2_reg can be noreg, if no temps are available. + // Updates the sub's secondary super cache as necessary. + // If set_cond_codes, condition codes will be Z on success, NZ on failure. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success); + + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + + + // Debugging + + // only if +VerifyOops + void verify_oop(Register reg, const char* s = "broken oop"); + void verify_oop_addr(Address addr, const char * s = "broken oop addr"); + +// TODO: verify method and klass metadata (compare against vptr?) 
+ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} + +#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) +#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + // only if +VerifyFPU + void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); + + // prints msg, dumps registers and stops execution + void stop(const char* msg); + + // prints msg and continues + void warn(const char* msg); + + static void debug32(char* msg, int32_t pc, int32_t regs[]); + + void untested() { stop("untested"); } + + void unimplemented(const char* what = ""); + +#define should_not_reach_here() should_not_reach_here_line(__FILE__, __LINE__) + void should_not_reach_here_line(const char *file, int line) { +#ifdef ASSERT + mov(rscratch1, line); + reg_printf_important(file); + reg_printf_important(": %d", rscratch1); +#endif + stop("should_not_reach_here"); + } + + // Stack overflow checking + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); + // bang with random value from r0 + if (operand_valid_for_add_sub_immediate(offset)) { + sub(rscratch2, sp, offset); + strb(r0, Address(rscratch2)); + } else { + mov(rscratch2, offset); + strb(r0, Address(sp, rscratch2, Assembler::lsl(), Address::SUB)); + } + } + + // Writes to stack successive pages until offset reached to check for + // stack overflow + shadow pages. Also, clobbers tmp + void bang_stack_size(Register size, Register tmp); + + // Check for reserved stack access in method being exited (for JIT) + void reserved_stack_check(); + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); + + // Support for serializing memory accesses between threads + void serialize_memory(Register thread, Register tmp); + + // Arithmetics + + void addptr(Address dst, int32_t src) { + lea(rscratch2, dst); + ldr(rscratch1, Address(rscratch2)); + add(rscratch1, rscratch1, src); + str(rscratch1, Address(rscratch2)); + } + + void cmpptr(Register src1, Address src2); + void cmpoop(Register obj1, Register obj2); + + void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, + Label &suceed, Label *fail); + void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, + Label &suceed, Label *fail); + + void cmpxchgw(Register oldv, Register newv, Register addr, Register tmp, + Label &suceed, Label *fail); + + void atomic_add(Register prev, RegisterOrConstant incr, Register addr); + void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); + + void atomic_xchg(Register prev, Register newv, Register addr); + void atomic_xchgw(Register prev, Register newv, Register addr); + + void orptr(Address adr, RegisterOrConstant src) { + ldr(rscratch1, adr); + if (src.is_register()) + orr(rscratch1, rscratch1, src.as_register()); + else + orr(rscratch1, rscratch1, src.as_constant()); + str(rscratch1, adr); + } + + // Calls + + void trampoline_call(Address entry, CodeBuffer *cbuf = NULL); + + static bool far_branches() { + return ReservedCodeCacheSize > branch_range; + } + + // Jumps that can reach anywhere in the code cache. + // Trashes tmp. 
+ void far_call(Address entry, CodeBuffer *cbuf = NULL); + void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1); + + static int far_branch_size() { + if (far_branches()) { + if (VM_Version::features() & (FT_ARMV7 | FT_ARMV6T2)) { + return 3 * NativeInstruction::arm_insn_sz; // movw, movt, br + } else { + return 5 * NativeInstruction::arm_insn_sz; // mov, 3 orr, br + } + } else { + return NativeInstruction::arm_insn_sz; // br + } + } + + // Emit the CompiledIC call idiom + void ic_call(address entry, jint method_index = 0); + + // Data + void mov_metadata(Register dst, Metadata* obj); + Address allocate_metadata_address(Metadata* obj); + Address constant_oop_address(jobject obj); + + void movoop(Register dst, jobject obj, bool immediate = false); + + void far_load(Register dst, address addr); + void far_load_oop(Register dst, int oop_index); + void far_load_metadata(Register dst, int metadata_index); + void far_load_const(Register dst, address const); + + + // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. + void kernel_crc32(Register crc, Register buf, Register len, + Register table0, Register table1, Register table2, Register table3, + Register tmp, Register tmp2, Register tmp3, int is_crc32c); + //AES code for com.sun.crypto.provider.AESCrypt::encryptBlock() intrinsic. + void kernel_aescrypt_encryptBlock(Register from, Register to, Register key, Register keylen, + Register table1, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6, Register t7); + void kernel_aescrypt_decryptBlock(Register from, Register to, Register key, Register keylen, + Register table1, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6, Register t7); + void kernel_aescrypt_round(Register table_te, Register key, + Register t0, Register t1, Register t2, Register t3, + Register a, Register tmp1, Register tmp2); + void kernel_aescrypt_firstRound(Register in, Register key, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6, Register t7); + void kernel_aescrypt_lastRound( + Register table_te, Register key, Register to, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6, Register t7); + void kernel_aescrypt_lastRound_cbc( + Register table_te, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6); + + void kernel_aescrypt_encrypt(Register from, Register to, Register key, Register rvec, + Register len, Register keylen, Register table1, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6); + void kernel_aescrypt_decrypt(Register from, Register to, Register key, Register rvec, + Register len, Register keylen, Register table1, + Register t0, Register t1, Register t2, Register t3, + Register t4, Register t5, Register t6); + + void sha_round1(Register st_b, Register st_c, Register st_d, + Register tmp, Register st_f, int sh); + + void sha_round2(Register st_b, Register st_c, Register st_d, + Register tmp, Register st_f, int sh); + + void sha_round3(Register st_b, Register st_c, Register st_d, + Register tmp, Register st_f, int sh); + + void sha_w0(FloatRegister w16, FloatRegister w14, + FloatRegister w8, FloatRegister w4, FloatRegister w2, + FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4, + FloatRegister st_k, FloatRegister st_kw, bool update); + + void sha_w(FloatRegister w16, FloatRegister w14, + 
FloatRegister w12, FloatRegister w10, FloatRegister w8, + FloatRegister w6, FloatRegister w4, FloatRegister w2, + FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4, + FloatRegister st_k, FloatRegister st_kw, Register counter, Register rtmp, + bool update = true); + + void kernel_sha_implCompress(Register from, Register state, + Register counter, Register table_k, + Register st_a, Register st_b, + Register st_c, Register st_d, Register st_e, + Register tmp, Register cunter2, Register st_new_a, Register st_w); + + void sha256_implCompress_iter( + Register ra, Register rb, Register rc, Register rd, + Register re, Register rf, Register rg, Register rh, + FloatRegister Dkw1, FloatRegister Dkw2, + Register step, + Register tmp, + Register ra2, Register re2); + void sha256_implCompress_iter0( + Register Da, Register Db, Register Dc, Register Dd, + Register De, Register Df, Register Dg, Register Dh, + FloatRegister Dkw, int index, + Register Dtmp, + Register Dnew_a, Register Dnew_e); + void sha256_w0( + FloatRegister w_m16, FloatRegister w_m15, FloatRegister w_m14, + FloatRegister w_m7, FloatRegister w_m6, + FloatRegister w_m2, + FloatRegister Qtmp_S0, FloatRegister Qtmp_S1, + FloatRegister Qtmp1); + void sha256_w(FloatRegister w16, FloatRegister w14, + FloatRegister w12, FloatRegister w10, FloatRegister w8, + FloatRegister w6, FloatRegister w4, FloatRegister w2, + FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, + FloatRegister st_kw, Register counter, Register rtmp); + + void kernel_sha256_implCompress(Register from, Register state, + Register counter, Register table_k, + Register ra, Register rb, Register rc, Register rd, Register re, + Register rf, Register rg, Register rh, + Register ra2, Register re2); + + void kernel_sha512_implCompress(Register from, Register state, + Register counter, Register table_k); + + void sha512_sigma(FloatRegister x, + FloatRegister Qtmp, FloatRegister Dsigma, int sh1, int sh2, int sh3); + void sha512_delta(FloatRegister x, + FloatRegister Qtmp, FloatRegister Ddelta, int sh1, int sh2, int sh3); + void sha512_ch(FloatRegister x, FloatRegister y, FloatRegister z, + FloatRegister Dtmp, FloatRegister Dch); + void sha512_maj(FloatRegister x, FloatRegister y, FloatRegister z, + FloatRegister Dtmp, FloatRegister Dmaj); + + // Stack push and pop individual 64 bit registers + void push(Register src); + void pop(Register dst); + + // push all registers onto the stack + void pusha(); + void popa(); + + void repne_scan(Register addr, Register value, Register count, + Register scratch); + void repne_scanw(Register addr, Register value, Register count, + Register scratch); + + // Form an address from base + offset in Rd. Rd may or may not actually be + // used: you must use the Address that is returned. It is up to you to ensure + // that the shift provided matches the size of your data. 
+ Address form_address(Register Rd, Register base, long byte_offset, int shift);
+
+ public:
+
+ void ldr_constant(Register dest, const Address &const_addr) {
+ if (NearCpool) {
+ ldr(dest, const_addr);
+ } else {
+ mov(dest, InternalAddress(const_addr.target()));
+ ldr(dest, dest);
+ }
+ }
+
+ address read_polling_page(Register r, address page, relocInfo::relocType rtype);
+ address read_polling_page(Register r, relocInfo::relocType rtype);
+ void get_polling_page(Register dest, address page, relocInfo::relocType rtype);
+
+ // BigInteger intrinsics
+ void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
+ Register z, Register zlen,
+ Register tmp1, Register tmp2, Register tmp3, Register tmp4,
+ Register tmp5, Register tmp6);
+ void mul_add(Register out, Register in, Register offset, Register len, Register k,
+ Register tmp1, Register tmp2, Register tmp3);
+
+ // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
+ void update_byte_crc32(Register crc, Register val, Register table);
+ void update_word_crc32(Register crc, Register v, Register tmp, Register tmp2,
+ Register table0, Register table1, Register table2, Register table3);
+// void update_byte_crc32c(Register crc, Register val, Register table);
+ void update_word_crc32c(Register crc, Register v, Register tmp, Register tmp2,
+ Register table0, Register table1, Register table2, Register table3);
+
+ // Auto dispatch for barriers isb, dmb & dsb.
+ void isb() {
+ if(VM_Version::features() & FT_ARMV7) {
+ Assembler::isb();
+ } else {
+ cp15isb();
+ }
+ }
+
+ void dsb(enum barrier option) {
+ if(VM_Version::features() & FT_ARMV7) {
+ Assembler::dsb(option);
+ } else {
+ cp15dsb();
+ }
+ }
+
+ void dmb(enum barrier option) {
+ if(VM_Version::features() & FT_ARMV7) {
+ Assembler::dmb(option);
+ } else {
+ cp15dmb();
+ }
+ }
+
+ void membar(Membar_mask_bits order_constraint) {
+ dmb(Assembler::barrier(order_constraint));
+ }
+
+ // ISB may be needed because of a safepoint
+ void maybe_isb() { MacroAssembler::isb(); }
+
+ // Helper functions for 64-bit multiplication, division and remainder
+ // does <Rd+1:Rd> = <Rn+1:Rn> * <Rm+1:Rm>
+ void mult_long(Register Rd, Register Rn, Register Rm);
+ // does <Rdh:Rd> = <Rnh:Rn> * <Rmh:Rm>
+ void mult_long(Register Rd, Register Rdh, Register Rn, Register Rnh, Register Rm, Register Rmh);
+
+ private:
+ void divide32(Register res, Register num, Register den, bool want_mod);
+ public:
+ // <Rd+1:Rd> = <Rn+1:Rn> / <Rm+1:Rm>
+ // <Rd+1:Rd> = <Rn+1:Rn> % <Rm+1:Rm>
+ // <Rd> = <Rn> / <Rm>
+ // <Rd> = <Rn> % <Rm>
+ void divide(Register Rd, Register Rn, Register Rm, int width, bool want_remainder);
+
+ void extract_bits(Register dest, Register source, int lsb, int width);
+
+ // These functions require that the src/dst register is an even register
+ // and will emit LDREXD/STREXD if there are multiple cores and the processor
+ // supports it. If there's only one core then LDRD/STRD will be emitted instead.
+ // If the processor has multiple cores and doesn't support LDREXD/STREXD then
+ // LDRD/STRD will be emitted and a warning message printed.
+ void atomic_ldrd(Register Rt, Register RtII, Register Rbase);
+ void atomic_strd(Register Rt, Register RtII, Register Rbase,
+ Register temp, Register tempII);
+
+ private:
+ // generic fallback ldrd generator.
may need to use temporary register + // when register collisions are found + // + // since double_ld_failed_dispatch can introduce address manipulation instructions + // it should return offset of first load/store instruction that will be used + // while constructing implicit null check table + int double_ld_failed_dispatch(Register Rt, Register Rt2, const Address& adr, + void (Assembler::* mul)(unsigned, const Address&, Condition), + void (Assembler::* sgl)(Register, const Address&, Condition), + Register Rtmp, Condition cond); + // ldrd/strd generator. can handle all strd cases and those ldrd where there + // are no register collisions + void double_ldst_failed_dispatch(Register Rt, Register Rt2, const Address& adr, + void (Assembler::* mul)(unsigned, const Address&, Condition), + void (Assembler::* sgl)(Register, const Address&, Condition), + Condition cond); +public: + // override ldrd/strd to perform a magic for when Rt + 1 != Rt2 or any other + // conditions which prevent to use single ldrd/strd insn. a pair of ldr/str + // is used instead then + // + // Since ldrd/strd macro can introduce address manipulation instructions + // it should return offset of first load/store instruction that will be used + // while constructing implicit null check table + using Assembler::ldrd; + int ldrd(Register Rt, Register Rt2, const Address& adr, Register Rmp = rscratch1, Condition cond = C_DFLT); + using Assembler::strd; + int strd(Register Rt, Register Rt2, const Address& adr, Condition cond = C_DFLT); + +private: + void bfc_impl(Register rd, int lsb, int width, Condition cond); +public: + void bfc(Register Rd, int lsb, int width, Condition cond = C_DFLT) { + if (VM_Version::features() & (FT_ARMV6T2 | FT_ARMV7)) + Assembler::bfc(Rd, lsb, width, cond); + else + bfc_impl(Rd, lsb, width, cond); + } + + void align_stack() { + if (StackAlignmentInBytes > 4) + bic(sp, sp, StackAlignmentInBytes-1); + } + +#ifdef ASSERT + void verify_stack_alignment(); +#endif + + // Debug helper + void save_machine_state(); + void restore_machine_state(); + + static uint32_t bytecodes_until_print; + static uint32_t bytecodes_executed; + static int enable_debug; + static int enable_method_debug; + static int enable_debugging_static; + + + void bytecode_seen(Register bc_reg, Register scratch); + static void print_unseen_bytecodes(); + void reg_printf_internal(bool important, const char *fmt, Register a = r0, Register b = r0, Register c = r0); + void reg_printf_important(const char *fmt, Register a = r0, Register b = r0, Register c = r0); + void reg_printf(const char *fmt, Register a = r0, Register b = r0, Register c = r0); + void print_method_entry(Register rmethod, bool native); + void print_method_exit(bool normal = true); + void get_bytecode(Register bc, Register dst); + static void print_cpool(InstanceKlass *klass); + + void create_breakpoint(); + +#ifdef COMPILER2 + static bool _reachable_from_cache(address target); + bool reachable_from_cache(address target); + static bool _cache_fully_reachable(); + bool cache_fully_reachable(); + + void call(address target, RelocationHolder rspec, Condition cond = Assembler::AL); + + void call(address target, + relocInfo::relocType rtype = relocInfo::runtime_call_type, + Condition cond = Assembler::AL) { + call(target, Relocation::spec_simple(rtype), cond); + } + + void jump(address target, + relocInfo::relocType rtype = relocInfo::runtime_call_type, + Register scratch = noreg, + Condition cond = Assembler::AL); + + void jump(address dest, relocInfo::relocType rtype = 
relocInfo::runtime_call_type, + Condition cond = Assembler::AL) { + jump(dest, rtype, rscratch2, cond); + } + + void mov_address(Register rd, address addr, RelocationHolder const& rspec) { + assert(rspec.type() != relocInfo::runtime_call_type, "do not use mov_address for runtime calls"); + assert(rspec.type() != relocInfo::static_call_type, "do not use mov_address for relocable calls"); + if (rspec.type() == relocInfo::none) { + // absolute address, relocation not needed + mov(rd, (uint32_t)addr); + return; + } + if (VM_Version::features() & FT_ARMV6T2) { + relocate(rspec); + int c = (int)addr; + movw_i(rd, c & 0xffff); + if ((unsigned int)c >> 16) { + movt_i(rd, (unsigned int)c >> 16); + } + return; + } + Label skip_literal; + Label literal; + ldr(rd, literal); + b(skip_literal); + bind(literal); + emit_address(addr); + bind(skip_literal); + } + + void arm_stack_overflow_check(int frame_size_in_bytes, Register tmp); + void arm_stack_overflow_check(Register Rsize, Register tmp); + + void mov_relative_address(Register rd, address addr, Condition cond = Assembler::AL) { + int offset = addr - pc() - 8; + assert((offset & 3) == 0, "bad alignment"); + if (offset >= 0) { + assert(is_valid_for_imm12(offset), "addr too far"); + add(rd, r15_pc, offset, cond); + } else { + assert(is_valid_for_imm12(-offset), "addr too far"); + sub(rd, r15_pc, -offset, cond); + } + } + + void floating_cmp(Register dst); + + void fast_lock(Register Roop, Register Rbox, Register Rmark, Register Rscratch, Register Rscratch2); + void fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2); +#endif +}; + + +#ifdef ASSERT +inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } +#endif + +/** + * class SkipIfEqual: + * + * Instantiating this class will result in assembly code being output that will + * jump around any code emitted between the creation of the instance and it's + * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time. + */ +class SkipIfEqual { + private: + MacroAssembler* _masm; + Label _label; + + public: + SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); + ~SkipIfEqual(); +}; + +struct tableswitch { + Register _reg; + int _insn_index; + jint _first_key; + jint _last_key; + Label _after; + Label _branches; +}; + +#endif // CPU_AARCH32_VM_MACROASSEMBLER_AARCH32_HPP --- /dev/null 2018-09-25 19:25:12.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/macroAssembler_aarch32.inline.hpp 2018-09-25 19:25:12.000000000 +0300 @@ -0,0 +1,36 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_MACROASSEMBLER_AARCH32_INLINE_HPP +#define CPU_AARCH32_VM_MACROASSEMBLER_AARCH32_INLINE_HPP + +#include "asm/assembler.hpp" + +#ifndef PRODUCT + +#endif // ndef PRODUCT + +#endif // CPU_AARCH32_VM_MACROASSEMBLER_AARCH32_INLINE_HPP --- /dev/null 2018-09-25 19:25:13.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/methodHandles_aarch32.cpp 2018-09-25 19:25:13.000000000 +0300 @@ -0,0 +1,457 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "classfile/javaClasses.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/flags/flagSetting.hpp" +#include "runtime/frame.inline.hpp" + +#define __ _masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { + if (VerifyMethodHandles) + verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), + "MH argument is a Class"); + __ ldr(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); +} + +#ifdef ASSERT +static int check_nonzero(const char* xname, int x) { + assert(x != 0, "%s should be nonzero", xname); + return x; +} +#define NONZERO(x) check_nonzero(#x, x) +#else //ASSERT +#define NONZERO(x) (x) +#endif //PRODUCT + +#ifdef ASSERT +void MethodHandles::verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message) { + // FIXME Did this code ever work? + // or have I changed the working of cmpptr? + // previously cmpptr took the klass_addr, did it also do dereference before the comparison? 
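+ // Roughly, the check below accepts obj when its klass is the well-known klass
+ // itself, or when the supertype recorded at that klass's super_check_offset
+ // inside obj's klass matches it; anything else falls through to L_bad and
+ // stops. This presumes cmpptr(Register, ExternalAddress) loads the cell at
+ // klass_addr before comparing, which is exactly what the FIXME above questions.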
+ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); + Klass* klass = SystemDictionary::well_known_klass(klass_id); + Register temp = rscratch2; + Register temp2 = rscratch1; // used by MacroAssembler::cmpptr + Label L_ok, L_bad; + BLOCK_COMMENT("verify_klass {"); + __ verify_oop(obj); + __ cbz(obj, L_bad); + __ push(RegSet::of(temp, temp2), sp); + __ load_klass(temp, obj); + __ cmpptr(temp, ExternalAddress((address) klass_addr)); + __ b(L_ok, Assembler::EQ); + intptr_t super_check_offset = klass->super_check_offset(); + __ ldr(temp, Address(temp, super_check_offset)); + __ cmpptr(temp, ExternalAddress((address) klass_addr)); + __ b(L_ok, Assembler::EQ); + __ pop(RegSet::of(temp, temp2), sp); + __ bind(L_bad); + __ stop(error_message); + __ BIND(L_ok); + __ pop(RegSet::of(temp, temp2), sp); + BLOCK_COMMENT("} verify_klass"); +} + +void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { } + +#endif //ASSERT + +void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry) { + assert(method == rmethod, "interpreter calling convention"); + Label L_no_such_method; + __ cbz(rmethod, L_no_such_method); + __ verify_method_ptr(method); + + if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { + Label run_compiled_code; + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + + __ ldrb(rscratch1, Address(rthread, JavaThread::interp_only_mode_offset())); + __ cbnz(rscratch1, run_compiled_code); + __ ldr(rscratch1, Address(method, Method::interpreter_entry_offset())); + __ b(rscratch1); + __ BIND(run_compiled_code); + } + + const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : + Method::from_interpreted_offset(); + __ ldr(rscratch1,Address(method, entry_offset)); + __ b(rscratch1); + __ bind(L_no_such_method); + __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); +} + +void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry) { + BLOCK_COMMENT("jump_to_lambda_form {"); + // This is the initial entry point of a lazy method handle. + // After type checking, it picks up the invoker from the LambdaForm. 
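+ // In short, the loads below walk
+ // recv (MethodHandle) -> form (LambdaForm) -> vmentry (MemberName)
+ // -> method (ResolvedMethodName) -> vmtarget (Method*)
+ // and then transfer control through jump_from_method_handle().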
+ assert_different_registers(recv, method_temp, temp2); + assert(recv != noreg, "required register"); + assert(method_temp == rmethod, "required register for loading method"); + + //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); + + // Load the invoker, as MH -> MH.form -> LF.vmentry + __ verify_oop(recv); + __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); + __ verify_oop(method_temp); + __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); + __ verify_oop(method_temp); + __ access_load_word_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); + + if (VerifyMethodHandles && !for_compiler_entry) { + // make sure recv is already on stack + __ ldr(temp2, Address(method_temp, Method::const_offset())); + __ load_sized_value(temp2, + Address(temp2, ConstMethod::size_of_parameters_offset()), + sizeof(u2), /*is_signed*/ false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + Label L; + __ ldr(rscratch1, __ argument_address(temp2, -1)); + __ cmpoop(recv, rscratch1); + __ b(L, Assembler::EQ); + __ ldr(r0, __ argument_address(temp2, -1)); + __ hlt(0); + __ BIND(L); + } + + jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); + BLOCK_COMMENT("} jump_to_lambda_form"); +} + +// Code generation +address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, + vmIntrinsics::ID iid) { + const bool not_for_compiler_entry = false; // this is the interpreter entry + assert(is_signature_polymorphic(iid), "expected invoke iid"); + if (iid == vmIntrinsics::_invokeGeneric || + iid == vmIntrinsics::_compiledLambdaForm) { + // Perhaps surprisingly, the symbolic references visible to Java are not directly used. + // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. + // They all allow an appendix argument. + __ hlt(0); // empty stubs make SG sick + return NULL; + } + + // rmethod: Method* + // r3: argument locator (parameter slot count, added to rsp) + // r1: used as temp to hold mh or receiver + // r0, r11: garbage temps, blown away + Register argp = r3; // argument list ptr, live on error paths + Register temp = r0; + Register mh = r1; // MH receiver; dies quickly and is recycled + + // here's where control starts out: + __ align(CodeEntryAlignment); + address entry_point = __ pc(); + + if (VerifyMethodHandles) { + assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); + Label L; + BLOCK_COMMENT("verify_intrinsic_id {"); + __ ldrh(rscratch1, Address(rmethod, Method::intrinsic_id_offset_in_bytes())); + __ cmp(rscratch1, (int) iid, temp); + __ b(L, Assembler::EQ); + if (iid == vmIntrinsics::_linkToVirtual || + iid == vmIntrinsics::_linkToSpecial) { + // could do this for all kinds, but would explode assembly code size + trace_method_handle(_masm, "bad Method*::intrinsic_id"); + } + __ hlt(0); + __ bind(L); + BLOCK_COMMENT("} verify_intrinsic_id"); + } + + // First task: Find out how big the argument list is. 
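+ // (For the ref kinds that carry a receiver, the parameter count is read from
+ // ConstMethod as a u2, and argument_address(argp, -1) then points at the
+ // receiver/MethodHandle argument slot; see the uses of r3_first_arg_addr below.)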
+ Address r3_first_arg_addr; + int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); + assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); + if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { + __ ldr(argp, Address(rmethod, Method::const_offset())); + __ load_sized_value(argp, + Address(argp, ConstMethod::size_of_parameters_offset()), + sizeof(u2), /*is_signed*/ false); + // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); + r3_first_arg_addr = __ argument_address(argp, -1); + } else { + DEBUG_ONLY(argp = noreg); + } + + if (!is_signature_polymorphic_static(iid)) { + __ ldr(mh, r3_first_arg_addr); + DEBUG_ONLY(argp = noreg); + } + + // r3_first_arg_addr is live! + + trace_method_handle_interpreter_entry(_masm, iid); + if (iid == vmIntrinsics::_invokeBasic) { + generate_method_handle_dispatch(_masm, iid, mh, noreg, not_for_compiler_entry); + + } else { + // Adjust argument list by popping the trailing MemberName argument. + Register recv = noreg; + if (MethodHandles::ref_kind_has_receiver(ref_kind)) { + // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. + __ ldr(recv = r2, r3_first_arg_addr); + } + DEBUG_ONLY(argp = noreg); + Register rmember = rmethod; // MemberName ptr; incoming method ptr is dead now + __ pop(rmember); // extract last argument + generate_method_handle_dispatch(_masm, iid, recv, rmember, not_for_compiler_entry); + } + + return entry_point; +} + + +void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, + vmIntrinsics::ID iid, + Register receiver_reg, + Register member_reg, + bool for_compiler_entry) { + assert(is_signature_polymorphic(iid), "expected invoke iid"); + // temps used in this code are not used in *either* compiled or interpreted calling sequences + // use interpreter caching registers (caller-save in compiler). + // Starting from r5 as r4 used by gen_special_dispatch. + Register temp1 = r5; + Register temp2 = r6; + Register temp3 = r7; + assert_different_registers(temp1, temp2, temp3, receiver_reg, member_reg); + if (for_compiler_entry) { + assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); + assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3); + assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3); + assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3); + } + + assert_different_registers(temp1, temp2, temp3, receiver_reg); + assert_different_registers(temp1, temp2, temp3, member_reg); + + if (iid == vmIntrinsics::_invokeBasic) { + // indirect through MH.form.vmentry.vmtarget + jump_to_lambda_form(_masm, receiver_reg, rmethod, temp1, for_compiler_entry); + + } else { + // The method is a member invoker used by direct method handles. 
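+ // Sketch of the cases handled below: _linkToSpecial and _linkToStatic load
+ // the Method* directly from MemberName.method.vmtarget; _linkToVirtual
+ // dispatches through the vtable using MemberName.vmindex; _linkToInterface
+ // resolves MemberName.clazz and dispatches through the itable. When
+ // VerifyMethodHandles is on, the receiver (if any) is first checked against
+ // MemberName.clazz (except for _linkToInterface).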
+ if (VerifyMethodHandles) { + // make sure the trailing argument really is a MemberName (caller responsibility) + verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), + "MemberName required for invokeVirtual etc."); + } + + Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); + Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); + Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); + Address vmtarget_method( rmethod, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); + + Register temp1_recv_klass = temp1; + if (iid != vmIntrinsics::_linkToStatic) { + __ verify_oop(receiver_reg); + if (iid == vmIntrinsics::_linkToSpecial) { + // Don't actually load the klass; just null-check the receiver. + __ null_check(receiver_reg); + } else { + // load receiver klass itself + __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + BLOCK_COMMENT("check_receiver {"); + // The receiver for the MemberName must be in receiver_reg. + // Check the receiver against the MemberName.clazz + if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { + // Did not load it above... + __ load_klass(temp1_recv_klass, receiver_reg); + __ verify_klass_ptr(temp1_recv_klass); + } + if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { + Label L_ok; + Register temp2_defc = temp2; + __ load_heap_oop(temp2_defc, member_clazz, temp3); + load_klass_from_Class(_masm, temp2_defc); + __ verify_klass_ptr(temp2_defc); + __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); + // If we get here, the type check failed! + __ hlt(0); + // __ STOP("receiver class disagrees with MemberName.clazz"); + __ bind(L_ok); + } + BLOCK_COMMENT("} check_receiver"); + } + if (iid == vmIntrinsics::_linkToSpecial || + iid == vmIntrinsics::_linkToStatic) { + DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass + } + + // Live registers at this point: + // member_reg - MemberName that was the trailing argument + // temp1_recv_klass - klass of stacked receiver, if needed + // r1 ... 
r0 - compiler arguments (if compiled) + + Label L_incompatible_class_change_error; + switch (iid) { + case vmIntrinsics::_linkToSpecial: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); + } + __ load_heap_oop(rmethod, member_vmtarget); + __ access_load_word_at(T_ADDRESS, IN_HEAP, rmethod, vmtarget_method, noreg, noreg); + break; + + case vmIntrinsics::_linkToStatic: + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); + } + __ load_heap_oop(rmethod, member_vmtarget); + __ access_load_word_at(T_ADDRESS, IN_HEAP, rmethod, vmtarget_method, noreg, noreg); + break; + + case vmIntrinsics::_linkToVirtual: + { + // same as TemplateTable::invokevirtual, + // minus the CP setup and profiling: + + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); + } + + // pick out the vtable index from the MemberName, and then we can discard it: + Register temp2_index = temp2; + __ access_load_word_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); + + if (VerifyMethodHandles) { + Label L_index_ok; + __ cmp(temp2_index, 0U); + __ b(L_index_ok, Assembler::GE); + __ hlt(0); + __ BIND(L_index_ok); + } + + // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget + // at this point. And VerifyMethodHandles has already checked clazz, if needed. + + // get target Method* & entry point + __ lookup_virtual_method(temp1_recv_klass, temp2_index, rmethod); + break; + } + + case vmIntrinsics::_linkToInterface: + { + // same as TemplateTable::invokeinterface + // (minus the CP setup and profiling, with different argument motion) + if (VerifyMethodHandles) { + verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); + } + + Register temp3_intf = temp3; + __ load_heap_oop(temp3_intf, member_clazz); + load_klass_from_Class(_masm, temp3_intf); + __ verify_klass_ptr(temp3_intf); + + Register rindex = rmethod; + __ access_load_word_at(T_ADDRESS, IN_HEAP, rindex, member_vmindex, noreg, noreg); + if (VerifyMethodHandles) { + Label L; + __ cmp(rindex, 0); + __ b(L, Assembler::GE); + __ hlt(0); + __ bind(L); + } + + // given intf, index, and recv klass, dispatch to the implementation method + __ lookup_interface_method(temp1_recv_klass, temp3_intf, + // note: next two args must be the same: + rindex, rmethod, + temp2, + L_incompatible_class_change_error); + break; + } + + default: + fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); + break; + } + + // live at this point: rmethod, r13 (if interpreted) + + // After figuring out which concrete method to call, jump into it. + // Note that this works in the interpreter with no data motion. + // But the compiled version will require that r2_recv be shifted out. + __ verify_method_ptr(rmethod); + jump_from_method_handle(_masm, rmethod, temp1, for_compiler_entry); + if (iid == vmIntrinsics::_linkToInterface) { + __ bind(L_incompatible_class_change_error); + __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); + } + } +} + +#ifndef PRODUCT +void trace_method_handle_stub(const char* adaptername, + oop mh, + intptr_t* saved_regs, + intptr_t* entry_sp) { } + +// The stub wraps the arguments in a struct on the stack to avoid +// dealing with the different calling conventions for passing 6 +// arguments. 
+struct MethodHandleStubArguments { + const char* adaptername; + oopDesc* mh; + intptr_t* saved_regs; + intptr_t* entry_sp; +}; +void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { } + +void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { } +#endif //PRODUCT --- /dev/null 2018-09-25 19:25:14.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/methodHandles_aarch32.hpp 2018-09-25 19:25:14.000000000 +0300 @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// Platform-specific definitions for method handles. +// These definitions are inlined into class MethodHandles. + +// Adapters +enum /* platform_dependent_constants */ { + adapter_code_size = 32000 DEBUG_ONLY(+ 120000) +}; + +public: + + static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); + + static void verify_klass(MacroAssembler* _masm, + Register obj, SystemDictionary::WKID klass_id, + const char* error_message = "wrong klass") NOT_DEBUG_RETURN; + + static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { + verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + "reference is a MH"); + } + + static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; + + // Similar to InterpreterMacroAssembler::jump_from_interpreted. + // Takes care of special dispatch from single stepping too. + static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, + bool for_compiler_entry); + + static void jump_to_lambda_form(MacroAssembler* _masm, + Register recv, Register method_temp, + Register temp2, + bool for_compiler_entry); + + static Register saved_last_sp_register() { + // Should be in sharedRuntime, not here. + return noreg; + } --- /dev/null 2018-09-25 19:25:15.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/nativeInst_aarch32.cpp 2018-09-25 19:25:15.000000000 +0300 @@ -0,0 +1,719 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/codeCache.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_aarch32.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/handles.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "utilities/ostream.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif + +// LIRAssembler fills patching site with nops up to NativeCall::instruction_size +int NativeCall::instruction_size = 5 * arm_insn_sz; + +NativeInstruction* NativeInstruction::from(address addr) { + return (NativeInstruction*) addr; +} + +//------------------------------------------------------------------- + +void NativeCall::init() { + instruction_size = (VM_Version::features() & (FT_ARMV6T2 | FT_ARMV7) ? 3 : 5) * arm_insn_sz; +} + +void NativeCall::verify() { + if (!is_call()) { + fatal("not a call"); + } +} + +address NativeCall::destination() const { + assert(is_call(), "not a call"); + if (NativeImmCall::is_at(addr())) { + return NativeImmCall::from(addr())->destination(); + } else if (NativeMovConstReg::is_at(addr())) { + return address(NativeMovConstReg::from(addr())->data()); + } else if (NativeTrampolineCall::is_at(addr())) { + return NativeTrampolineCall::from(addr())->destination(); + } + ShouldNotReachHere(); + return NULL; +} + +void NativeCall::set_destination(address dest) { + assert(is_call(), "not a call"); + if (NativeImmCall::is_at(addr())) { + NativeImmCall::from(addr())->set_destination(dest); + } else if (NativeMovConstReg::is_at(addr())) { + NativeMovConstReg::from(addr())->set_data((uintptr_t) dest); + } else if (NativeTrampolineCall::is_at(addr())) { + NativeTrampolineCall::from(addr())->set_destination(dest); + } else { + ShouldNotReachHere(); + } +} + +void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(is_call(), "not a call"); + + // patching should be not only safe (i.e. 
this call could be executed by some thread), + // but it also should be atomic (some other thread could call NativeCall::destination() + // and see valid destination value) + + if (NativeImmCall::is_at(addr())) { + NativeImmCall::from(addr())->set_destination(dest); + ICache::invalidate_word(addr()); + } else if (NativeTrampolineCall::is_at(addr())) { + NativeTrampolineCall::from(addr())->set_destination_mt_safe(dest); + } else { + ShouldNotReachHere(); + } +} + +void NativeCall::insert(address code_pos, address entry) { + Unimplemented(); +} + +bool NativeCall::is_call_before(address return_address) { + if (NativeTrampolineCall::is_at(return_address - NativeCall::instruction_size)) { + return true; + } + + if (NativeMovConstReg::is_at(return_address - NativeCall::instruction_size)) { + NativeMovConstReg *nm = NativeMovConstReg::from(return_address - NativeCall::instruction_size); + address next_instr = nm->next_instruction_address(); + if (NativeRegCall::is_at(next_instr) && NativeRegCall::from(next_instr)->destination() == nm->destination()) { + return true; + } + } + + if (NativeImmCall::is_at(return_address - NativeBranchType::instruction_size)) { + return true; + } + + return false; +} + +address NativeCall::next_instruction_address() const { + assert(is_call(), "not a call"); + if (NativeImmCall::is_at(addr())) { + return NativeImmCall::from(addr())->next_instruction_address(); + } else if (NativeMovConstReg::is_at(addr())) { + NativeMovConstReg *nm = NativeMovConstReg::from(addr()); + address next_instr = nm->next_instruction_address(); + assert(NativeRegCall::is_at(next_instr), "should be"); + return NativeRegCall::from(next_instr)->next_instruction_address(); + } else if (NativeTrampolineCall::is_at(addr())) { + return NativeTrampolineCall::from(addr())->next_instruction_address(); + } else { + ShouldNotReachHere(); + return NULL; + } +} + +address NativeCall::return_address() const { + return next_instruction_address(); +} + +bool NativeCall::is_at(address addr) { + if (NativeImmCall::is_at(addr)) { + return true; + } + + if (NativeMovConstReg::is_at(addr)) { + NativeMovConstReg *nm = NativeMovConstReg::from(addr); + address next_instr = nm->next_instruction_address(); + if (NativeRegCall::is_at(next_instr) && + NativeRegCall::from(next_instr)->destination() == nm->destination()) { + return true; + } + } + + if (NativeTrampolineCall::is_at(addr)) { + return true; + } + + return false; +} + +NativeCall* NativeCall::from(address addr) { + assert(NativeCall::is_at(addr), ""); + return (NativeCall*) addr; +} + +//------------------------------------------------------------------- + +address NativeTrampolineCall::destination() const { + assert(is_at(addr()), "not call"); + return (address) uint_at(8); +} + +void NativeTrampolineCall::set_destination(address dest) { + assert(is_at(addr()), "not call"); + set_uint_at(8, (uintptr_t) dest); +} + +void NativeTrampolineCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(is_at(addr()), "not call"); + set_destination(dest); + ICache::invalidate_word(addr() + 8); +} + +bool NativeTrampolineCall::is_at(address addr) { + return (as_uint(addr ) & ~0xffu) == 0xe28fe000 // add lr, pc, #disp + && as_uint(addr + 4) == 0xe51ff004; // ldr pc, [pc, -4] +} + +NativeTrampolineCall* NativeTrampolineCall::from(address addr) { + assert(NativeTrampolineCall::is_at(addr), ""); + return (NativeTrampolineCall*) addr; +} + +//------------------------------------------------------------------- + +address NativeImmCall::destination() const { + 
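// The 24-bit field sign-extracted below encodes a word offset relative to the
+ // ARM-visible PC, i.e. this instruction's address plus 8. For example, an
+ // offset field of 0x000010 in an instruction at 0x1000 resolves to
+ // 0x1000 + 8 + (0x10 << 2) = 0x1048. +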
assert(is_imm_call(), "not call"); + uint32_t insn = as_uint(); + intptr_t off = Instruction_aarch32::sextract(insn, 23, 0); + address destination = addr() + 8 + (off << 2); + return destination; +} + +void NativeImmCall::set_destination(address dest) { + assert(is_imm_call(), "not call"); + patch_offset_to(dest); +} + +bool NativeImmCall::is_at(address addr) { + return Instruction_aarch32::extract(as_uint(addr), 27, 24) == 0b1011; +} + +NativeImmCall* NativeImmCall::from(address addr) { + assert(NativeImmCall::is_at(addr), ""); + return (NativeImmCall*) addr; +} + +//------------------------------------------------------------------- + +Register NativeRegCall::destination() const { + assert(is_reg_call(), "not call"); + return (Register) Instruction_aarch32::extract(as_uint(), 3, 0); +} + +bool NativeRegCall::is_at(address addr) { + unsigned insn = as_uint(addr); + return is_branch_type(insn) && Instruction_aarch32::extract(insn, 7, 4) == 0b0011; +} + +NativeRegCall* NativeRegCall::from(address addr) { + assert(NativeRegCall::is_at(addr), ""); + return (NativeRegCall*) addr; +} + +//------------------------------------------------------------------- + +address NativeFarLdr::skip_patching_prolog(address addr) { + if (NativeInstruction::from(addr)->is_nop() && + NativeInstruction::from(addr + arm_insn_sz)->is_barrer()) { + return addr+2*arm_insn_sz; + } + return addr; +} + +bool NativeFarLdr::is_at(address addr) { + addr = skip_patching_prolog(addr); + unsigned add_condidate = as_uint(addr); + if (((Instruction_aarch32::extract(add_condidate, 27, 21) != 0b0010100) /*add*/ && + (Instruction_aarch32::extract(add_condidate, 27, 21) != 0b0010010) /*sub*/) || + (Instruction_aarch32::extract(add_condidate, 19, 16) != (unsigned) r15_pc->encoding())) { + return false; + } + Register dest = as_Register(Instruction_aarch32::extract(add_condidate, 15, 12)); + return NativeMovConstReg::is_ldr_literal_at(addr + arm_insn_sz, dest); +} + +NativeFarLdr* NativeFarLdr::from(address addr) { + assert(is_at(addr), ""); + return (NativeFarLdr*) addr; +} + +intptr_t* NativeFarLdr::data_addr() { + address self = skip_patching_prolog(addr()); + off_t offset = 8; + off_t add_off = Assembler::decode_imm12(as_uint(self) & 0xfff); + if (Instruction_aarch32::extract(as_uint(self), 24, 21) == 0x4) { + offset += add_off; + } else { + offset -= add_off; + } + off_t ldr_off = as_uint(self + arm_insn_sz) & 0xfff; + if (Instruction_aarch32::extract(as_uint(self), 23, 23)) { + offset += ldr_off; + } else { + offset -= ldr_off; + } + + return (intptr_t*)(self + offset); +} + +void NativeFarLdr::set_data_addr(intptr_t *data_addr) { + address self = skip_patching_prolog(addr()); + off_t offset = (address)data_addr - (self + 8); + bool minus = false; + if (offset < 0) { + offset = -offset; + minus = true; + } + guarantee((0 <= offset) && (offset <= 0xffffff), "offset too large"); + set_uint_at(self - addr(), (as_uint(self) & ~0xc00fff) | + (minus ? 0x400000u /*sub*/ : 0x800000u /*add*/) | + Assembler::encode_imm12(offset & 0xff000)); + + set_uint_at(self - addr() + arm_insn_sz, + (as_uint(self + arm_insn_sz) & ~0x800fff) | + (minus ? 
0x000000 : 0x800000) | + (offset & 0xfff)); + ICache::invalidate_range(self, 2*arm_insn_sz); +} + +address NativeFarLdr::next_instruction_address() const { + return skip_patching_prolog(addr()) + NativeMovConstReg::far_ldr_sz; +} + +//------------------------------------------------------------------- + +void NativeMovConstReg::verify() { + if (!is_mov_const_reg()) { + fatal("not a mov const reg"); + } +} + +intptr_t NativeMovConstReg::data() const { + if (NativeFarLdr::is_at(addr())) { + return *NativeFarLdr::from(addr())->data_addr(); + } + return (intptr_t) MacroAssembler::target_addr_for_insn(addr()); +} + +void NativeMovConstReg::set_data(intptr_t x) { + if (NativeFarLdr::is_at(addr())) { + *NativeFarLdr::from(addr())->data_addr() = x; + // Fences should be provided by calling code! + } else { + // Store x into the instruction stream. + MacroAssembler::pd_patch_instruction(addr(), (address)x); + ICache::invalidate_range(addr(), max_instruction_size); + } + + // Find and replace the oop/metadata corresponding to this + // instruction in oops section. + CodeBlob* cb = CodeCache::find_blob(addr()); + nmethod* nm = cb->as_nmethod_or_null(); + if (nm != NULL) { + RelocIterator iter(nm, addr(), next_instruction_address()); + while (iter.next()) { + if (iter.type() == relocInfo::oop_type) { + oop* oop_addr = iter.oop_reloc()->oop_addr(); + *oop_addr = cast_to_oop(x); + break; + } else if (iter.type() == relocInfo::metadata_type) { + Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); + *metadata_addr = (Metadata*)x; + break; + } + } + } +} + +void NativeMovConstReg::print() { + tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, + p2i(addr()), data()); +} + +Register NativeMovConstReg::destination() const { + return (Register) Instruction_aarch32::extract(as_uint(), 15, 12); +} + +NativeMovConstReg* NativeMovConstReg::from(address addr) { + assert(NativeMovConstReg::is_at(addr), ""); + return (NativeMovConstReg*) addr; +} + +bool NativeMovConstReg::is_ldr_literal_at(address addr, Register from) { + unsigned insn = as_uint(addr); + if (from == noreg) { + return (Instruction_aarch32::extract(insn, 27, 20) & 0b11100101) == 0b01000001; + } + unsigned reg = from->encoding(); + return (Instruction_aarch32::extract(insn, 27, 16) & 0b111001011111) == (0b010000010000 | reg); +} + +bool NativeMovConstReg::is_far_ldr_literal_at(address addr) { + return NativeFarLdr::is_at(addr); +} + +bool NativeMovConstReg::is_movw_movt_at(address addr) { + unsigned insn = as_uint(addr); + unsigned insn2 = as_uint(addr + arm_insn_sz); + return Instruction_aarch32::extract(insn, 27, 20) == 0b00110000 && //mov + Instruction_aarch32::extract(insn2, 27, 20) == 0b00110100; //movt +} + +bool NativeMovConstReg::is_mov_n_three_orr_at(address addr) { + return (Instruction_aarch32::extract(as_uint(addr), 27, 16) & 0b111111101111) == 0b001110100000 && + Instruction_aarch32::extract(as_uint(addr+arm_insn_sz), 27, 20) == 0b00111000 && + Instruction_aarch32::extract(as_uint(addr+2*arm_insn_sz), 27, 20) == 0b00111000 && + Instruction_aarch32::extract(as_uint(addr+3*arm_insn_sz), 27, 21) == 0b0011100; +} + +bool NativeMovConstReg::is_at(address addr) { + return is_ldr_literal_at(addr) || + is_far_ldr_literal_at(addr) || + is_movw_movt_at(addr) || + is_mov_n_three_orr_at(addr); +} + +//------------------------------------------------------------------- +address NativeMovRegMem::instruction_address() const { + return addr(); +} + +int NativeMovRegMem::offset() const { + assert(NativeMovConstReg::is_at(addr()), "no 
others"); + return NativeMovConstReg::from(addr())->data(); +} + +void NativeMovRegMem::set_offset(int x) { + assert(NativeMovConstReg::is_at(addr()), "no others"); + NativeMovConstReg::from(addr())->set_data(x); +} + +void NativeMovRegMem::verify() { + assert(NativeMovConstReg::is_at(addr()), "no others"); +} + +//-------------------------------------------------------------------------------- + +void NativeJump::verify() { + if (!is_jump()) { + fatal("not a call"); + } +} + +void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { +} + +address NativeJump::jump_destination() const { + assert(is_jump(), "not a call"); + if (NativeImmJump::is_at(addr())) { + return NativeImmJump::from(addr())->destination(); + } else if (NativeMovConstReg::is_at(addr())) { + return address(NativeMovConstReg::from(addr())->data()); + } + ShouldNotReachHere(); + return NULL; +} + +void NativeJump::set_jump_destination(address dest) { + assert(is_jump(), "not a call"); + if (NativeImmJump::is_at(addr())) { + NativeImmJump::from(addr())->set_destination(dest); + } else if (NativeMovConstReg::is_at(addr())) { + NativeMovConstReg::from(addr())->set_data((uintptr_t) dest); + } else { + ShouldNotReachHere(); + } +} + +address NativeJump::next_instruction_address() const { + assert(is_jump(), "not a call"); + if (NativeImmJump::is_at(addr())) { + return NativeImmJump::from(addr())->next_instruction_address(); + } else if (NativeMovConstReg::is_at(addr())) { + address after_move = NativeMovConstReg::from(addr())->next_instruction_address(); + assert(NativeRegJump::is_at(after_move), "should be jump"); + return NativeRegJump::from(after_move)->next_instruction_address(); + } + ShouldNotReachHere(); + return NULL; +} + +bool NativeJump::is_at(address addr) { + if (NativeImmJump::is_at(addr)) { + return true; + } + if (NativeMovConstReg::is_at(addr)) { + NativeMovConstReg *nm = NativeMovConstReg::from(addr); + address next_instr = nm->next_instruction_address(); + return NativeRegJump::is_at(next_instr) && + NativeRegJump::from(next_instr)->destination() == nm->destination(); + } + return false; +} + +NativeJump* NativeJump::from(address addr) { + assert(NativeJump::is_at(addr), ""); + return (NativeJump*) addr; +} + +// MT-safe inserting of a jump over a jump or a nop (used by +// nmethod::make_not_entrant_or_zombie) + +void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { + + assert(dest == SharedRuntime::get_handle_wrong_method_stub(), + "expected fixed destination of patch"); + assert(NativeInstruction::from(verified_entry)->is_jump_or_nop() || + NativeInstruction::from(verified_entry)->is_sigill_zombie_not_entrant(), + "Aarch32 cannot replace non-jump with jump"); + + // Patch this nmethod atomically. + if (Assembler::reachable_from_branch_at(verified_entry, dest)) { + assert((((intptr_t) dest & 0x3) == 0) && (((intptr_t) verified_entry & 0x3) == 0), + "addresses should be aligned on 4"); + ptrdiff_t disp = (dest - verified_entry - 8) >> 2; + guarantee((-(1 << 23) <= disp) && (disp < (1 << 23)), "branch overflow"); + + unsigned int insn = (0b11101010 << 24) | (disp & 0xffffff); + *(unsigned int*)verified_entry = insn; + } else { + // We use an illegal instruction for marking a method as + // not_entrant or zombie. 
+ NativeIllegalInstruction::insert(verified_entry);
+ }
+
+ ICache::invalidate_range(verified_entry, instruction_size);
+}
+
+//-------------------------------------------------------------------
+
+bool NativeBranchType::is_branch_type(uint32_t insn) {
+ return Instruction_aarch32::extract(insn, 27, 20) == 0b00010010 &&
+ Instruction_aarch32::extract(insn, 19, 8) == 0b111111111111;
+}
+
+void NativeBranchType::patch_offset_to(address dest) {
+ uint32_t insn = as_uint();
+ const intptr_t off = (dest - (addr() + 8));
+ assert((off & 3) == 0, "should be");
+ assert(-32 * 1024 * 1024 <= off && off < 32 * 1024 * 1024,
+ "new offset should fit in instruction");
+
+ const unsigned off_mask = ((1U << 24) - 1);
+ insn &= ~off_mask; // mask off offset part
+ insn |= ((unsigned) off >> 2) & off_mask;
+
+ set_uint(insn);
+ ICache::invalidate_range(addr_at(0), instruction_size);
+}
+
+//-------------------------------------------------------------------
+
+address NativeImmJump::destination() const {
+ assert(is_imm_jump(), "not jump");
+ return addr() + 8 + 4 * Instruction_aarch32::sextract(as_uint(), 23, 0);
+}
+
+void NativeImmJump::set_destination(address addr) {
+ assert(is_imm_jump(), "");
+ patch_offset_to(addr);
+}
+
+bool NativeImmJump::is_at(address addr) {
+ unsigned insn = as_uint(addr);
+ return Instruction_aarch32::extract(insn, 27, 24) == 0b1010;
+}
+
+NativeImmJump* NativeImmJump::from(address addr) {
+ assert(NativeImmJump::is_at(addr), "");
+ return (NativeImmJump*) addr;
+}
+
+//-------------------------------------------------------------------
+
+bool NativeRegJump::is_at(address addr) {
+ unsigned insn = as_uint(addr);
+ return is_branch_type(insn) && Instruction_aarch32::extract(insn, 7, 4) == 0b0001;
+}
+
+NativeRegJump* NativeRegJump::from(address addr) {
+ assert(NativeRegJump::is_at(addr), "");
+ return (NativeRegJump*) addr;
+}
+
+Register NativeRegJump::destination() const {
+ assert(is_reg_jump(), "");
+ return (Register) Instruction_aarch32::extract(as_uint(), 3, 0);
+}
+
+//-------------------------------------------------------------------
+
+bool NativeInstruction::is_safepoint_poll() {
+#ifdef COMPILER2_OR_JVMCI
+ // it would be too complex to find the place where the poll address is
+ // loaded into the address register, since C2 can do this somewhere else,
+ // so we only check for the exact poll instruction in the form
+ // ldr(r12, [rXXX, #0])
+ return (NativeInstruction::as_uint() & 0xfff0ffff) == 0xe590c000;
+#else
+ // a safepoint_poll is implemented in two steps as
+ //
+ // movw(r12, polling_page & 0xffff);
+ // movt(r12, polling_page >> 16);
+ // ldr(r12, [r12, #0]);
+ //
+ // or, if thread-local handshakes are used
+ //
+ // ldr(r12, [rthread, #offset]);
+ // ldr(r12, [r12, #0]);
+ //
+ // We can rely on this instruction order since we have only C1
+
+ if (SafepointMechanism::uses_thread_local_poll()) {
+ const Register scratch = rscratch2;
+
+ if (NativeInstruction::from(addr())->is_ldr(scratch, Address(scratch))) {
+ return NativeInstruction::from(addr()-arm_insn_sz)
+ ->is_ldr(scratch, Address(rthread, Thread::polling_page_offset()));
+ }
+ } else {
+ const intptr_t paddr = (intptr_t)os::get_polling_page();
+ const Register scratch = rscratch2;
+
+ if (NativeInstruction::from(addr())->is_ldr(scratch, Address(scratch))) {
+ NativeMovConstReg* mov_const = NativeMovConstReg::before(addr());
+ return (mov_const->data() == paddr) && (mov_const->destination() == scratch);
+ }
+ }
+
+ return false;
+#endif
+}
+
+bool NativeInstruction::is_movt(Register dst,
unsigned imm, Assembler::Condition cond) { + bool a1 = Instruction_aarch32::extract(uint_at(0), 27, 20) == 0b00110100; + bool a2 = Instruction_aarch32::extract(uint_at(0), 15, 12) == (unsigned)dst; + bool a3 = Instruction_aarch32::extract(uint_at(0), 11, 0) == ((unsigned)imm & 0xfff); + bool a4 = Instruction_aarch32::extract(uint_at(0), 19, 16) == ((unsigned)imm >> 12); + bool a5 = Instruction_aarch32::extract(uint_at(0), 31, 28) == (unsigned)cond; + + return a1 && a2 && a3 && a4 && a5; +} + +bool NativeInstruction::is_movw(Register dst, unsigned imm, Assembler::Condition cond) { + bool a1 = Instruction_aarch32::extract(uint_at(0), 27, 20) == 0b00110000; + bool a2 = Instruction_aarch32::extract(uint_at(0), 15, 12) == (unsigned)dst; + bool a3 = Instruction_aarch32::extract(uint_at(0), 11, 0) == ((unsigned)imm & 0xfff); + bool a4 = Instruction_aarch32::extract(uint_at(0), 19, 16) == ((unsigned)imm >> 12); + bool a5 = Instruction_aarch32::extract(uint_at(0), 31, 28) == (unsigned)cond; + + return a1 && a2 && a3 && a4 && a5; +} + +bool NativeInstruction::is_ldr(Register dst, Address addr, Assembler::Condition cond) { + assert(addr.get_mode() == Address::imm, "unimplemented"); + assert(addr.get_wb_mode() == Address::off, "unimplemented"); + assert(addr.index() == noreg, "unimplemented"); + assert(addr.offset() == 0, "unimplemented"); + + bool b0 = Instruction_aarch32::extract(uint_at(0), 24, 24) == 1; //P + bool b1 = Instruction_aarch32::extract(uint_at(0), 23, 23) == 1; //U + bool b2 = Instruction_aarch32::extract(uint_at(0), 21, 21) == 0; //W + bool b3 = Instruction_aarch32::extract(uint_at(0), 19, 16) == (unsigned)addr.base(); + bool b4 = Instruction_aarch32::extract(uint_at(0), 11, 0) == 0; + + bool a1 = b0 && b1 && b2 && b3 && b4; //Address encoding + + bool a2 = Instruction_aarch32::extract(uint_at(0), 15, 12) == (unsigned)dst; + bool a3 = Instruction_aarch32::extract(uint_at(0), 20, 20) == 1; + bool a4 = Instruction_aarch32::extract(uint_at(0), 22, 22) == 0; + bool a5 = Instruction_aarch32::extract(uint_at(0), 27, 25) == 0b010; + bool a6 = Instruction_aarch32::extract(uint_at(0), 31, 28) == (unsigned)cond; + + return a1 && a2 && a3 && a4 && a5 && a6; +} + + +bool NativeInstruction::is_movt() { + return Instruction_aarch32::extract(int_at(0), 27, 20) == 0b00110100; +} + +bool NativeInstruction::is_orr() { + return Instruction_aarch32::extract(int_at(0), 27, 21) == 0b0011100; +} + +bool NativeInstruction::is_sigill_zombie_not_entrant() { + return as_uint() == 0xe7fdeafd; // udf #0xdead +} + +void NativeIllegalInstruction::insert(address code_pos) { + *(juint*)code_pos = 0xe7fdeafd; // udf #0xdead +} + +//------------------------------------------------------------------- + +void NativeGeneralJump::verify() { } + +void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { + NativeGeneralJump* n_jump = (NativeGeneralJump*)code_pos; + assert(n_jump->is_nop() || n_jump->is_imm_jump(), "not overwrite whats not supposed"); + + CodeBuffer cb(code_pos, instruction_size); + MacroAssembler a(&cb); + + a.b(entry); + + ICache::invalidate_range(code_pos, instruction_size); +} + +// MT-safe patching of a long jump instruction. 
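+// Hedged sketch of the images replace_mt_safe() relies on, as implied by its
+// asserts (the patching prolog itself is emitted elsewhere in this port):
+//   code_buffer : a nop followed by the remainder of the far-ldr sequence;
+//   instr_addr  : an immediate 'b' that skips the patching prolog, with the
+//                 far-ldr load located at instr_addr + 2*arm_insn_sz.
+// A typical call from shared code would be
+//   NativeGeneralJump::replace_mt_safe(jump_site, stub_copy);
+// where 'jump_site' and 'stub_copy' are illustrative names only. The far-ldr
+// data address is propagated first, a write barrier is issued, and only then
+// is the single word at instr_addr swapped, so concurrently running threads
+// see either the old branch or the fully initialized sequence.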
+void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { + if (NativeFarLdr::is_at(instr_addr+2*arm_insn_sz)) { + assert(NativeInstruction::from(code_buffer)->is_nop(), "code_buffer image"); + assert(NativeImmJump::is_at(instr_addr), "instr_image image"); + // first 'b' prevents NativeFarLdr to recognize patching_prolog, skip it manually + address load_instr = instr_addr+2*arm_insn_sz; + + NativeFarLdr::from(load_instr)->set_data_addr(NativeFarLdr::from(code_buffer)->data_addr()); + + WRITE_MEM_BARRIER; + *(uintptr_t*)instr_addr = *(uintptr_t*)code_buffer; + ICache::invalidate_word(instr_addr); + + assert(NativeFarLdr::is_at(instr_addr), "now valid constant loading"); + } else { + ShouldNotReachHere(); + } +} --- /dev/null 2018-09-25 19:25:16.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/nativeInst_aarch32.hpp 2018-09-25 19:25:16.000000000 +0300 @@ -0,0 +1,542 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_NATIVEINST_AARCH32_HPP +#define CPU_AARCH32_VM_NATIVEINST_AARCH32_HPP + +#include "asm/assembler.hpp" +#include "runtime/icache.hpp" +#include "runtime/os.hpp" + +// We have interfaces for the following instructions: +// - NativeInstruction +// - - NativeCall +// - - NativeMovConstReg +// - - NativeMovRegMem +// - - NativeMovRegMemPatching +// - - NativeJump +// - - NativeIllegalOpCode +// - - NativeGeneralJump +// - - NativeReturn +// - - NativeReturnX (return with argument) +// - - NativePushConst +// - - NativeTstRegMem + +// The base class for different kinds of native instruction abstractions. +// Provides the primitive operations to manipulate code relative to this. 
+ +class NativeInstruction { + friend class Relocation; + friend bool is_NativeCallTrampolineStub_at(address); + public: + enum { arm_insn_sz = 4 }; + + inline bool is_nop(); + inline bool is_barrer(); + inline bool is_illegal(); + inline bool is_return(); + inline bool is_jump_or_nop(); + inline bool is_cond_jump(); + bool is_safepoint_poll(); + bool is_movt(); + bool is_orr(); + bool is_sigill_zombie_not_entrant(); + + bool is_movt(Register dst, unsigned imm, Assembler::Condition cond = Assembler::C_DFLT); + bool is_movw(Register dst, unsigned imm, Assembler::Condition cond = Assembler::C_DFLT); + bool is_ldr(Register dst, Address addr, Assembler::Condition cond = Assembler::C_DFLT); + + inline bool is_jump() const; + inline bool is_call() const; + + inline bool is_mov_const_reg() const; + inline bool is_reg_call() const; + inline bool is_imm_call() const; + inline bool is_reg_jump() const; + inline bool is_imm_jump() const; + + protected: + address addr() const { return address(this); } + // TODO remove this, every command is 4byte long + + address addr_at(int offset) const { return addr() + offset; } + + s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } + u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } + + jint int_at(int offset) const { return *(jint*) addr_at(offset); } + juint uint_at(int offset) const { return *(juint*) addr_at(offset); } + + address ptr_at(int offset) const { return *(address*) addr_at(offset); } + + oop oop_at (int offset) const { return *(oop*) addr_at(offset); } + + + void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; } + void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } + void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } + void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } + void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } + + static juint as_uint(address addr) { + return *(juint *) addr; + } + + juint as_uint() const { + return as_uint(addr()); + } + + void set_uint(juint v) { + *(juint *) addr() = v; + } + + public: + + // unit test stuff + static void test() {} // override for testing + + static bool is_at(address address); + static NativeInstruction* from(address address); + +}; + +inline NativeInstruction* nativeInstruction_at(address addr) { + return NativeInstruction::from(addr); +} + +inline NativeInstruction* nativeInstruction_at(uint32_t *addr) { + return NativeInstruction::from(address(addr)); +} + +class NativeBranchType: public NativeInstruction { + protected: + static bool is_branch_type(uint32_t insn); + void patch_offset_to(address addr); + public: + enum { + instruction_size = arm_insn_sz, + }; + + address next_instruction_address() const { + return addr() + arm_insn_sz; + } +}; + +class NativeFarLdr: public NativeInstruction { + private: + static address skip_patching_prolog(address addr); + public: + static bool is_at(address addr); + static NativeFarLdr* from(address addr); + intptr_t *data_addr(); + void set_data_addr(intptr_t *data_addr); + address next_instruction_address() const; +}; + +class NativeMovConstReg: public NativeInstruction { + friend class Relocation; + friend class NativeMovRegMem; + friend class NativeGeneralJump; + friend class NativeFarLdr; + + protected: + static bool is_ldr_literal_at(address instr, Register from = r15_pc); + static bool is_far_ldr_literal_at(address instr); + static bool is_movw_movt_at(address instr); + static bool is_mov_n_three_orr_at(address 
instr); + public: + enum { + ldr_sz = 1 * arm_insn_sz, + far_ldr_sz = 2 * arm_insn_sz, + movw_movt_pair_sz = 2 * arm_insn_sz, + mov_n_three_orr_sz = 4 * arm_insn_sz, + min_instruction_size = 1 * arm_insn_sz, + max_instruction_size = 4 * arm_insn_sz, + }; + + address next_instruction_address() const { + if (is_ldr_literal_at(addr())) { + return addr() + ldr_sz; + } else if (is_far_ldr_literal_at(addr())) { + return NativeFarLdr::from(addr())->next_instruction_address();; + } else if (is_movw_movt_at(addr())) { + return addr() + movw_movt_pair_sz; + } else if (is_mov_n_three_orr_at(addr())) { + return addr() + mov_n_three_orr_sz; + } + + // Unknown instruction in NativeMovConstReg + ShouldNotReachHere(); + return NULL; + } + + intptr_t data() const; + void set_data(intptr_t x); + + Register destination() const; + void set_destination(Register r); + + void flush() { + ICache::invalidate_range(addr(), max_instruction_size); + } + + void verify(); + void print(); + + // unit test stuff + static void test() {} + + // Creation + inline friend NativeMovConstReg* nativeMovConstReg_at(address address); + + static NativeMovConstReg* before(address addr) { + address mov = NULL; + if (is_ldr_literal_at(addr - ldr_sz)) { + mov = addr - ldr_sz; + } else if (is_far_ldr_literal_at(addr - far_ldr_sz)) { + mov = addr - far_ldr_sz; + } else if (is_movw_movt_at(addr - movw_movt_pair_sz)) { + mov = addr - movw_movt_pair_sz; + } else if (is_mov_n_three_orr_at(addr - mov_n_three_orr_sz)) { + mov = addr - mov_n_three_orr_sz; + } + guarantee(mov, "Can't find NativeMovConstReg before"); + return NativeMovConstReg::from(mov); + } + + static bool is_at(address instr); + static NativeMovConstReg* from(address addr); +}; + +inline NativeMovConstReg* nativeMovConstReg_at(address address) { + return NativeMovConstReg::from(address); +} + +class NativeTrampolineCall: public NativeInstruction { + public: + // NativeTrampolineCall size is always equal to NativeCall::instruction_size + address destination() const; + void set_destination(address dest); + void set_destination_mt_safe(address dest, bool assert_lock = true); + + static bool is_at(address address); + static NativeTrampolineCall* from(address address); + + address next_instruction_address() const; +}; + +class NativeRegCall: public NativeBranchType { + public: + + Register destination() const; + void set_destination(Register r); + + static bool is_at(address address); + static NativeRegCall* from(address address); +}; + +class NativeCall: public NativeInstruction { + friend class Relocation; + protected: + NativeInstruction* is_long_jump_or_call_at(address addr); + + // NativeCall represents: + // NativeImmCall, + // NativeMovConstReg + NativeBranchType, + // NativeTrampolineCall + public: + enum { + max_instruction_size = 5 * arm_insn_sz + }; + + static int instruction_size; +#ifdef ASSERT + STATIC_ASSERT(NativeMovConstReg::movw_movt_pair_sz + + NativeRegCall::instruction_size <= (int) max_instruction_size); + STATIC_ASSERT(NativeMovConstReg::mov_n_three_orr_sz + + NativeRegCall::instruction_size <= (int) max_instruction_size); +#endif + + address destination() const; + void set_destination(address dest); + + static void init(); + void verify_alignment() { ; } + void verify(); + void print(); + + address instruction_address() const { return addr_at(0); } + address next_instruction_address() const; + address return_address() const; + + // MT-safe patching of a call instruction. 
+ static void insert(address code_pos, address entry); + + // Similar to replace_mt_safe, but just changes the destination. The + // important thing is that free-running threads are able to execute + // this call instruction at all times. If the call is an immediate BL + // instruction we can simply rely on atomicity of 32-bit writes to + // make sure other threads will see no intermediate states. + + // We cannot rely on locks here, since the free-running threads must run at + // full speed. + // + // Used in the runtime linkage of calls; see class CompiledIC. + // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) + + // The parameter assert_lock disables the assertion during code generation. + void set_destination_mt_safe(address dest, bool assert_lock = true); + + static bool is_at(address instr); + static NativeCall* from(address instr); + + static bool is_call_before(address return_address); +}; + +inline address NativeTrampolineCall::next_instruction_address() const { + assert(is_at(addr()), "not call"); + return addr() + NativeCall::instruction_size; +} + +inline NativeCall* nativeCall_at(address address) { + return NativeCall::from(address); +} + +// An interface for accessing/manipulating native moves of the form: +// mov[b/w/l/q] [reg + offset], reg (instruction_code_reg2mem) +// mov[b/w/l/q] reg, [reg+offset] (instruction_code_mem2reg +// mov[s/z]x[w/b/q] [reg + offset], reg +// fld_s [reg+offset] +// fld_d [reg+offset] +// fstp_s [reg + offset] +// fstp_d [reg + offset] +// mov_literal64 scratch, ; mov[b/w/l/q] 0(scratch),reg | mov[b/w/l/q] reg,0(scratch) +// +// Warning: These routines must be able to handle any instruction sequences +// that are generated as a result of the load/store byte,word,long +// macros. For example: The load_unsigned_byte instruction generates +// an xor reg,reg inst prior to generating the movb instruction. This +// class must skip the xor instruction. 
+ + +// TODO Review +class NativeMovRegMem: public NativeInstruction { + public: + enum { + instruction_size = 2 * arm_insn_sz, // TODO check this + }; + // helper + int instruction_start() const; + + address instruction_address() const; + + address next_instruction_address() const; + + int offset() const; + + void set_offset(int x); + + void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } + + void verify(); + void print (); + + // unit test stuff + static void test() {} + + private: + inline friend NativeMovRegMem* nativeMovRegMem_at (address address); +}; + +inline NativeMovRegMem* nativeMovRegMem_at (address address) { + NativeMovRegMem* test = (NativeMovRegMem*) address; +#ifdef ASSERT + test->verify(); +#endif + return test; +} + +class NativeMovRegMemPatching: public NativeMovRegMem { + private: + friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) {Unimplemented(); return 0; } +}; + +class NativeJump: public NativeInstruction { + public: + enum { + instruction_size = NativeMovConstReg::movw_movt_pair_sz + NativeBranchType::instruction_size, + }; + address instruction_address() const { + return addr(); + } + + address next_instruction_address() const; + + address jump_destination() const; + void set_jump_destination(address dest); + + // Creation + inline friend NativeJump* nativeJump_at(address address); + + void verify(); + + // Unit testing stuff + static void test() {} + + // Insertion of native jump instruction + static void insert(address code_pos, address entry); + // MT-safe insertion of native jump at verified method entry + static void check_verified_entry_alignment(address entry, address verified_entry); + static void patch_verified_entry(address entry, address verified_entry, address dest); + + static bool is_at(address instr); + static NativeJump* from(address instr); +}; + +inline NativeJump* nativeJump_at(address addr) { + return NativeJump::from(addr); +} + +// TODO We don't really need NativeGeneralJump, NativeJump should be able to do +// everything that General Jump would. 
Make this only interface to NativeJump +// from share code (c1_Runtime) +class NativeGeneralJump: public NativeJump { +public: + enum { + instruction_size = arm_insn_sz, + }; + + static void insert_unconditional(address code_pos, address entry); + static void replace_mt_safe(address instr_addr, address code_buffer); + static void verify(); +}; + +inline NativeGeneralJump* nativeGeneralJump_at(address address) { + NativeGeneralJump* jump = (NativeGeneralJump*)(address); + debug_only(jump->verify();) + return jump; +} + +class NativePopReg : public NativeInstruction { + public: + // Insert a pop instruction + static void insert(address code_pos, Register reg); +}; + + +class NativeIllegalInstruction: public NativeInstruction { + public: + // Insert illegal opcode as specific address + static void insert(address code_pos); +}; + +// return instruction that does not pop values of the stack +class NativeReturn: public NativeInstruction { + public: +}; + +// return instruction that does pop values of the stack +class NativeReturnX: public NativeInstruction { + public: +}; + +// Simple test vs memory +class NativeTstRegMem: public NativeInstruction { + public: +}; + +inline bool NativeInstruction::is_nop() { + return (as_uint() & 0x0fffffff) == 0x0320f000; +} + +inline bool NativeInstruction::is_barrer() { + return (as_uint() == 0xf57ff05b /* dmb ish */ || + as_uint() == 0xee070fba /* mcr 15, 0, r0, cr7, cr10, {5}) */); +} + +inline bool NativeInstruction::is_jump_or_nop() { + return is_nop() || is_jump(); +} + +class NativeImmCall: public NativeBranchType { + public: + address destination() const; + void set_destination(address dest); + + static bool is_at(address address); + static NativeImmCall* from(address address); +}; + +class NativeImmJump: public NativeBranchType { + public: + + address destination() const; + void set_destination(address r); + + static bool is_at(address address); + static NativeImmJump* from(address address); +}; + +class NativeRegJump: public NativeBranchType { + public: + + Register destination() const; + void set_destination(Register r); + + static bool is_at(address address); + static NativeRegJump* from(address address); +}; + +inline bool NativeInstruction::is_call() const { return NativeCall::is_at(addr()); } +inline bool NativeInstruction::is_jump() const { return NativeJump::is_at(addr()); } +inline bool NativeInstruction::is_mov_const_reg() const { return NativeMovConstReg::is_at(addr()); } +inline bool NativeInstruction::is_imm_call() const { return NativeImmCall::is_at(addr()); } +inline bool NativeInstruction::is_reg_call() const { return NativeRegCall::is_at(addr()); } +inline bool NativeInstruction::is_imm_jump() const { return NativeImmJump::is_at(addr()); } +inline bool NativeInstruction::is_reg_jump() const { return NativeRegJump::is_at(addr()); } + +inline NativeCall* nativeCall_before(address return_address) { + if (NativeTrampolineCall::is_at(return_address - NativeCall::instruction_size)) { + return NativeCall::from(return_address - NativeCall::instruction_size); + } + if (NativeMovConstReg::is_at(return_address - NativeCall::instruction_size)) { + NativeMovConstReg *nm = NativeMovConstReg::from(return_address - NativeCall::instruction_size); + address next_instr = nm->next_instruction_address(); + if (NativeRegCall::is_at(next_instr) && + NativeRegCall::from(next_instr)->destination() == nm->destination()) { + return NativeCall::from(return_address - NativeCall::instruction_size); + } + } + if (NativeImmCall::is_at(return_address - 
NativeBranchType::instruction_size)) { + return NativeCall::from(return_address - NativeBranchType::instruction_size); + } + + ShouldNotReachHere(); + return NULL; +} + +#endif // CPU_AARCH32_VM_NATIVEINST_AARCH32_HPP --- /dev/null 2018-09-25 19:25:17.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/registerMap_aarch32.hpp 2018-09-25 19:25:17.000000000 +0300 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_REGISTERMAP_AARCH32_HPP +#define CPU_AARCH32_VM_REGISTERMAP_AARCH32_HPP + + private: + // This is a hook for finding a register in a "well-known" location, such as + // a register block of a predetermined format. Since there is none, we just + // return NULL. See registerMap_sparc.hpp for an example of grabbing + // registers from register save areas of a standard layout. + address pd_location(VMReg reg) const { + return NULL; + } + + // No platform dependent state to clear, initialize, or copy + void pd_clear() {} + void pd_initialize() {} + void pd_initialize_from(const RegisterMap* map) {} + +#endif // CPU_AARCH32_VM_REGISTERMAP_AARCH32_HPP --- /dev/null 2018-09-25 19:25:18.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/register_aarch32.cpp 2018-09-25 19:25:18.000000000 +0300 @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/register.hpp" + +const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers; +const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + + FloatRegisterImpl::number_of_registers; + +const char* RegisterImpl::name() const { + const char* names[number_of_registers] = { + "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" + }; + return is_valid() ? names[encoding()] : "noreg"; +} + +const char* FloatRegisterImpl::name() const { + const char* names[number_of_registers] = { + "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", + "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", + "s16", "s17", "s18", "s19", "s20", "s21", "s22", "s23", + "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31" + }; + return is_valid() ? names[encoding()] : "fnoreg"; +} --- /dev/null 2018-09-25 19:25:19.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/register_aarch32.hpp 2018-09-25 19:25:19.000000000 +0300 @@ -0,0 +1,483 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_REGISTER_AARCH32_HPP +#define CPU_AARCH32_VM_REGISTER_AARCH32_HPP + +#include "asm/register.hpp" + +class VMRegImpl; +typedef VMRegImpl* VMReg; + +// Implementation of integer registers for AArch32 architecture + +class RegisterImpl; +typedef RegisterImpl* Register; + +inline Register as_Register(int encoding) { + return (Register)(intptr_t) encoding; +} + +class RegisterImpl : public AbstractRegisterImpl { + public: + enum { + number_of_registers = 16 + }; + + // Construction + inline friend Register as_Register(int encoding); + + // Accessors + int encoding() const { + assert(is_valid(), "invalid register"); + return (intptr_t) this; + } + int encoding_nocheck() const { + return (intptr_t) this; + } + VMReg as_VMReg(); + Register successor() const { + return as_Register(encoding() + 1); + } + + // Testers + bool is_valid() const { + return 0 <= (intptr_t) this && (intptr_t) this < number_of_registers; + } + + // Return the bit which represents this register. This is intended to be + // used in bitmasks. 
See RegSet class below. + unsigned long bit(bool should_set = true) const { + return should_set ? 1 << encoding() : 0; + } + + // Return the name of this register + const char* name() const; +}; + +// Integer registers of AArch32 architecture +#define R(r) ((Register)(r)) + +#define Rmh_SP_save r11 // for C2 + +CONSTANT_REGISTER_DECLARATION(Register, noreg, -1); + +CONSTANT_REGISTER_DECLARATION(Register, r0, 0); +CONSTANT_REGISTER_DECLARATION(Register, r1, 1); +CONSTANT_REGISTER_DECLARATION(Register, r2, 2); +CONSTANT_REGISTER_DECLARATION(Register, r3, 3); +CONSTANT_REGISTER_DECLARATION(Register, r4, 4); +CONSTANT_REGISTER_DECLARATION(Register, r5, 5); +CONSTANT_REGISTER_DECLARATION(Register, r6, 6); +CONSTANT_REGISTER_DECLARATION(Register, r7, 7); +CONSTANT_REGISTER_DECLARATION(Register, r8, 8); +CONSTANT_REGISTER_DECLARATION(Register, r9, 9); +CONSTANT_REGISTER_DECLARATION(Register, r10, 10); +CONSTANT_REGISTER_DECLARATION(Register, r11, 11); +CONSTANT_REGISTER_DECLARATION(Register, r12, 12); +CONSTANT_REGISTER_DECLARATION(Register, r13, 13); +CONSTANT_REGISTER_DECLARATION(Register, r14, 14); +CONSTANT_REGISTER_DECLARATION(Register, r15, 15); + +// Implementation of floating point registers for AArch32 (VFPv3-D16) +// architecture + +class FloatRegisterImpl; +typedef FloatRegisterImpl* FloatRegister; + +// Return FloatRegister corresponding to the given s-type (aka f-type in this +// port) register number +inline FloatRegister as_FloatRegister(int encoding) { + return (FloatRegister)(intptr_t) encoding; +} + +// Return FloatRegister corresponding to the given d-type register number +inline FloatRegister as_DoubleFloatRegister(int encoding) { + return as_FloatRegister(2 * encoding); +} + +class FloatRegisterImpl : public AbstractRegisterImpl { + public: + enum { + // VFPv3-D16 architecture includes 16 doubleword registers, which can be + // also observed as 32 singleword registers. We count the singleword + // registers here. + number_of_registers = 32 + }; + + enum FloatRegisterSize { + SINGLE = 1, + DOUBLE = 2, + QUAD = 4 + }; + + // Construction + inline friend FloatRegister as_FloatRegister(int encoding); + inline friend FloatRegister as_DoubleFloatRegister(int encoding); + + // Accessors + int encoding() const { + assert(is_valid(), "invalid register"); + return (intptr_t) this; + } + int encoding_nocheck() const { + return (intptr_t) this; + } + VMReg as_VMReg(); + FloatRegister successor(enum FloatRegisterSize size) const { + return (as_FloatRegister((encoding() + (int)size) % number_of_registers | + (encoding() + (int)size) / number_of_registers)); + } + + // Testers + bool is_valid() const { + return 0 <= (intptr_t) this && (intptr_t) this < number_of_registers; + } + + // Return the bit which represents this register. This is intended to be + // used in bitmasks. See FloatRegSet class below. + unsigned long bit(bool should_set = true) const { + return should_set ? 1 << encoding() : 0; + } + + // Return the name of this register + const char* name() const; +}; + +// Floating point registers of AArch32 (VFPv3-D16, D32 and SIMD) architecture + +// Only the first 8 doubleword registers can be used for parameter passing +// and thus are caller-saved. The rest 8 registers are callee-saved. +// In VFPv3-D32 there are additional 16 doubleword registers that are +// caller-saved again. + +// Here we introduce the symbolic names for doubleword registers and the +// corresponding singleword views for the first 16 of them. 
The instruction +// set allows us to encode the doubleword register numbers directly using +// the constants below. + +// The respective names are as well defined for quad-word registers with +// encoding set by the same principles. + +CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg, -1); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, d0, 0); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d1, 2); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d2, 4); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d3, 6); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d4, 8); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d5, 10); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d6, 12); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d7, 14); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d8, 16); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d9, 18); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d10, 20); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d11, 22); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d12, 24); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d13, 26); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d14, 28); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d15, 30); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d16, 1); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d17, 3); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d18, 5); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d19, 7); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d20, 9); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d21, 11); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d22, 13); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d23, 15); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d24, 17); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d25, 19); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d26, 21); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d27, 23); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d28, 25); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d29, 27); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d30, 29); +CONSTANT_REGISTER_DECLARATION(FloatRegister, d31, 31); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, q0, 0); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q1, 4); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q2, 8); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q3, 12); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q4, 16); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q5, 20); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q6, 24); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q7, 28); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q8, 1); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q9, 5); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q10, 9); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q11, 13); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q12, 17); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q13, 21); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q14, 25); +CONSTANT_REGISTER_DECLARATION(FloatRegister, q15, 29); + +CONSTANT_REGISTER_DECLARATION(FloatRegister, f0, 0); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f1, 1); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f2, 2); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f3, 3); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f4, 4); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f5, 5); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f6, 6); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f7, 7); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f8, 8); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f9, 9); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f10, 10); 
+CONSTANT_REGISTER_DECLARATION(FloatRegister, f11, 11); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f12, 12); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f13, 13); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f14, 14); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f15, 15); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f16, 16); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f17, 17); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f18, 18); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f19, 19); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f20, 20); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f21, 21); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f22, 22); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f23, 23); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f24, 24); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f25, 25); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f26, 26); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f27, 27); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f28, 28); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f29, 29); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f30, 30); +CONSTANT_REGISTER_DECLARATION(FloatRegister, f31, 31); + +// Set of singleword floating point registers + +class FloatRegSet { + private: + uint32_t _bitset; + + FloatRegSet(uint32_t bitset) : _bitset(bitset) { } + + public: + FloatRegSet() : _bitset(0) { } + + FloatRegSet(FloatRegister r1) : _bitset(r1->bit()) { } + + FloatRegSet operator+(const FloatRegSet aSet) const { + FloatRegSet result(_bitset | aSet._bitset); + return result; + } + + FloatRegSet operator-(const FloatRegSet aSet) const { + FloatRegSet result(_bitset & ~aSet._bitset); + return result; + } + + FloatRegSet& operator+=(const FloatRegSet aSet) { + *this = *this + aSet; + return *this; + } + + static FloatRegSet of(FloatRegister r1) { + return FloatRegSet(r1); + } + + static FloatRegSet of(FloatRegister r1, FloatRegister r2) { + return of(r1) + r2; + } + + static FloatRegSet of(FloatRegister r1, FloatRegister r2, FloatRegister r3) { + return of(r1, r2) + r3; + } + + static FloatRegSet of(FloatRegister r1, FloatRegister r2, FloatRegister r3, + FloatRegister r4) { + return of(r1, r2, r3) + r4; + } + + static FloatRegSet range(FloatRegister start, FloatRegister end) { + uint32_t bits = ~0; + bits <<= start->encoding(); + bits <<= 31 - end->encoding(); + bits >>= 31 - end->encoding(); + return FloatRegSet(bits); + } + + uint32_t bits() const { + return _bitset; + } +}; + +// Set of doubleword floating point registers + +class DoubleFloatRegSet { + private: + uint32_t _bitset; + + DoubleFloatRegSet(uint32_t bitset) : _bitset(bitset) { } + + public: + DoubleFloatRegSet() : _bitset(0) { } + + DoubleFloatRegSet(FloatRegister r1) : _bitset(1 << (r1->encoding()>>1)+((r1->encoding()%2)?16:0)) { } + + DoubleFloatRegSet operator+(const DoubleFloatRegSet aSet) const { + DoubleFloatRegSet result(_bitset | aSet._bitset); + return result; + } + + DoubleFloatRegSet operator-(const DoubleFloatRegSet aSet) const { + DoubleFloatRegSet result(_bitset & ~aSet._bitset); + return result; + } + + DoubleFloatRegSet& operator+=(const DoubleFloatRegSet aSet) { + *this = *this + aSet; + return *this; + } + + static DoubleFloatRegSet of(FloatRegister r1) { + return DoubleFloatRegSet(r1); + } + + static DoubleFloatRegSet of(FloatRegister r1, FloatRegister r2) { + return of(r1) + r2; + } + + static DoubleFloatRegSet of(FloatRegister r1, FloatRegister r2, + FloatRegister r3) { + return of(r1, r2) + r3; + } + + static DoubleFloatRegSet of(FloatRegister r1, 
FloatRegister r2, + FloatRegister r3, FloatRegister r4) { + return of(r1, r2, r3) + r4; + } + + static DoubleFloatRegSet range(FloatRegister start, FloatRegister end) { + int start_reg = (start->encoding() >> 1)+((start->encoding()%2)?16:0); + int end_reg = (end->encoding() >> 1)+((end->encoding()%2)?16:0); + uint32_t bits = ~0; + bits <<= start_reg; + bits <<= 31 - end_reg; + bits >>= 31 - end_reg; + return DoubleFloatRegSet(bits); + } + + uint32_t bits() const { + return _bitset; + } +}; + +// Total number of registers of all sorts + +class ConcreteRegisterImpl : public AbstractRegisterImpl { + public: + enum { + // Here we count the total number of 32-bit slots available in registers. + // This number must be large enough to cover REG_COUNT (defined by C2) + // registers. There is no requirement that any ordering here matches + // any ordering C2 gives its OptoReg's. + // C2 port is made to be able to operate on all 32 double registers of VFPD32 + // but the register count is 32 since high half of 32-bit regs are not addressable + // so need to double the amount of known registers to get expected 64 +#ifndef COMPILER2 + number_of_registers = RegisterImpl::number_of_registers + + FloatRegisterImpl::number_of_registers +#else + number_of_registers = RegisterImpl::number_of_registers + + (FloatRegisterImpl::number_of_registers*2) + + 2 +#endif + }; + + static const int max_gpr; + static const int max_fpr; +}; + + +// Set of integer registers + +class RegSet { + private: + uint32_t _bitset; + + RegSet(uint32_t bitset) : _bitset(bitset) { } + + public: + RegSet() : _bitset(0) { } + + RegSet(Register r1) : _bitset(r1->bit()) { } + + RegSet operator+(const RegSet aSet) const { + RegSet result(_bitset | aSet._bitset); + return result; + } + + RegSet operator-(const RegSet aSet) const { + RegSet result(_bitset & ~aSet._bitset); + return result; + } + + RegSet& operator+=(const RegSet aSet) { + *this = *this + aSet; + return *this; + } + + static RegSet of(Register r1) { + return RegSet(r1); + } + + static RegSet of(Register r1, Register r2) { + return of(r1) + r2; + } + + static RegSet of(Register r1, Register r2, Register r3) { + return of(r1, r2) + r3; + } + + static RegSet of(Register r1, Register r2, Register r3, Register r4) { + return of(r1, r2, r3) + r4; + } + + static RegSet of(Register r1, Register r2, Register r3, Register r4, Register r5) { + return of(r1, r2, r3, r4) + r5; + } + + static RegSet of(Register r1, Register r2, Register r3, Register r4, Register r5, Register r6) { + return of(r1, r2, r3, r4, r5) + r6; + } + + static RegSet range(Register start, Register end) { + uint32_t bits = ~0; + bits <<= start->encoding(); + bits <<= 31 - end->encoding(); + bits >>= 31 - end->encoding(); + return RegSet(bits); + } + + uint32_t bits() const { + return _bitset; + } +}; + + +#endif // CPU_AARCH32_VM_REGISTER_AARCH32_HPP --- /dev/null 2018-09-25 19:25:20.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/register_definitions_aarch32.cpp 2018-09-25 19:25:20.000000000 +0300 @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "asm/register.hpp" + +REGISTER_DEFINITION(Register, noreg); + +REGISTER_DEFINITION(Register, r0); +REGISTER_DEFINITION(Register, r1); +REGISTER_DEFINITION(Register, r2); +REGISTER_DEFINITION(Register, r3); +REGISTER_DEFINITION(Register, r4); +REGISTER_DEFINITION(Register, r5); +REGISTER_DEFINITION(Register, r6); +REGISTER_DEFINITION(Register, r7); +REGISTER_DEFINITION(Register, r8); +REGISTER_DEFINITION(Register, r9); +REGISTER_DEFINITION(Register, r10); +REGISTER_DEFINITION(Register, r11); +REGISTER_DEFINITION(Register, r12); +REGISTER_DEFINITION(Register, r13); +REGISTER_DEFINITION(Register, r14); +REGISTER_DEFINITION(Register, r15); + +REGISTER_DEFINITION(FloatRegister, fnoreg); + +REGISTER_DEFINITION(FloatRegister, d0); +REGISTER_DEFINITION(FloatRegister, d1); +REGISTER_DEFINITION(FloatRegister, d2); +REGISTER_DEFINITION(FloatRegister, d3); +REGISTER_DEFINITION(FloatRegister, d4); +REGISTER_DEFINITION(FloatRegister, d5); +REGISTER_DEFINITION(FloatRegister, d6); +REGISTER_DEFINITION(FloatRegister, d7); +REGISTER_DEFINITION(FloatRegister, d8); +REGISTER_DEFINITION(FloatRegister, d9); +REGISTER_DEFINITION(FloatRegister, d10); +REGISTER_DEFINITION(FloatRegister, d11); +REGISTER_DEFINITION(FloatRegister, d12); +REGISTER_DEFINITION(FloatRegister, d13); +REGISTER_DEFINITION(FloatRegister, d14); +REGISTER_DEFINITION(FloatRegister, d15); +REGISTER_DEFINITION(FloatRegister, d16); +REGISTER_DEFINITION(FloatRegister, d17); +REGISTER_DEFINITION(FloatRegister, d18); +REGISTER_DEFINITION(FloatRegister, d19); +REGISTER_DEFINITION(FloatRegister, d20); +REGISTER_DEFINITION(FloatRegister, d21); +REGISTER_DEFINITION(FloatRegister, d22); +REGISTER_DEFINITION(FloatRegister, d23); +REGISTER_DEFINITION(FloatRegister, d24); +REGISTER_DEFINITION(FloatRegister, d25); +REGISTER_DEFINITION(FloatRegister, d26); +REGISTER_DEFINITION(FloatRegister, d27); +REGISTER_DEFINITION(FloatRegister, d28); +REGISTER_DEFINITION(FloatRegister, d29); +REGISTER_DEFINITION(FloatRegister, d30); +REGISTER_DEFINITION(FloatRegister, d31); + +REGISTER_DEFINITION(FloatRegister, q0); +REGISTER_DEFINITION(FloatRegister, q1); +REGISTER_DEFINITION(FloatRegister, q2); +REGISTER_DEFINITION(FloatRegister, q3); +REGISTER_DEFINITION(FloatRegister, q4); +REGISTER_DEFINITION(FloatRegister, q5); +REGISTER_DEFINITION(FloatRegister, q6); +REGISTER_DEFINITION(FloatRegister, q7); +REGISTER_DEFINITION(FloatRegister, q8); +REGISTER_DEFINITION(FloatRegister, q9); +REGISTER_DEFINITION(FloatRegister, q10); 
+REGISTER_DEFINITION(FloatRegister, q11); +REGISTER_DEFINITION(FloatRegister, q12); +REGISTER_DEFINITION(FloatRegister, q13); +REGISTER_DEFINITION(FloatRegister, q14); +REGISTER_DEFINITION(FloatRegister, q15); + +REGISTER_DEFINITION(FloatRegister, f0); +REGISTER_DEFINITION(FloatRegister, f1); +REGISTER_DEFINITION(FloatRegister, f2); +REGISTER_DEFINITION(FloatRegister, f3); +REGISTER_DEFINITION(FloatRegister, f4); +REGISTER_DEFINITION(FloatRegister, f5); +REGISTER_DEFINITION(FloatRegister, f6); +REGISTER_DEFINITION(FloatRegister, f7); +REGISTER_DEFINITION(FloatRegister, f8); +REGISTER_DEFINITION(FloatRegister, f9); +REGISTER_DEFINITION(FloatRegister, f10); +REGISTER_DEFINITION(FloatRegister, f11); +REGISTER_DEFINITION(FloatRegister, f12); +REGISTER_DEFINITION(FloatRegister, f13); +REGISTER_DEFINITION(FloatRegister, f14); +REGISTER_DEFINITION(FloatRegister, f15); +REGISTER_DEFINITION(FloatRegister, f16); +REGISTER_DEFINITION(FloatRegister, f17); +REGISTER_DEFINITION(FloatRegister, f18); +REGISTER_DEFINITION(FloatRegister, f19); +REGISTER_DEFINITION(FloatRegister, f20); +REGISTER_DEFINITION(FloatRegister, f21); +REGISTER_DEFINITION(FloatRegister, f22); +REGISTER_DEFINITION(FloatRegister, f23); +REGISTER_DEFINITION(FloatRegister, f24); +REGISTER_DEFINITION(FloatRegister, f25); +REGISTER_DEFINITION(FloatRegister, f26); +REGISTER_DEFINITION(FloatRegister, f27); +REGISTER_DEFINITION(FloatRegister, f28); +REGISTER_DEFINITION(FloatRegister, f29); +REGISTER_DEFINITION(FloatRegister, f30); +REGISTER_DEFINITION(FloatRegister, f31); + + +REGISTER_DEFINITION(Register, c_rarg0); +REGISTER_DEFINITION(Register, c_rarg1); +REGISTER_DEFINITION(Register, c_rarg2); +REGISTER_DEFINITION(Register, c_rarg3); + +REGISTER_DEFINITION(Register, j_rarg0); +REGISTER_DEFINITION(Register, j_rarg1); +REGISTER_DEFINITION(Register, j_rarg2); +REGISTER_DEFINITION(Register, j_rarg3); + +REGISTER_DEFINITION(Register, rdispatch); +REGISTER_DEFINITION(Register, rbcp); +REGISTER_DEFINITION(Register, rlocals); +REGISTER_DEFINITION(Register, rcpool); +REGISTER_DEFINITION(Register, rthread); +REGISTER_DEFINITION(Register, rscratch1); +REGISTER_DEFINITION(Register, rmethod); +REGISTER_DEFINITION(Register, rfp); +REGISTER_DEFINITION(Register, rscratch2); +REGISTER_DEFINITION(Register, sp); +REGISTER_DEFINITION(Register, lr); +REGISTER_DEFINITION(Register, r15_pc); --- /dev/null 2018-09-25 19:25:21.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/relocInfo_aarch32.cpp 2018-09-25 19:25:21.000000000 +0300 @@ -0,0 +1,144 @@ +/* + * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/relocInfo.hpp" +#include "nativeInst_aarch32.hpp" +#include "oops/oop.inline.hpp" +#include "runtime/safepoint.hpp" + +void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { + + if (NativeFarLdr::is_at(addr())) { + NativeFarLdr *nal = NativeFarLdr::from(addr()); + address const_addr = NULL; + switch(type()) { + case relocInfo::oop_type: + const_addr = (address)code()->oop_addr_at(((oop_Relocation *)this)->oop_index()); + assert(*(address*)const_addr == x, "error in memory relocation"); + break; + case relocInfo::section_word_type: + const_addr = ((section_word_Relocation*)this)->target(); + assert(const_addr == x, "error in memory relocation"); + break; + default: + ShouldNotReachHere(); + } + assert(const_addr, "should not be NULL"); + if (verify_only) { + guarantee(nal->data_addr() == (intptr_t*) const_addr, "instructions must match"); + return; + } + nal->set_data_addr((intptr_t*) const_addr); + } else { + NativeMovConstReg *nm = NativeMovConstReg::from(addr()); + if (verify_only) { + guarantee(nm->data() == (intptr_t) x, "instructions must match"); + return; + } + nm->set_data((intptr_t) x); + } +} + +address Relocation::pd_call_destination(address orig_addr) { + intptr_t adj = 0; + if (orig_addr != NULL) { + // We just moved this call instruction from orig_addr to addr(). + // This means its target will appear to have grown by addr() - orig_addr. + adj = -( addr() - orig_addr ); + } + + NativeInstruction *ni = NativeInstruction::from(addr()); + + // Checking from shortest encoding size to longets, + // to avoid access beyond CodeCache boundary + if (NativeImmCall::is_at(addr())) { + return NativeImmCall::from(addr())->destination() + adj; + } else if (NativeImmJump::is_at(addr())) { + return NativeImmJump::from(addr())->destination() + adj; + } else if (NativeCall::is_at(addr())) { + return NativeCall::from(addr())->destination(); + } else if (NativeJump::is_at(addr())) { + return NativeJump::from(addr())->jump_destination(); + } + + ShouldNotReachHere(); + return NULL; +} + +void Relocation::pd_set_call_destination(address x) { + assert(addr() != x, "call instruction in an infinite loop"); // FIXME what's wrong to _generate_ loop? 
+ NativeInstruction *ni = NativeInstruction::from(addr()); + + // Checking from shortest encoding size to longets, + // to avoid access beyond CodeCache boundary + if (NativeImmCall::is_at(addr())) { + NativeImmCall::from(addr())->set_destination(x); + } else if (NativeImmJump::is_at(addr())) { + NativeImmJump::from(addr())->set_destination(x); + } else if (NativeCall::is_at(addr())) { + NativeCall::from(addr())->set_destination(x); + } else if (NativeJump::is_at(addr())) { + NativeJump::from(addr())->set_jump_destination(x); + } else { + ShouldNotReachHere(); + } + + assert(pd_call_destination(addr()) == x, "fail in reloc"); +} + +address* Relocation::pd_address_in_code() { + ShouldNotCallThis(); + return NULL; +} + +address Relocation::pd_get_address_from_code() { + ShouldNotCallThis(); + return NULL; +} + +void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { + NativeInstruction *ni = NativeInstruction::from(addr()); + if (ni->is_mov_const_reg()) { + address old_addr = old_addr_for(addr(), src, dest); + NativeMovConstReg *nm2 = NativeMovConstReg::from(old_addr); + NativeMovConstReg::from(addr())->set_data(nm2->data()); + } else { +#if 0 + warning("TODO: poll_Relocation::fix_relocation_after_move: " + "ensure relocating does nothing on relative instruction"); +#endif + } +} + +void metadata_Relocation::pd_fix_value(address x) { + if (NativeFarLdr::is_at(addr())) { + NativeFarLdr *nal = NativeFarLdr::from(addr()); + address const_addr = (address)code()->metadata_addr_at(((metadata_Relocation *)this)->metadata_index()); + nal->set_data_addr((intptr_t*) const_addr); + } +} --- /dev/null 2018-09-25 19:25:22.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/relocInfo_aarch32.hpp 2018-09-25 19:25:22.000000000 +0300 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_RELOCINFO_AARCH32_HPP +#define CPU_AARCH32_VM_RELOCINFO_AARCH32_HPP + + // machine-dependent parts of class relocInfo + private: + enum { + // Relocations are byte-aligned. + offset_unit = 1, + // We don't use format(). + format_width = 0 + }; + + public: + + // This platform has no oops in the code that are not also + // listed in the oop section. 
+ static bool mustIterateImmediateOopsInCode() { return false; } + +#endif // CPU_AARCH32_VM_RELOCINFO_AARCH32_HPP --- /dev/null 2018-09-25 19:25:23.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/runtime_aarch32.cpp 2018-09-25 19:25:23.000000000 +0300 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#ifdef COMPILER2 +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "classfile/systemDictionary.hpp" +#include "code/vmreg.hpp" +#include "interpreter/interpreter.hpp" +#include "opto/runtime.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/globalDefinitions.hpp" +#include "vmreg_aarch32.inline.hpp" +#endif + + --- /dev/null 2018-09-25 19:25:24.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/sharedRuntime_aarch32.cpp 2018-09-25 19:25:24.000000000 +0300 @@ -0,0 +1,3128 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/debugInfoRec.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_aarch32.hpp" +#include "interpreter/interpreter.hpp" +#include "logging/log.hpp" +#include "oops/compiledICHolder.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/align.hpp" +#include "utilities/formatBuffer.hpp" +#include "vmreg_aarch32.inline.hpp" +#include "register_aarch32.hpp" +#include "vm_version_aarch32.hpp" +#ifdef COMPILER1 +#include "c1/c1_Runtime1.hpp" +#endif +#if COMPILER2_OR_JVMCI +#include "adfiles/ad_aarch32.hpp" +#include "opto/runtime.hpp" +#endif +#if INCLUDE_JVMCI +#include "jvmci/jvmciJavaClasses.hpp" +#endif + + +#define __ masm-> + +const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; + +class SimpleRuntimeFrame { + + public: + + // Most of the runtime stubs have this simple frame layout. + // This class exists to make the layout shared in one place. + // Offsets are for compiler stack slots, which are jints. + enum layout { + // The frame sender code expects that rbp will be in the "natural" place and + // will override any oopMap setting for it. We must therefore force the layout + // so that it agrees with the frame sender code. + // we don't expect any arg reg save area so aarch32 asserts that + // frame::arg_reg_save_area_bytes == 0 + rbp_off = 0, + rbp_off2, + return_off, return_off2, + framesize + }; +}; + +// FIXME -- this is used by C1 +class RegisterSaver { + public: + static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool lr_pushed = false); + static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true); + + // Capture info about frame layout + enum layout { + fpu_state_off = 0, + fpu_state_end = fpu_state_off+FPUStateSizeInWords-1, + // The frame sender code expects that rfp will be in + // the "natural" place and will override any oopMap + // setting for it. We must therefore force the layout + // so that it agrees with the frame sender code. + // + // FIXME there are extra saved register (from `push_CPU_state`) note that r11 == rfp + r0_off, + r1_off, + r2_off, + r3_off, + r4_off, + r5_off, + r6_off, + r7_off, + r8_off, rmethod_off = r8_off, + r9_off, rscratch1_off = r9_off, + r10_off, + r11_off, + r12_off, + r14_off, // with C2 can hold value different to LR entry in the frame + reg_save_size, + }; + + + // Offsets into the register save area + // Used by deoptimization when it is managing result register + // values on its own + + static int offset_in_bytes(int offset) { return offset * wordSize; } + +// During deoptimization only the result registers need to be restored, + // all the other values have already been extracted. 
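  // Illustrative calling pattern (a sketch, not part of the original patch):
  // a runtime stub generator would typically bracket a call into the VM with
  // this save/restore pair, roughly
  //
  //   int frame_size_words;
  //   OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
  //   // ... set_last_Java_frame(), call into the VM, record `map` at the call pc ...
  //   RegisterSaver::restore_live_registers(masm);
  //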
+ static void restore_result_registers(MacroAssembler* masm); + +}; + +OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool lr_pushed) { + int frame_size_in_bytes = additional_frame_words*wordSize + (reg_save_size + frame::get_frame_size()) *BytesPerInt; + int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; + int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; + *total_frame_words = frame_size_in_bytes / wordSize;; + + if (lr_pushed) { + if (FrameAPCS) + Unimplemented(); + + __ push(rfp); + __ add(rfp, sp, wordSize); + } else + __ enter(); + __ push_CPU_state(); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* oop_map = new OopMap(frame_size_in_slots, 0); + + oop_map->set_callee_saved(VMRegImpl::stack2reg(r0_off + additional_frame_slots), r0->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(r1_off + additional_frame_slots), r1->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(r2_off + additional_frame_slots), r2->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(r3_off + additional_frame_slots), r3->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(r4_off + additional_frame_slots), r4->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(r5_off + additional_frame_slots), r5->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(r6_off + additional_frame_slots), r6->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(r7_off + additional_frame_slots), r7->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(r8_off + additional_frame_slots), r8->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(r9_off + additional_frame_slots), r9->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(r10_off + additional_frame_slots), r10->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(r12_off + additional_frame_slots), r12->as_VMReg()); + oop_map->set_callee_saved(VMRegImpl::stack2reg(r14_off + additional_frame_slots), r14->as_VMReg()); + if (hasFPU()) { + for (int i = 0; i < FPUStateSizeInWords; ++i) { + oop_map->set_callee_saved(VMRegImpl::stack2reg(fpu_state_off + i + additional_frame_slots), + as_FloatRegister(i)->as_VMReg()); + } + } + + return oop_map; +} + +void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) { + __ pop_CPU_state(); + if (restore_lr) + __ leave(); + else { + if (FrameAPCS) + Unimplemented(); + + __ sub(sp, rfp, wordSize); + __ pop(rfp); + } +} + +void RegisterSaver::restore_result_registers(MacroAssembler* masm) { + + // Just restore result register. Only used by deoptimization. By + // now any callee save register that needs to be restored to a c2 + // caller of the deoptee has been extracted into the vframeArray + // and will be stuffed into the c2i adapter we create for later + // restoration so only result registers need to be restored here. 
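  // Worked example for the loads below (a sketch, assuming wordSize == 4 on
  // this 32-bit port): offset_in_bytes(fpu_state_off) == 0, so the FP result
  // d0 sits at the bottom of the save area, while the integer result pair
  // r0/r1 lives at offset_in_bytes(r0_off) == FPUStateSizeInWords * 4 and the
  // following word.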
+ + + if(hasFPU()) { + // Restore fp result register + __ vldr_f64(d0, Address(sp, offset_in_bytes(fpu_state_off))); + } + + // Restore integer result register + __ ldr(r0, Address(sp, offset_in_bytes(r0_off))); + __ ldr(r1, Address(sp, offset_in_bytes(r1_off))); + + // Pop all of the register save are off the stack + __ add(sp, sp, (reg_save_size + frame::get_frame_size()) * wordSize); +} + +// Is vector's size (in bytes) bigger than a size saved by default? +// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. +bool SharedRuntime::is_wide_vector(int size) { + return size > 16; +} + +size_t SharedRuntime::trampoline_size() { + return NativeCall::instruction_size; +} + +void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { + __ mov(rscratch1, destination); + __ b(rscratch1); +} +// This functions returns offset from fp to java arguments on stack. +// +// The java_calling_convention describes stack locations as ideal slots on +// a frame with no abi restrictions. Since we must observe abi restrictions +// (like the placement of the register window) the slots must be biased by +// the following value. +static int reg2offset_in(VMReg r) { + // After stack frame created, fp points to 1 slot after previous sp value. + return (r->reg2stack() + 1) * VMRegImpl::stack_slot_size; +} + +static int reg2offset_out(VMReg r) { + return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; +} + +template static const T& min (const T& a, const T& b) { + return (a > b) ? b : a; +} + +// --------------------------------------------------------------------------- +// Read the array of BasicTypes from a signature, and compute where the +// arguments should go. Values in the VMRegPair regs array refer to 4-byte +// quantities. Values less than VMRegImpl::stack0 are registers, those above +// refer to 4-byte stack slots. All stack slots are based off of the stack pointer +// as framesizes are fixed. +// VMRegImpl::stack0 refers to the first slot 0(sp). +// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register +// up to RegisterImpl::number_of_registers) are the 64-bit +// integer registers. + +// Note: the INPUTS in sig_bt are in units of Java argument words, +// which are 64-bit. The OUTPUTS are in 32-bit units. + +int SharedRuntime::java_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed, + int is_outgoing) { + + assert(j_rarg0 == c_rarg0, "assumed"); + +#ifndef HARD_FLOAT_CC + if (hasFPU()) { + // Create the mapping between argument positions and + // registers. 
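  // Illustration (not from the original patch): with the tables below, a Java
  // signature (int, long, float, double, int) is assigned as
  //
  //   int    -> j_rarg0
  //   long   -> the even-aligned pair starting at j_rarg2
  //   float  -> f0
  //   double -> the even-aligned pair f2:f3
  //   int    -> stack slot 0 (all four integer argument registers are taken)
  //
  // mirroring the AAPCS rule that 64-bit values occupy an even/odd pair.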
+ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { + j_rarg0, j_rarg1, j_rarg2, j_rarg3 + }; + const uint FP_ArgReg_N = 16; + static const FloatRegister FP_ArgReg[] = { + f0, f1, f2, f3, + f4, f5, f6, f7, + f8, f9, f10, f11, + f12, f13, f14, f15, + }; + + uint int_args = 0; + uint fp_args = 0; + uint stk_args = 0; + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_FLOAT: + if (fp_args < FP_ArgReg_N) { + regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 1; + } + break; + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + if (int_args < Argument::n_int_register_parameters_j) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 1; + } + break; + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + fp_args = align_up(fp_args, 2); + if (fp_args < FP_ArgReg_N) { + regs[i].set2(FP_ArgReg[fp_args]->as_VMReg()); + fp_args += 2; + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (int_args + 1 < Argument::n_int_register_parameters_j) { + if ((int_args % 2) != 0) { + ++int_args; + } + regs[i].set2(INT_ArgReg[int_args]->as_VMReg()); + int_args += 2; + } else { + if (stk_args % 2 != 0) { + ++stk_args; + } + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + int_args = Argument::n_int_register_parameters_j; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return align_up(stk_args, StackAlignmentInBytes/wordSize); + } else +#endif // ndef HARD_FLOAT_CC + { + // in aarch32 pure soft-float mode the java calling convention is set the same as C one + return c_calling_convention(sig_bt, regs, NULL, total_args_passed); + } +} + +// Patch the callers callsite with entry to compiled code if it exists. +static void patch_callers_callsite(MacroAssembler *masm) { + Label L; + __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset()))); + __ cbz(rscratch1, L); + + __ enter(); + __ push_CPU_state(); + + // VM needs caller's callsite + // VM needs target method + // This needs to be a long call since we will relocate this adapter to + // the codeBuffer and it may not reach + +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + + __ mov(c_rarg0, rmethod); + __ mov(c_rarg1, lr); + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); + __ bl(rscratch1); + __ maybe_isb(); + + __ pop_CPU_state(); + // restore sp + __ leave(); + __ bind(L); +} + +static void gen_c2i_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + Label& skip_fixup) { + // Before we get into the guts of the C2I adapter, see if we should be here + // at all. We've come from compiled code and are attempting to jump to the + // interpreter, which means the caller made a static call to get here + // (vcalls always get a compiled target if there is one). Check for a + // compiled target. If there is one, we need to patch the caller's call. 
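  // Sizing illustration (a sketch, assuming Interpreter::stackElementSize ==
  // wordSize == 4 on this 32-bit port): a method taking (long, int) has
  // total_args_passed == 3 Java slots, so the adapter reserves 3 * 4 = 12
  // bytes of interpreter argument space and fills it with pre-decrementing
  // stores relative to sp.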
+ patch_callers_callsite(masm); + + __ bind(skip_fixup); + + // Since all args are passed on the stack, total_args_passed * + // Interpreter::stackElementSize is the space we need. + + const int extraspace = total_args_passed * Interpreter::stackElementSize; + const Register compArgPos = lr; + int ld_shift = 0; + + __ str(compArgPos, Address(sp, -(extraspace + wordSize))); + __ mov(compArgPos, sp); + + // Now write the args into the outgoing interpreter space + for (int i = 0; i < total_args_passed; i++) { + + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // next stack slot offset + const int next_off = -Interpreter::stackElementSize; + + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + + if (r_2->is_valid()) { + assert(i + 1 < total_args_passed && sig_bt[i + 1] == T_VOID, "going to overrwrite reg_2 value"); + } + + if (r_1->is_stack()) { + // memory to memory use rscratch1 + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size - ld_shift; + if (!r_2->is_valid()) { + __ ldr(rscratch1, Address(compArgPos, ld_off)); + __ str(rscratch1, Address(sp, next_off, Address::pre)); + } else { + int tmp_off = ld_off; + // ldrd accepts only imm8 + if(abs(ld_off) > (255 << 2)) { + if(__ is_valid_for_imm12(ld_off)) { + __ add(compArgPos, compArgPos, ld_off); + } else { + // add operates encoded imm12, NOT plain + __ mov(rscratch1, ld_off); + __ add(compArgPos, compArgPos, rscratch1); + } + tmp_off = 0; + ld_shift += ld_off; + } + __ ldrd(rscratch1, rscratch2, Address(compArgPos, tmp_off)); + __ strd(rscratch1, rscratch2, Address(sp, 2* next_off, Address::pre)); + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + assert(r != compArgPos, "compArgPos was modified"); + if (!r_2->is_valid()) { + __ str(r, Address(sp, next_off, Address::pre)); + } else { + assert(r_2->as_Register() != compArgPos, "compArgPos was modified"); + __ strd(r, r_2->as_Register(), Address(sp, 2 * next_off, Address::pre)); + } + } else { + assert(r_1->is_FloatRegister(), ""); + if (!r_2->is_valid()) { + // Can't do pre or post addressing for vldr, vstr + __ add(sp, sp, next_off); + __ vstr_f32(r_1->as_FloatRegister(), Address(sp)); + } else { + // TODO assert(r_2->is_FloatRegister() && r_2->as_FloatRegister() == r_1->as_FloatRegister() + 1, ""); + // Can't do pre or post addressing for vldr, vstr + __ add(sp, sp, 2 * next_off); + __ vstr_f64(r_1->as_FloatRegister(), Address(sp)); + } + } + } + + // hope, sp is returned to desired value + __ ldr(compArgPos, Address(sp, -wordSize)); + + // set sender sp + if(__ is_valid_for_imm12(extraspace)) { + __ add(r4, sp, extraspace); + } else { + __ mov(rscratch1, extraspace); + __ add(r4, sp, rscratch1); + } + + __ ldr(rscratch1, Address(rmethod, in_bytes(Method::interpreter_entry_offset()))); + __ b(rscratch1); +} + +static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, + address code_start, address code_end, + Label& L_ok) { + Label L_fail; + __ lea(temp_reg, ExternalAddress(code_start)); + __ cmp(pc_reg, temp_reg); + __ b(L_fail, Assembler::LO); + __ lea(temp_reg, ExternalAddress(code_end)); + __ cmp(pc_reg, temp_reg); + __ b(L_ok, Assembler::LO); + __ bind(L_fail); +} + +void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs) { + + // Note: r13 contains the 
senderSP on entry. We must preserve it since + // we may do a i2c -> c2i transition if we lose a race where compiled + // code goes non-entrant while we get args ready. + + // In addition we use r13 to locate all the interpreter args because + // we must align the stack to 16 bytes. + + // Adapters are frameless. + + // An i2c adapter is frameless because the *caller* frame, which is + // interpreted, routinely repairs its own sp (from + // interpreter_frame_last_sp), even if a callee has modified the + // stack pointer. It also recalculates and aligns sp. + + // A c2i adapter is frameless because the *callee* frame, which is + // interpreted, routinely repairs its caller's sp (from sender_sp, + // which is set up via the senderSP register). + + // In other words, if *either* the caller or callee is interpreted, we can + // get the stack pointer repaired after a call. + + // This is why c2i and i2c adapters cannot be indefinitely composed. + // In particular, if a c2i adapter were to somehow call an i2c adapter, + // both caller and callee would be compiled methods, and neither would + // clean up the stack pointer changes performed by the two adapters. + // If this happens, control eventually transfers back to the compiled + // caller, but with an uncorrected stack, causing delayed havoc. + + if (VerifyAdapterCalls && + (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) { + // So, let's test for cascading c2i/i2c adapters right now. + // assert(Interpreter::contains($return_addr) || + // StubRoutines::contains($return_addr), + // "i2c adapter must return to an interpreter frame"); + __ block_comment("verify_i2c { "); + Label L_ok; + if (Interpreter::code() != NULL) + range_check(masm, lr, rscratch1, + Interpreter::code()->code_start(), Interpreter::code()->code_end(), + L_ok); + if (StubRoutines::code1() != NULL) + range_check(masm, lr, rscratch1, + StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(), + L_ok); + if (StubRoutines::code2() != NULL) + range_check(masm, lr, rscratch1, + StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(), + L_ok); + const char* msg = "i2c adapter must return to an interpreter frame"; + __ block_comment(msg); + __ stop(msg); + __ bind(L_ok); + __ block_comment("} verify_i2ce "); + } + + const int stack_space = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, StackAlignmentInBytes); + const int ld_high = total_args_passed *Interpreter::stackElementSize; + // Point to interpreter value (vs. tag) + const int next_off = -Interpreter::stackElementSize; // offset from ld ptr + const Register loadCounter = lr; + + // Align sp to StackAlignmentInBytes so compiled frame starts always aligned + // This is required by APCS, so all native code depends on it. 
The compiled + // Java code is not required to follow this standard however doing so + // simplifies the code because allows to have fixed size for compiled frames + __ mov(rscratch2, sp); + __ align_stack(); + if(total_args_passed) { + // put below reserved stack space, imm12 should be enough + __ str(loadCounter, Address(sp, -(stack_space + wordSize))); + + if(__ is_valid_for_imm12(ld_high)) { + __ add(loadCounter, rscratch2, ld_high); + } else { + // add operates encoded imm12, we need plain + __ mov(rscratch1, ld_high); + __ add(loadCounter, rscratch2, rscratch1); + } + } + + if(comp_args_on_stack) { + if(__ is_valid_for_imm12(stack_space)) { + __ sub(sp, sp, stack_space); + } else { + // add operates encoded imm12, we need plain + __ mov(rscratch1, stack_space); + __ sub(sp, sp, rscratch1); + } + } + + // +------+ -> r4 + // | 0 | \ + // | 1 | \ + // | 2 | - > Load in argument order going down. + // | x | / + // | N | / + // +------+ -> inital sp + // | pad | maybe 1 word to align the stack to 8 bytes + // | M | \ + // | x | \ + // | 2 | -> Load in argument order going up. + // | 1 | / + // | 0 | / + // +------+ -> + + + int sp_offset = 0; + + // Now generate the shuffle code. + for (int i = 0; i < total_args_passed; i++) { + + if (sig_bt[i] == T_VOID) { + assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); + continue; + } + + // Pick up 0, 1 or 2 words from SP+offset. + + // + // + // + VMReg r_1 = regs[i].first(); + VMReg r_2 = regs[i].second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + continue; + } + + if (r_2->is_valid()) { + assert(i + 1 < total_args_passed && sig_bt[i + 1] == T_VOID, "going to overrwrite reg_2 value"); + } + + if (r_1->is_stack()) { + // Convert stack slot to an SP offset + int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size - sp_offset; + + if (!r_2->is_valid()) { + __ ldr(rscratch2, Address(loadCounter, next_off, Address::pre)); + __ str(rscratch2, Address(sp, st_off)); + } else { + int tmp_off = st_off; + if(abs(st_off) > (255 << 2)) { + //st_off doesn't fit imm8 required by strd + + if(__ is_valid_for_imm12(st_off)) { + __ add(sp, sp, st_off); + } else { + // add operates encoded imm12, NOT plain + __ mov(rscratch1, st_off); + __ add(sp, sp, rscratch1); + } + tmp_off = 0; + sp_offset += st_off; + } + + + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // this can be a misaligned move + __ ldrd(rscratch1, rscratch2, Address(loadCounter, 2 * next_off, Address::pre)); + __ strd(rscratch1, rscratch2, Address(sp, tmp_off)); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + assert(r != loadCounter, "loadCounter is reloaded"); + if (r_2->is_valid()) { + assert(r_2->as_Register() != loadCounter, "loadCounter is reloaded"); + // this can be a misaligned move + // ldrd can handle inconsecutive registers + __ ldrd(r, r_2->as_Register(), Address(loadCounter, 2 * next_off, Address::pre)); + } else { + __ ldr(r, Address(loadCounter, next_off, Address::pre)); + } + } else { + assert(r_1->is_FloatRegister(), ""); + if (!r_2->is_valid()) { + // Can't do pre or post addressing for vldr, vstr + __ add(loadCounter, loadCounter, next_off); + __ vldr_f32(r_1->as_FloatRegister(), Address(loadCounter)); + } else { + // TODO assert(r_2->is_FloatRegister() && r_2->as_FloatRegister() == r_1->as_FloatRegister() + 1, ""); + // Can't do pre or post addressing for vldr, vstr + __ add(loadCounter, 
loadCounter, 2 * next_off); + __ vldr_f64(r_1->as_FloatRegister(), Address(loadCounter)); + } + } + } + + // restore sp + if(sp_offset) { + if(__ is_valid_for_imm12(sp_offset)) { + __ sub(sp, sp, sp_offset); + } else { + // add operates encoded imm12, we need plain + __ mov(rscratch1, sp_offset); + __ sub(sp, sp, rscratch1); + } + } + + if(total_args_passed) { + // restore loadCounter + __ ldr(loadCounter, Address(sp, -wordSize)); + } + + // 6243940 We might end up in handle_wrong_method if + // the callee is deoptimized as we race thru here. If that + // happens we don't want to take a safepoint because the + // caller frame will look interpreted and arguments are now + // "compiled" so it is much better to make this transition + // invisible to the stack walking code. Unfortunately if + // we try and find the callee by normal means a safepoint + // is possible. So we stash the desired callee in the thread + // and the vm will find there should this case occur. + + __ str(rmethod, Address(rthread, JavaThread::callee_target_offset())); + + // Will jump to the compiled code just as if compiled code was doing it. + __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset()))); + __ b(rscratch1); +} + +// --------------------------------------------------------------- +AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, + int total_args_passed, + int comp_args_on_stack, + const BasicType *sig_bt, + const VMRegPair *regs, + AdapterFingerPrint* fingerprint) { + address i2c_entry = __ pc(); + gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + + address c2i_unverified_entry = __ pc(); + Label skip_fixup; + + Label ok; + + Register holder = rscratch2; + Register receiver = j_rarg0; + Register tmp = r8; // A call-clobbered register not used for arg passing + + // ------------------------------------------------------------------------- + // Generate a C2I adapter. On entry we know rmethod holds the Method* during calls + // to the interpreter. The args start out packed in the compiled layout. They + // need to be unpacked into the interpreter layout. This will almost always + // require some stack space. We grow the current (compiled) stack, then repack + // the args. We finally end in a jump to the generic interpreter entry point. + // On exit from the interpreter, the interpreter will restore our SP (lest the + // compiled code, which relys solely on SP and not FP, get sick). + + { + __ block_comment("c2i_unverified_entry {"); + __ load_klass(rscratch1, receiver); + __ ldr(tmp, Address(holder, CompiledICHolder::holder_klass_offset())); + __ cmp(rscratch1, tmp); + __ ldr(rmethod, Address(holder, CompiledICHolder::holder_metadata_offset())); + __ b(ok, Assembler::EQ); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + __ bind(ok); + // Method might have been compiled since the call site was patched to + // interpreted; if that is the case treat it as a miss so we can get + // the call site corrected. 
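  // The whole unverified-entry check, as C-like pseudocode (a sketch, not
  // part of the original patch):
  //
  //   if (receiver->klass() != ic_holder->holder_klass())  goto ic_miss_stub;
  //   rmethod = ic_holder->holder_metadata();              // target Method*
  //   if (rmethod->code() != NULL)                         goto ic_miss_stub;  // re-resolve
  //   goto skip_fixup;                                     // fall into the c2i entry
  //
  // where ic_holder is the CompiledICHolder arriving in rscratch2.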
+ __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset()))); + __ cbz(rscratch1, skip_fixup); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + __ block_comment("} c2i_unverified_entry"); + } + + address c2i_entry = __ pc(); + + gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + + __ flush(); + return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); +} + +int SharedRuntime::c_calling_convention(const BasicType *sig_bt, + VMRegPair *regs, + VMRegPair *regs2, + int total_args_passed) { + assert(regs2 == NULL, "not needed on AArch32"); + +// We return the amount of VMRegImpl stack slots we need to reserve for all +// the arguments NOT counting out_preserve_stack_slots. + + static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = { + c_rarg0, c_rarg1, c_rarg2, c_rarg3 + }; +#ifdef HARD_FLOAT_CC + const int FP_ArgReg_N = 16; + static const FloatRegister FP_ArgReg[] = { + f0, f1, f2, f3, + f4, f5, f6, f7, + f8, f9, f10, f11, + f12, f13, f14, f15, + }; + unsigned long fp_free_mask = (1 << FP_ArgReg_N) - 1; + uint fp_args = 0; +#endif //HARD_FLOAT_CC + + uint int_args = 0; + uint stk_args = 0; + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + case T_OBJECT: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: +#ifndef HARD_FLOAT_CC + // soft FP case + case T_FLOAT: +#endif + if (int_args < Argument::n_int_register_parameters_c) { + regs[i].set1(INT_ArgReg[int_args++]->as_VMReg()); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 1; + } + break; +#ifndef HARD_FLOAT_CC + // soft FP case + case T_DOUBLE: +#endif + case T_LONG: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + if (int_args + 1 < Argument::n_int_register_parameters_c) { + if ((int_args % 2) != 0) { + ++int_args; + } + regs[i].set2(INT_ArgReg[int_args]->as_VMReg()); + int_args += 2; + } else { + if (stk_args % 2 != 0) { + ++stk_args; + } + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + int_args = Argument::n_int_register_parameters_c; + } + break; +#ifdef HARD_FLOAT_CC + case T_FLOAT: + if (fp_free_mask & ((1 << FP_ArgReg_N)-1)) { + unsigned index = __builtin_ctz(fp_free_mask); + regs[i].set1(FP_ArgReg[index]->as_VMReg()); + fp_free_mask &= ~(1 << index); + fp_args += 2 * ((~index) & 1); + } else { + regs[i].set1(VMRegImpl::stack2reg(stk_args)); + stk_args += 1; + } + break; + case T_DOUBLE: + assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args + 1 < FP_ArgReg_N) { + fp_free_mask &= ~(3 << fp_args); + regs[i].set2(FP_ArgReg[fp_args]->as_VMReg()); + fp_args += 2; + } else { + regs[i].set2(VMRegImpl::stack2reg(stk_args)); + stk_args += 2; + } + break; +#endif //HARD_FLOAT_CC + case T_VOID: // Halves of longs and doubles + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + default: + ShouldNotReachHere(); + break; + } + } + + return align_up(stk_args, StackAlignmentInBytes/wordSize); +} + +// On 64 bit we will store integer like items to the stack as +// 64 bits items (sparc abi) even though java would only store +// 32bits for a parameter. 
On 32bit it will simply be 32 bits +// So this routine will do 32->32 on 32bit and 32->64 on 64bit + +static void move_int(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first()))); + __ str(rscratch1, Address(sp, reg2offset_out(dst.first()))); + } else { + // stack to reg + __ ldr(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first()))); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ str(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first()))); + } else { + if (dst.first() != src.first()) { + __ mov(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} + +// An oop arg. Must pass a handle not the oop itself +static void object_move(MacroAssembler* masm, + OopMap* map, + int oop_handle_offset, + int framesize_in_slots, + VMRegPair src, + VMRegPair dst, + bool is_receiver, + int* receiver_offset) { + + // must pass a handle. First figure out the location we use as a handle + + Register rHandle = dst.first()->is_stack() ? rscratch2 : dst.first()->as_Register(); + + // See if oop is NULL if it is we need no handle + + if (src.first()->is_stack()) { + + // Oop is already on the stack as an argument + int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + if (is_receiver) { + *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + + __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first()))); + __ lea(rHandle, Address(rfp, reg2offset_in(src.first()))); + // conditionally move a NULL + __ cmp(rscratch1, 0); + __ mov(rHandle, 0, Assembler::EQ); + } else { + + // Oop is in an a register we must store it to the space we reserve + // on the stack for oop_handles and pass a handle if oop is non-NULL + + const Register rOop = src.first()->as_Register(); + int oop_slot; + if (rOop == j_rarg0) + oop_slot = 0; + else if (rOop == j_rarg1) + oop_slot = 1; + else if (rOop == j_rarg2) + oop_slot = 2; + else { + assert(rOop == j_rarg3, "wrong register"); + oop_slot = 3; + } + + oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; + int offset = oop_slot*VMRegImpl::stack_slot_size; + + map->set_oop(VMRegImpl::stack2reg(oop_slot)); + // Store oop in handle area, may be NULL + __ str(rOop, Address(sp, offset)); + if (is_receiver) { + *receiver_offset = offset; + } + + __ cmp(rOop, 0); + __ lea(rHandle, Address(sp, offset)); + // conditionally move a NULL + __ mov(rHandle, 0, Assembler::EQ); + } + + // If arg is on the stack then place it otherwise it is already in correct reg. 
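  // In C terms (editorial note): rHandle = (oop == NULL) ? NULL : &stack_slot_holding_oop,
  // so the native callee receives a jobject handle rather than a raw oop.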
+ if (dst.first()->is_stack()) { + __ str(rHandle, Address(sp, reg2offset_out(dst.first()))); + } +} + +// A float arg may have to do float reg int reg conversion +static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if(hasFPU()) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + // Have no vfp scratch registers, so copy via gpr + __ ldr(rscratch1, Address(rfp, reg2offset_in(src.first()))); + __ str(rscratch1, Address(sp, reg2offset_out(dst.first()))); + } else { + // stack to reg + __ vldr_f32(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first()))); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ vstr_f32(src.first()->as_FloatRegister(), Address(sp, reg2offset_out(dst.first()))); + } else { +#ifndef HARD_FLOAT_CC + if(dst.first()->is_Register()) { + __ vmov_f32(dst.first()->as_Register(), src.first()->as_FloatRegister()); + } else +#endif + if (dst.first() != src.first()) { + __ vmov_f32(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } + } + } else { + move_int(masm, src, dst); + } +} + +// A long move +static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + __ ldrd(rscratch1, rscratch2, Address(rfp, reg2offset_in(src.first()))); + __ strd(rscratch1, rscratch2, Address(sp, reg2offset_out(dst.first()))); + } else { + // stack to reg + __ ldrd(dst.first()->as_Register(), dst.second()->as_Register(), + Address(rfp, reg2offset_in(src.first()))); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ strd(src.first()->as_Register(), src.second()->as_Register(), + Address(sp, reg2offset_out(dst.first()))); + } else { + // reg to reg + if (dst.first() != src.first()) { + if (dst.first() != src.second()) { + __ mov(dst.first()->as_Register(), src.first()->as_Register()); + __ mov(dst.second()->as_Register(), src.second()->as_Register()); + } else { + __ mov(dst.second()->as_Register(), src.second()->as_Register()); + __ mov(dst.first()->as_Register(), src.first()->as_Register()); + } + } + } +} + +// A double move +static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { + if(hasFPU()) { + if (src.first()->is_stack()) { + if (dst.first()->is_stack()) { + // stack to stack + // Have no vfp scratch registers, so copy via gpr + __ ldrd(rscratch1, rscratch2, Address(rfp, reg2offset_in(src.first()))); + __ strd(rscratch1, rscratch2, Address(sp, reg2offset_out(dst.first()))); + } else { + // stack to reg + __ vldr_f64(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first()))); + } + } else if (dst.first()->is_stack()) { + // reg to stack + __ vstr_f64(src.first()->as_FloatRegister(), Address(sp, reg2offset_out(dst.first()))); + } else { +#ifndef HARD_FLOAT_CC + if(dst.first()->is_Register()) { + __ vmov_f64(dst.first()->as_Register(), dst.second()->as_Register(), src.first()->as_FloatRegister()); + } else +#endif + if (dst.first() != src.first()) { + __ vmov_f64(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); + } + } + } else { + long_move(masm, src, dst); + } +} + + +void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_DOUBLE: +#ifdef HARD_FLOAT_CC + __ vstr_f64(d0, Address(rfp, 
-(frame::get_frame_size() + 1) * wordSize)); + break; +#endif//fall through otherwise + case T_LONG: + __ strd(r0, r1, Address(rfp, -(frame::get_frame_size() + 1) * wordSize)); + break; + case T_VOID: + break; + case T_FLOAT: +#ifdef HARD_FLOAT_CC + __ vstr_f32(f0, Address(rfp, -frame::get_frame_size() * wordSize)); + break; +#endif//fall through otherwise + default: + __ str(r0, Address(rfp, -frame::get_frame_size() * wordSize)); + break; + } +} + +void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { + // We always ignore the frame_slots arg and just use the space just below frame pointer + // which by this time is free to use + switch (ret_type) { + case T_DOUBLE: +#ifdef HARD_FLOAT_CC + __ vldr_f64(d0, Address(rfp, -(frame::get_frame_size() + 1) * wordSize)); + break; +#endif//fall through otherwise + case T_LONG: + __ ldrd(r0, r1, Address(rfp, -(frame::get_frame_size() + 1) * wordSize)); + break; + case T_VOID: + break; + case T_FLOAT: +#ifdef HARD_FLOAT_CC + __ vldr_f32(d0, Address(rfp, -frame::get_frame_size() * wordSize)); + break; +#endif//fall through otherwise + default: + __ ldr(r0, Address(rfp, -frame::get_frame_size() * wordSize)); + break; + } +} + +static int save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + RegSet x; + int saved_slots = 0; + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + x = x + args[i].first()->as_Register(); + ++saved_slots; + } + if (args[i].second()->is_Register()) { + x = x + args[i].second()->as_Register(); + ++saved_slots; + } +#ifdef HARD_FLOAT_CC + else if (args[i].first()->is_FloatRegister()) { + FloatRegister fr = args[i].first()->as_FloatRegister(); + + if (args[i].second()->is_FloatRegister()) { + assert(args[i].is_single_phys_reg(), "doubles should be 2 consequents float regs"); + __ decrement(sp, 2 * wordSize); + __ vstr_f64(fr, Address(sp)); + saved_slots += 2; + } else { + __ decrement(sp, wordSize); + __ vstr_f32(fr, Address(sp)); + ++saved_slots; + } + } +#endif//HARD_FLOAT_CC + } + __ push(x, sp); + return saved_slots; +} + +static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { + RegSet x; + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + x = x + args[i].first()->as_Register(); + } else { + ; + } + if (args[i].second()->is_Register()) { + x = x + args[i].second()->as_Register(); + } + } + __ pop(x, sp); + for ( int i = first_arg ; i < arg_count ; i++ ) { + if (args[i].first()->is_Register()) { + ; + } +#ifdef HARD_FLOAT_CC + else if (args[i].first()->is_FloatRegister()) { + FloatRegister fr = args[i].first()->as_FloatRegister(); + + if (args[i].second()->is_FloatRegister()) { + assert(args[i].is_single_phys_reg(), "doubles should be 2 consequents float regs"); + __ vstr_f64(fr, Address(sp)); + __ increment(sp, 2 * wordSize); + } else { + __ vstr_f32(fr, Address(sp)); + __ increment(sp, wordSize); + } + } +#endif//HARD_FLOAT_CC + } +} + + +// Check GCLocker::needs_gc and enter the runtime if it's true. This +// keeps a new JNI critical region from starting until a GC has been +// forced. Save down any oops in registers and describe them in an +// OopMap. 
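// Note on save_native_result()/restore_native_result() above (a sketch,
// assuming wordSize == 4 and frame::get_frame_size() == 2, i.e. a plain
// saved fp/lr pair): a long or double result is parked in the two words at
// rfp - 12, while a 32-bit result uses the single word at rfp - 8.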
+static void check_needs_gc_for_critical_native(MacroAssembler* masm, + int stack_slots, + int total_c_args, + int total_in_args, + int arg_save_area, + OopMapSet* oop_maps, + VMRegPair* in_regs, + BasicType* in_sig_bt) { Unimplemented(); } + +// Unpack an array argument into a pointer to the body and the length +// if the array is non-null, otherwise pass 0 for both. +static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); } + + +class ComputeMoveOrder: public StackObj { + class MoveOperation: public ResourceObj { + friend class ComputeMoveOrder; + private: + VMRegPair _src; + VMRegPair _dst; + int _src_index; + int _dst_index; + bool _processed; + MoveOperation* _next; + MoveOperation* _prev; + + static int get_id(VMRegPair r) { Unimplemented(); return 0; } + + public: + MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): + _src(src) + , _src_index(src_index) + , _dst(dst) + , _dst_index(dst_index) + , _next(NULL) + , _prev(NULL) + , _processed(false) { Unimplemented(); } + + VMRegPair src() const { Unimplemented(); return _src; } + int src_id() const { Unimplemented(); return 0; } + int src_index() const { Unimplemented(); return 0; } + VMRegPair dst() const { Unimplemented(); return _src; } + void set_dst(int i, VMRegPair dst) { Unimplemented(); } + int dst_index() const { Unimplemented(); return 0; } + int dst_id() const { Unimplemented(); return 0; } + MoveOperation* next() const { Unimplemented(); return 0; } + MoveOperation* prev() const { Unimplemented(); return 0; } + void set_processed() { Unimplemented(); } + bool is_processed() const { Unimplemented(); return 0; } + + // insert + void break_cycle(VMRegPair temp_register) { Unimplemented(); } + + void link(GrowableArray& killer) { Unimplemented(); } + }; + + private: + GrowableArray edges; + + public: + ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, + BasicType* in_sig_bt, GrowableArray& arg_order, VMRegPair tmp_vmreg) { Unimplemented(); } + + // Collected all the move operations + void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { Unimplemented(); } + + // Walk the edges breaking cycles between moves. 
The result list + // can be walked in order to produce the proper set of loads + GrowableArray* get_store_order(VMRegPair temp_register) { Unimplemented(); return 0; } +}; + + +static void rt_call(MacroAssembler* masm, address dest) { + CodeBlob *cb = CodeCache::find_blob(dest); + if (cb) { + __ far_call(RuntimeAddress(dest), NULL); + } else { + __ lea(rscratch2, RuntimeAddress(dest)); + __ bl(rscratch2); + __ maybe_isb(); + } +} + +static void verify_oop_args(MacroAssembler* masm, + const methodHandle &method, + const BasicType* sig_bt, + const VMRegPair* regs) { + Register temp_reg = rscratch2; // not part of any compiled calling seq + if (VerifyOops) { + for (int i = 0; i < method->size_of_parameters(); i++) { + if (sig_bt[i] == T_OBJECT || + sig_bt[i] == T_ARRAY) { + VMReg r = regs[i].first(); + assert(r->is_valid(), "bad oop arg"); + if (r->is_stack()) { + __ ldr(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); + __ verify_oop(temp_reg); + } else { + __ verify_oop(r->as_Register()); + } + } + } + } +} + +static void gen_special_dispatch(MacroAssembler* masm, + const methodHandle &method, + const BasicType* sig_bt, + const VMRegPair* regs) { + verify_oop_args(masm, method, sig_bt, regs); + vmIntrinsics::ID iid = method->intrinsic_id(); + + // Now write the args into the outgoing interpreter space + bool has_receiver = false; + Register receiver_reg = noreg; + int member_arg_pos = -1; + Register member_reg = noreg; + int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); + if (ref_kind != 0) { + member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument + member_reg = r4; + has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); + } else if (iid == vmIntrinsics::_invokeBasic) { + has_receiver = true; + } else { + fatal("unexpected intrinsic id %d", iid); + } + + if (member_reg != noreg) { + // Load the member_arg into register, if necessary. + SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); + VMReg r = regs[member_arg_pos].first(); + if (r->is_stack()) { + __ ldr(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size)); + } else { + // no data motion is needed + member_reg = r->as_Register(); + } + } + + if (has_receiver) { + // Make sure the receiver is loaded into a register. + assert(method->size_of_parameters() > 0, "oob"); + assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); + VMReg r = regs[0].first(); + assert(r->is_valid(), "bad receiver arg"); + if (r->is_stack()) { + // Porting note: This assumes that compiled calling conventions always + // pass the receiver oop in a register. If this is not true on some + // platform, pick a temp and load the receiver from stack. + fatal("receiver always in a register"); + } else { + // no data motion is needed + receiver_reg = r->as_Register(); + } + } + + // Figure out which address we are really jumping to: + MethodHandles::generate_method_handle_dispatch(masm, iid, + receiver_reg, member_reg, /*for_compiler_entry:*/ true); +} + +// --------------------------------------------------------------------------- +// Generate a native wrapper for a given method. The method takes arguments +// in the Java compiled code convention, marshals them to the native +// convention (handlizes oops, etc), transitions to native, makes the call, +// returns to java state (possibly blocking), unhandlizes any result and +// returns. 
+// +// Critical native functions are a shorthand for the use of +// GetPrimtiveArrayCritical and disallow the use of any other JNI +// functions. The wrapper is expected to unpack the arguments before +// passing them to the callee and perform checks before and after the +// native call to ensure that they GC_locker +// lock_critical/unlock_critical semantics are followed. Some other +// parts of JNI setup are skipped like the tear down of the JNI handle +// block and the check for pending exceptions it's impossible for them +// to be thrown. +// +// They are roughly structured like this: +// if (GC_locker::needs_gc()) +// SharedRuntime::block_for_jni_critical(); +// tranistion to thread_in_native +// unpack arrray arguments and call native entry point +// check for safepoint in progress +// check if any thread suspend flags are set +// call into JVM and possible unlock the JNI critical +// if a GC was suppressed while in the critical native. +// transition back to thread_in_Java +// return to caller +// +nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, + const methodHandle& method, + int compile_id, + BasicType* in_sig_bt, + VMRegPair* in_regs, + BasicType ret_type) { + if (method->is_method_handle_intrinsic()) { + vmIntrinsics::ID iid = method->intrinsic_id(); + intptr_t start = (intptr_t)__ pc(); + int vep_offset = ((intptr_t)__ pc()) - start; + + // First instruction must be a nop as it may need to be patched on deoptimisation + __ nop(); + gen_special_dispatch(masm, + method, + in_sig_bt, + in_regs); + int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period + __ flush(); + int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually + return nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + in_ByteSize(-1), + in_ByteSize(-1), + (OopMapSet*)NULL); + } + + bool is_critical_native = true; + address native_func = method->critical_native_function(); + if (native_func == NULL) { + native_func = method->native_function(); + is_critical_native = false; + } + assert(native_func != NULL, "must have function"); + + // An OopMap for lock (and class if static) + OopMapSet *oop_maps = new OopMapSet(); + intptr_t start = (intptr_t)__ pc(); + + // We have received a description of where all the java arg are located + // on entry to the wrapper. We need to convert these args to where + // the jni function will expect them. 
To figure out where they go + // we convert the java signature to a C signature by inserting + // the hidden arguments as arg[0] and possibly arg[1] (static method) + + const int total_in_args = method->size_of_parameters(); + int total_c_args = total_in_args; + if (!is_critical_native) { + total_c_args += 1; + if (method->is_static()) { + total_c_args++; + } + } else { + for (int i = 0; i < total_in_args; i++) { + if (in_sig_bt[i] == T_ARRAY) { + total_c_args++; + } + } + } + + BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); + VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); + BasicType* in_elem_bt = NULL; + + int argc = 0; + if (!is_critical_native) { + out_sig_bt[argc++] = T_ADDRESS; + if (method->is_static()) { + out_sig_bt[argc++] = T_OBJECT; + } + + for (int i = 0; i < total_in_args ; i++ ) { + out_sig_bt[argc++] = in_sig_bt[i]; + } + } else { + Thread* THREAD = Thread::current(); + in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); + SignatureStream ss(method->signature()); + for (int i = 0; i < total_in_args ; i++ ) { + if (in_sig_bt[i] == T_ARRAY) { + // Arrays are passed as int, elem* pair + out_sig_bt[argc++] = T_INT; + out_sig_bt[argc++] = T_ADDRESS; + Symbol* atype = ss.as_symbol(CHECK_NULL); + const char* at = atype->as_C_string(); + if (strlen(at) == 2) { + assert(at[0] == '[', "must be"); + switch (at[1]) { + case 'B': in_elem_bt[i] = T_BYTE; break; + case 'C': in_elem_bt[i] = T_CHAR; break; + case 'D': in_elem_bt[i] = T_DOUBLE; break; + case 'F': in_elem_bt[i] = T_FLOAT; break; + case 'I': in_elem_bt[i] = T_INT; break; + case 'J': in_elem_bt[i] = T_LONG; break; + case 'S': in_elem_bt[i] = T_SHORT; break; + case 'Z': in_elem_bt[i] = T_BOOLEAN; break; + default: ShouldNotReachHere(); + } + } + } else { + out_sig_bt[argc++] = in_sig_bt[i]; + in_elem_bt[i] = T_VOID; + } + if (in_sig_bt[i] != T_VOID) { + assert(in_sig_bt[i] == ss.type(), "must match"); + ss.next(); + } + } + } + + // Now figure out where the args must be stored and how much stack space + // they require. + int out_arg_slots; + out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); + + // Compute framesize for the wrapper. We need to handlize all oops in + // incoming registers + + // Calculate the total number of stack slots we will need. + + // First count the abi requirement plus all of the outgoing args + int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; + + // Now the space for the inbound oop handle area + int total_save_slots = -1; + if (is_critical_native) { + // Critical natives may have to call out so they need a save area + // for register arguments. 
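  // Illustration of the signature rewriting above (not from the original
  // patch): for a critical native taking (byte[], int) the outgoing C
  // signature becomes (jint length, jbyte* body, jint) with no JNIEnv* and
  // no jclass, whereas the regular JNI form would be
  // (JNIEnv*, jclass-if-static, jbyteArray, jint).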
+ int double_slots = 0; + int single_slots = 0; + for ( int i = 0; i < total_in_args; i++) { + if (in_regs[i].first()->is_Register()) { + const Register reg = in_regs[i].first()->as_Register(); + switch (in_sig_bt[i]) { + case T_ARRAY: // critical array (uses 2 slots on LP64) + case T_BOOLEAN: + case T_BYTE: + case T_SHORT: + case T_CHAR: + case T_INT: single_slots++; break; + case T_LONG: double_slots++; break; + default: ShouldNotReachHere(); + } + } else +#ifdef HARD_FLOAT_CC + if (in_regs[i].first()->is_FloatRegister()) +#endif // HARD_FLOAT_CC + ShouldNotReachHere(); + } + total_save_slots = double_slots * 2 + single_slots; + // align the save area + if (double_slots != 0) { + stack_slots = align_up(stack_slots, 2); + } + } else { + total_save_slots = 4 * VMRegImpl::slots_per_word; // 4 arguments passed in registers + } + assert(total_save_slots != -1, "initialize total_save_slots!"); + + int oop_handle_offset = stack_slots; + stack_slots += total_save_slots; + + // Now any space we need for handlizing a klass if static method + + int klass_slot_offset = 0; + int klass_offset = -1; + int lock_slot_offset = 0; + bool is_static = false; + + if (method->is_static()) { + klass_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; + is_static = true; + } + + // Plus a lock if needed + + if (method->is_synchronized()) { + lock_slot_offset = stack_slots; + stack_slots += VMRegImpl::slots_per_word; + } + + const int enter_frame_size = frame::get_frame_size(); + + // Now a place (+2) to save return values or temp during shuffling + // + {2,4} words which pushed by enter() + // (return address (which we own), saved rfp, ...) + stack_slots += 2 + enter_frame_size; + + // Ok The space we have allocated will look like: + // + // + // FP-> | saved lr | + // |---------------------| + // | saved fp | + // |---------------------| + // | 2 slots for moves | + // |.....................| + // | 1 slot opt padding | + // |---------------------| + // | lock box (if sync) | + // |---------------------| <- lock_slot_offset + // | klass (if static) | + // |---------------------| <- klass_slot_offset + // | oopHandle area | + // |---------------------| <- oop_handle_offset (8 java arg registers) + // | outbound memory | + // | based arguments | + // | | + // |---------------------| + // | | + // SP-> | out_preserved_slots | + // + // + + + // Now compute actual number of stack words we need rounding to make + // stack properly aligned. + stack_slots = align_up(stack_slots, StackAlignmentInSlots); + + int stack_size = stack_slots * VMRegImpl::stack_slot_size; + + // First thing make an ic check to see if we should even be here + + // We are free to use all registers as temps without saving them and + // restoring them except rfp. rfp is the only callee save register + // as far as the interpreter and the compiler(s) are concerned. 
+ + + const Register ic_reg = rscratch2; + const Register receiver = j_rarg0; + + Label hit; + Label exception_pending; + + assert_different_registers(ic_reg, receiver, rscratch1); + __ verify_oop(receiver); + __ cmp_klass(receiver, ic_reg, rscratch1); + __ b(hit, Assembler::EQ); + + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + // Verified entry point must be aligned + __ align(8); + + __ bind(hit); + +#ifdef ASSERT + __ mov(ic_reg, 0xdead); // trash ic_reg(rscratch2), as used as real scratch further +#endif + + int vep_offset = ((intptr_t)__ pc()) - start; + + // Generate stack overflow check + + // If we have to make this method not-entrant we'll overwrite its + // first instruction with a jump. For this action to be legal we + // must ensure that this first instruction is a B, BL, NOP, BKPT, + // SVC, HVC, or SMC. Make it a NOP. + __ nop(); + + if (UseStackBanging) { + __ bang_stack_with_offset(JavaThread::stack_shadow_zone_size()); + } else { + Unimplemented(); + } + + // Generate a new frame for the wrapper. + __ enter(); + // some words are pushed by enter, so adjust frame size on this value + __ sub(sp, sp, stack_size - enter_frame_size * wordSize); + + // Frame is now completed as far as size and linkage. + int frame_complete = ((intptr_t)__ pc()) - start; + + if (is_critical_native) { + check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, + oop_handle_offset, oop_maps, in_regs, in_sig_bt); + } + + // + // We immediately shuffle the arguments so that any vm call we have to + // make from here on out (sync slow path, jvmti, etc.) we will have + // captured the oops from our caller and have a valid oopMap for + // them. + + // ----------------- + // The Grand Shuffle + + // The Java calling convention is either equal (linux) or denser (win64) than the + // c calling convention. However the because of the jni_env argument the c calling + // convention always has at least one more (and two for static) arguments than Java. + // Therefore if we move the args from java -> c backwards then we will never have + // a register->register conflict and we don't have to build a dependency graph + // and figure out how to break any cycles. + // + + // Record sp-based slot for receiver on stack for non-static methods + int receiver_offset = -1; + + // This is a trick. We double the stack slots so we can claim + // the oops in the caller's frame. Since we are sure to have + // more args than the caller doubling is enough to make + // sure we can capture all the incoming oop args from the + // caller. + // + OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); + + // Mark location of rfp (someday) + // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rfp)); + + +#ifdef ASSERT + bool reg_destroyed[RegisterImpl::number_of_registers]; + bool freg_destroyed[FloatRegisterImpl::number_of_registers]; + for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { + reg_destroyed[r] = false; + } + for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { + freg_destroyed[f] = false; + } + +#endif // ASSERT + + // This may iterate in two different directions depending on the + // kind of native it is. The reason is that for regular JNI natives + // the incoming and outgoing registers are offset upwards and for + // critical natives they are offset down. 
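  // Illustration for the regular (non-critical) case below (a sketch): with
  // total_in_args == 2 and total_c_args == 3 (JNIEnv* prepended, non-static),
  // the loop pushes the pairs (1,2) and (0,1), so Java arg 1 is moved into C
  // slot 2 first and Java arg 0 into C slot 1 second, leaving C slot 0 free
  // for JNIEnv*; every destination index is ahead of its source, which is why
  // walking backwards cannot clobber a not-yet-moved argument.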
+ GrowableArray arg_order(2 * total_in_args); + VMRegPair tmp_vmreg; + tmp_vmreg.set2(rscratch2->as_VMReg()); + + if (!is_critical_native) { + for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { + arg_order.push(i); + arg_order.push(c_arg); + } + } else { + // Compute a valid move order, using tmp_vmreg to break any cycles + ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); + } + + int temploc = -1; + for (int ai = 0; ai < arg_order.length(); ai += 2) { + int i = arg_order.at(ai); + int c_arg = arg_order.at(ai + 1); + __ block_comment(err_msg("move %d -> %d", i, c_arg)); + if (c_arg == -1) { + assert(is_critical_native, "should only be required for critical natives"); + // This arg needs to be moved to a temporary + __ mov(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); + in_regs[i] = tmp_vmreg; + temploc = i; + continue; + } else if (i == -1) { + assert(is_critical_native, "should only be required for critical natives"); + // Read from the temporary location + assert(temploc != -1, "must be valid"); + i = temploc; + temploc = -1; + } +#ifdef ASSERT + if (in_regs[i].first()->is_Register()) { + assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); + } else if (in_regs[i].first()->is_FloatRegister()) { + assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); + } + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; + } +#endif // ASSERT + switch (in_sig_bt[i]) { + case T_ARRAY: + if (is_critical_native) { + unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); + c_arg++; +#ifdef ASSERT + if (out_regs[c_arg].first()->is_Register()) { + reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; + } else if (out_regs[c_arg].first()->is_FloatRegister()) { + freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; + } +#endif + break; + } + case T_OBJECT: + assert(!is_critical_native, "no oop arguments"); + object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], + ((i == 0) && (!is_static)), + &receiver_offset); + break; + case T_VOID: + break; + + case T_FLOAT: + float_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_DOUBLE: + assert( i + 1 < total_in_args && + in_sig_bt[i + 1] == T_VOID && + out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); + double_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_LONG : + long_move(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_BOOLEAN : + case T_BYTE : + case T_CHAR : + case T_SHORT : + case T_INT : + move_int(masm, in_regs[i], out_regs[c_arg]); + break; + + case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); + case T_NARROWOOP : + case T_METADATA : + case T_NARROWKLASS : + default: + ShouldNotReachHere(); + } + } + + // point c_arg at the first arg that is already loaded in case we + // need to spill before we call out + int c_arg = total_c_args - total_in_args; + + // We use r4 as the oop handle for the receiver/klass + // It is callee save so it survives the call to native + + const Register oop_handle_reg = r4; + + // Pre-load a static method's oop. Used both by locking code and + // the normal JNI call code. 
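  // In C terms (editorial sketch) the block below does
  //   spill_slot = <mirror oop of the holder class>;  // embedded as an oop immediate
  //   c_rarg1    = &spill_slot;                       // becomes the jclass argument
  // and records the spill slot in the oop map so a GC at the call site can
  // find and update the mirror.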
+ if (method->is_static() && !is_critical_native) { + + // load oop into a register + __ movoop(oop_handle_reg, + JNIHandles::make_local(method->method_holder()->java_mirror()), + /*immediate*/true); + + // Now handlize the static class mirror it's known not-null. + __ str(oop_handle_reg, Address(sp, klass_offset)); + map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); + + // Now get the handle + __ lea(oop_handle_reg, Address(sp, klass_offset)); + // store the klass handle as second argument + __ mov(c_rarg1, oop_handle_reg); + // and protect the arg if we must spill + c_arg--; + } + + // Change state to native (we save the return address in the thread, since it might not + // be pushed on the stack when we do a a stack traversal). It is enough that the pc() + // points into the right code segment. It does not have to be the correct return pc. + // We use the same pc/oopMap repeatedly when we call out + + intptr_t the_pc = (intptr_t) __ pc(); + oop_maps->add_gc_map(the_pc - start, map); + + __ set_last_Java_frame(sp, noreg, (address)the_pc, rscratch1); + + + // We have all of the arguments setup at this point. We must not touch any register + // argument registers at this point (what if we save/restore them there are no oop? + +#ifdef DTRACE_ENABLED + { + SkipIfEqual skip(masm, &DTraceMethodProbes, false); + // protect the args we've loaded + (void) save_args(masm, total_c_args, c_arg, out_regs); + __ mov_metadata(c_rarg1, method()); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), + rthread, c_rarg1); + restore_args(masm, total_c_args, c_arg, out_regs); + } +#endif + + // RedefineClasses() tracing support for obsolete method entry + if (log_is_enabled(Trace, redefine, class, obsolete)) { + // protect the args we've loaded + save_args(masm, total_c_args, c_arg, out_regs); + __ mov_metadata(c_rarg1, method()); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + rthread, c_rarg1); + restore_args(masm, total_c_args, c_arg, out_regs); + } + + // Lock a synchronized method + + // Register definitions used by locking and unlocking + + Label slow_path_lock; + Label lock_done; + + if (method->is_synchronized()) { + assert(!is_critical_native, "unhandled"); + + // registers below are not used to pass parameters + // and they are caller save in C1 + // => safe to use as temporary here + const Register swap_reg = r5; + const Register obj_reg = r6; // Will contain the oop + const Register lock_reg = r7; // Address of compiler lock object (BasicLock) + + const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); + + // Get the handle (the 2nd argument) + __ mov(oop_handle_reg, c_rarg1); + + // Get address of the box + + __ lea(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + + // Load the oop from the handle + __ ldr(obj_reg, Address(oop_handle_reg, 0)); + + if (UseBiasedLocking) { + __ biased_locking_enter(obj_reg, swap_reg, rscratch2, rscratch1, false, lock_done, &slow_path_lock); + } + + // Load (object->mark() | 1) into swap_reg %r0 + __ ldr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ orr(swap_reg, swap_reg, 1); + + // Save (object->mark() | 1) into BasicLock's displaced header + __ str(swap_reg, Address(lock_reg, mark_word_offset)); + + // src -> dest iff dest == r0 else r0 <- dest + { Label here; + __ cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, lock_done, &slow_path_lock); + } + + // Slow path will re-enter here + __ bind(lock_done); + } + + + 
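
The fast-path locking sequence just emitted (displaced mark word plus compare-and-swap on the object header) can be modelled in ordinary C++ with std::atomic. This is a sketch only; the struct layout and names are invented and do not match HotSpot's real mark word. If the compare-and-swap fails, the generated code branches to slow_path_lock and ends up in SharedRuntime::complete_monitor_locking_C.

#include <atomic>
#include <cstdint>
#include <cstdio>

// Rough model of the fast path emitted above: the object's mark word is copied,
// with the low bit set, into an on-stack BasicLock ("displaced header"), and the
// header is then CAS'ed to point at that stack slot. Field names are invented.
struct ToyOop  { std::atomic<intptr_t> mark; };
struct ToyLock { intptr_t displaced; };

static bool fast_lock(ToyOop* obj, ToyLock* lock) {
  intptr_t unlocked = obj->mark.load() | 1;    // (object->mark() | 1)
  lock->displaced = unlocked;                  // save it in the BasicLock slot
  intptr_t expected = unlocked;
  // Only succeeds if the header still holds the unlocked pattern.
  return obj->mark.compare_exchange_strong(expected,
                                           reinterpret_cast<intptr_t>(lock));
}

int main() {
  ToyOop obj;
  obj.mark.store(0x10);
  ToyLock lock;
  printf("locked: %d, header: %#lx\n",
         (int)fast_lock(&obj, &lock), (unsigned long)obj.mark.load());
  return 0;
}
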
// Finally just about ready to make the JNI call + + + // get JNIEnv* which is first argument to native + if (!is_critical_native) { + __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset()))); + } + + // Now set thread in native + __ mov(rscratch1, _thread_in_native); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ dmb(Assembler::ISH); + __ str(rscratch1, rscratch2); + + // Do the call + rt_call(masm, native_func); + + // Unpack native results. + switch (ret_type) { + case T_BOOLEAN: __ c2bool(r0); break; + case T_CHAR : __ uxth(r0, r0); break; + case T_BYTE : __ sxtb(r0, r0); break; + case T_SHORT : __ sxth(r0, r0); break; + case T_INT : break; + case T_FLOAT : +#ifndef HARD_FLOAT_CC + if(hasFPU()) { + __ vmov_f32(d0, r0); + } +#endif + break; + case T_DOUBLE : +#ifndef HARD_FLOAT_CC + if(hasFPU()) { + __ vmov_f64(d0, r0, r1); + } +#endif + break; + case T_ARRAY: // Really a handle + case T_OBJECT: // Really a handle + break; // can't de-handlize until after safepoint check + case T_VOID: break; + case T_LONG: break; + default : ShouldNotReachHere(); + } + + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // VM thread changes sync state to synchronizing and suspends threads for GC. + // Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization is progress, and escapes. + __ mov(rscratch1, _thread_in_native_trans); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ dmb(Assembler::ISH); + __ str(rscratch1, rscratch2); + + if(os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ membar(Assembler::AnyAny); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. + __ serialize_memory(rthread, rscratch1); + } + } + + Label after_transition; + + // check for safepoint operation in progress and/or pending suspend requests + { + Label Continue; + + Label L; + __ safepoint_poll_acquire(L); + __ ldr(rscratch1, Address(rthread, JavaThread::suspend_flags_offset())); + __ cbz(rscratch1, Continue); + __ bind(L); + + // Don't use call_VM as it will see a possible pending exception and forward it + // and never return here preventing us from clearing _last_native_pc down below. + // + save_native_result(masm, ret_type, stack_slots); + __ mov(c_rarg0, rthread); +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + if (!is_critical_native) { + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); + } else { + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition))); + } + __ bl(rscratch1); + __ maybe_isb(); + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + + if (is_critical_native) { + // The call above performed the transition to thread_in_Java so + // skip the transition logic below. 
+ __ b(after_transition); + } + + __ bind(Continue); + } + + // change thread state + __ mov(rscratch1, _thread_in_Java); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ dmb(Assembler::ISH); + __ str(rscratch1, rscratch2); + __ bind(after_transition); + + Label reguard; + Label reguard_done; + __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset())); + __ cmp(rscratch1, JavaThread::stack_guard_yellow_reserved_disabled); + __ b(reguard, Assembler::EQ); + __ bind(reguard_done); + + // native result if any is live + + // Unlock + Label unlock_done; + Label slow_path_unlock; + if (method->is_synchronized()) { + const Register obj_reg = r2; // Will contain the oop + const Register lock_reg = rscratch1; // Address of compiler lock object (BasicLock) + const Register old_hdr = r3; // value of old header at unlock time + + // Get locked oop from the handle we passed to jni + __ ldr(obj_reg, Address(oop_handle_reg, 0)); + + if (UseBiasedLocking) { + __ biased_locking_exit(obj_reg, old_hdr, unlock_done); + } + + // Simple recursive lock? + // get address of the stack lock + __ lea(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + + // get old displaced header + __ ldr(old_hdr, Address(lock_reg, 0)); + __ cbz(old_hdr, unlock_done); + + // Atomic swap old header if oop still contains the stack lock + Label succeed; + __ cmpxchg_obj_header(lock_reg, old_hdr, obj_reg, rscratch2, succeed, &slow_path_unlock); + __ bind(succeed); + + // slow path re-enters here + __ bind(unlock_done); + } + +#ifdef DTRACE_ENABLED + { + SkipIfEqual skip(masm, &DTraceMethodProbes, false); + save_native_result(masm, ret_type, stack_slots); + __ mov_metadata(c_rarg1, method()); + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), + rthread, c_rarg1); + restore_native_result(masm, ret_type, stack_slots); + } +#endif + + __ reset_last_Java_frame(false); + + // Unbox oop result, e.g. JNIHandles::resolve result. + if (ret_type == T_OBJECT || ret_type == T_ARRAY) { + __ resolve_jobject(r0, rthread, rscratch2); + } + + if (CheckJNICalls) { + // clear_pending_jni_exception_check + __ mov(rscratch1, 0); + __ str(rscratch1, Address(rthread, JavaThread::pending_jni_exception_check_fn_offset())); + } + + if (!is_critical_native) { + // reset handle block + __ mov(rscratch1, 0); + __ ldr(r2, Address(rthread, JavaThread::active_handles_offset())); + __ str(rscratch1, Address(r2, JNIHandleBlock::top_offset_in_bytes())); + } + + __ leave(); + + if (!is_critical_native) { + // Any exception pending? + __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ cbnz(rscratch1, exception_pending); + } + + // We're done + __ b(lr); + + // Unexpected paths are out of line and go here + + if (!is_critical_native) { + // forward the exception + __ bind(exception_pending); + + // and forward the exception + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + } + + // Slow path locking & unlocking + if (method->is_synchronized()) { + + // BEGIN Slow path lock + __ bind(slow_path_lock); + + // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM + // args are (oop obj, BasicLock* lock, JavaThread* thread) + + // protect the args we've loaded + const int extra_words = save_args(masm, total_c_args, c_arg, out_regs); + + __ ldr(c_rarg0, Address(oop_handle_reg)); + __ lea(c_rarg1, Address(sp, (extra_words + lock_slot_offset) * VMRegImpl::stack_slot_size)); + __ mov(c_rarg2, rthread); + + // Not a leaf but we have last_Java_frame setup as we want + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); + restore_args(masm, total_c_args, c_arg, out_regs); + +#ifdef ASSERT + { Label L; + __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ cbz(rscratch1, L); + __ stop("no pending exception allowed on exit from monitorenter"); + __ bind(L); + } +#endif + __ b(lock_done); + + // END Slow path lock + + // BEGIN Slow path unlock + __ bind(slow_path_unlock); + + // If we haven't already saved the native result we must save it now as xmm registers + // are still exposed. + + save_native_result(masm, ret_type, stack_slots); + + __ mov(c_rarg2, rthread); + __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); + __ ldr(c_rarg0, Address(oop_handle_reg)); + + // Save pending exception around call to VM (which contains an EXCEPTION_MARK) + __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ mov(rscratch2, 0); + __ str(rscratch2, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + + rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); + +#ifdef ASSERT + { + Label L; + __ ldr(rscratch2, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ cbz(rscratch2, L); + __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); + __ bind(L); + } +#endif // ASSERT + + __ str(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + + restore_native_result(masm, ret_type, stack_slots); + + __ b(unlock_done); + + // END Slow path unlock + + } // synchronized + + // SLOW PATH Reguard the stack if needed + + __ bind(reguard); + save_native_result(masm, ret_type, stack_slots); + rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + restore_native_result(masm, ret_type, stack_slots); + // and continue + __ b(reguard_done); + + + + __ flush(); + + nmethod *nm = nmethod::new_native_nmethod(method, + compile_id, + masm->code(), + vep_offset, + frame_complete, + stack_slots / VMRegImpl::slots_per_word, + (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), + in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), + oop_maps); + + if (is_critical_native) { + nm->set_lazy_critical_native(true); + } + + return nm; +} + +// this function returns the adjust size (in number of words) to a c2i adapter +// activation for use during deoptimization +int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { + assert(callee_locals >= callee_parameters, + "test and remove; got more parms than locals"); + if (callee_locals < callee_parameters) + return 0; // No adjustment for negative locals + int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords; + // diff is counted in stack words + return align_up(diff, 2); +} + + +//------------------------------generate_deopt_blob---------------------------- +void SharedRuntime::generate_deopt_blob() { + + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools + CodeBuffer buffer("deopt_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + int frame_size_in_words; + OopMap* map = NULL; + OopMapSet *oop_maps = new OopMapSet(); + + // ------------- + // This code enters when returning to a de-optimized nmethod. A return + // address has been pushed on the the stack, and return values are in + // registers. + // If we are doing a normal deopt then we were called from the patched + // nmethod from the point we returned to the nmethod. So the return + // address on the stack is wrong by NativeCall::instruction_size + // We will adjust the value so it looks like we have the original return + // address on the stack (like when we eagerly deoptimized). + // In the case of an exception pending when deoptimizing, we enter + // with a return address on the stack that points after the call we patched + // into the exception handler. We have the following register state from, + // e.g., the forward exception stub (see stubGenerator_x86_64.cpp). + // r0: exception oop + // r7: exception handler + // r3: throwing pc + // So in this case we simply jam r3 into the useless return address and + // the stack looks just like we want. + // + // At this point we need to de-opt. We save the argument return + // registers. We call the first C routine, fetch_unroll_info(). This + // routine captures the return values and returns a structure which + // describes the current frame size and the sizes of all replacement frames. + // The current frame is compiled code and may contain many inlined + // functions, each with their own JVM state. We pop the current frame, then + // push all the new frames. Then we call the C routine unpack_frames() to + // populate these frames. Finally unpack_frames() returns us the new target + // address. Notice that callee-save registers are BLOWN here; they have + // already been captured in the vframeArray at the time the return PC was + // patched. + address start = __ pc(); + Label cont; + + // Prolog for non exception case! + + // Save everything in sight. + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, true); + + // Normal deoptimization. Save exec mode for unpack_frames. 
+ __ mov(r7, Deoptimization::Unpack_deopt); // callee-saved + __ b(cont); + + int reexecute_offset = __ pc() - start; + + // Reexecute case + // return address is the pc describes what bci to do re-execute at + + // No need to update map as each call to save_live_registers will produce identical oopmap + (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + __ mov(r7, Deoptimization::Unpack_reexecute); // callee-saved + __ b(cont); + + int exception_offset = __ pc() - start; + + // Prolog for exception case + + // all registers are dead at this entry point, except for r0, and + // r3 which contain the exception oop and exception pc + // respectively. Set them in TLS and fall thru to the + // unpack_with_exception_in_tls entry point. + + __ str(r3, Address(rthread, JavaThread::exception_pc_offset())); + __ str(r0, Address(rthread, JavaThread::exception_oop_offset())); + + int exception_in_tls_offset = __ pc() - start; + + // new implementation because exception oop is now passed in JavaThread + + // Prolog for exception case + // All registers must be preserved because they might be used by LinearScan + // Exceptiop oop and throwing PC are passed in JavaThread + // tos: stack at point of call to method that threw the exception (i.e. only + // args are on the stack, no return address) + + // The return address pushed by save_live_registers will be patched + // later with the throwing pc. The correct value is not available + // now because loading it from memory would destroy registers. + + // NB: The SP at this point must be the SP of the method that is + // being deoptimized. Deoptimization assumes that the frame created + // here by save_live_registers is immediately below the method's SP. + // This is a somewhat fragile mechanism. + + // Save everything in sight. + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + // Now it is safe to overwrite any register + + // Deopt during an exception. Save exec mode for unpack_frames. + __ mov(r7, Deoptimization::Unpack_exception); // callee-saved + + // load throwing pc from JavaThread and patch it as the return address + // of the current frame. Then clear the field in JavaThread + + __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset())); + __ str(r3, Address(rfp, frame::get_return_addr_offset() * wordSize)); + __ mov(rscratch1, 0); + __ str(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); + +#ifdef ASSERT + // verify that there is really an exception oop in JavaThread + __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset())); + __ verify_oop(r0); + + // verify that there is no pending exception + Label no_pending_exception; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, no_pending_exception); + __ stop("must not have pending exception here"); + __ bind(no_pending_exception); +#endif + + __ bind(cont); + + // Call C code. Need thread and this frame, but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. + // + // UnrollBlock* fetch_unroll_info(JavaThread* thread) + + // fetch_unroll_info needs to call last_java_frame(). 
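
Because fetch_unroll_info needs last_java_frame(), the code below first plants an anchor with set_last_Java_frame and clears it again with reset_last_Java_frame after the call. A toy model of that idea, with invented names (the real anchor lives in JavaThread and also records fp):

#include <cstdio>

// Toy model of the last_Java_frame anchor: before calling into the runtime the
// blob records enough state (an sp and a pc inside the blob) for a stack walker
// to start from, and clears it afterwards. Names here are illustrative only.
struct ToyThread { void* last_sp = nullptr; const void* last_pc = nullptr; };

static void runtime_call(ToyThread* t) {
  std::printf("stack walk would start at sp=%p pc=%p\n", t->last_sp, t->last_pc);
}

int main() {
  ToyThread t;
  int frame_word = 0;
  t.last_sp = &frame_word;                   // set_last_Java_frame(sp, ..., retaddr, ...)
  t.last_pc = (const void*)&runtime_call;    // any pc in this blob is precise enough
  runtime_call(&t);
  t.last_sp = nullptr;
  t.last_pc = nullptr;                       // reset_last_Java_frame()
  return 0;
}
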
+ + Label retaddr; + __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); +#ifdef ASSERT0 + { Label L; + __ ldr(rscratch1, Address(rthread, + JavaThread::last_Java_fp_offset())); + __ cbz(rscratch1, L); + __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); + __ bind(L); + } +#endif // ASSERT + __ mov(c_rarg0, rthread); + __ mov(c_rarg1, r7); // rcpool + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); + __ bl(rscratch1); + __ bind(retaddr); + + // Need to have an oopmap that tells fetch_unroll_info where to + // find any register it might need. + oop_maps->add_gc_map(__ pc() - start, map); + + __ reset_last_Java_frame(false); + + // Load UnrollBlock* into r5 + __ mov(r5, r0); + + Label noException; + __ cmp(r7, Deoptimization::Unpack_exception); // Was exception pending? + __ b(noException, Assembler::NE); + __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset())); + // QQQ this is useless it was NULL above + __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset())); + __ mov(rscratch1, 0); + __ str(rscratch1, Address(rthread, JavaThread::exception_oop_offset())); + __ str(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); + + __ verify_oop(r0); + + // Overwrite the result registers with the exception results. + __ str(r0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::r0_off))); + // I think this is useless + // __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes())); + + __ bind(noException); + + // Only register save data is on the stack. + // Now restore the result registers. Everything else is either dead + // or captured in the vframeArray. + RegisterSaver::restore_result_registers(masm); + + // All of the register save area has been popped of the stack. Only the + // return address remains. + + // Pop all the frames we must move/replace. + // + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: caller of deopting frame (could be compiled/interpreted). + // + // Note: by leaving the return address of self-frame on the stack + // and using the size of frame 2 to adjust the stack + // when we are done the return to frame 3 will still be on the stack. + + // Pop deoptimized frame + __ ldr(r2, Address(r5, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); + __ sub(r2, r2, frame::get_frame_size() * wordSize); + __ add(sp, sp, r2); + if (FrameAPCS) { + // frame constructed with + // push {r11, r12, lr, pc} + __ ldr(rfp, __ post(sp, 2 * wordSize)); + __ ldr(lr, __ post(sp, 2 * wordSize)); + } else { + __ ldrd(rfp, lr, __ post(sp, 2 * wordSize)); + } + // LR should now be the return address to the caller (3) + +#ifdef ASSERT + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. + if (UseStackBanging) { + __ ldr(rscratch2, Address(r5, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); + __ bang_stack_size(rscratch2, r2); + } +#endif + // Load address of array of frame pcs into r2 + __ ldr(r2, Address(r5, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + + // Trash the old pc + // __ addptr(sp, wordSize); FIXME ???? 
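
The frame bookkeeping performed around this point (pop the deoptimized frame, extend the caller by caller_adjustment, then push one skeletal interpreter frame per entry of frame_sizes/frame_pcs, which continues just below) is easier to follow in a simplified runnable model. The struct here is a made-up stand-in for Deoptimization::UnrollBlock, with sizes in abstract words and the stack growing downwards:

#include <cstdio>

// Simplified stand-in for Deoptimization::UnrollBlock -- just the fields the
// blob reads, with sizes in abstract words to keep the arithmetic visible.
struct ToyUnrollBlock {
  int size_of_deoptimized_frame;
  int caller_adjustment;
  int number_of_frames;
  const int* frame_sizes;      // one entry per skeletal interpreter frame
};

int main() {
  const int sizes[] = {18, 24, 16};             // made-up frame sizes
  ToyUnrollBlock info = {40, 6, 3, sizes};
  int sp = 1000;                                // abstract sp, stack grows downwards
  sp += info.size_of_deoptimized_frame;         // pop the deoptimized compiled frame
  sp -= info.caller_adjustment;                 // extend the caller for extra locals
  for (int i = 0; i < info.number_of_frames; i++) {
    sp -= info.frame_sizes[i];                  // push one skeletal interpreter frame
    std::printf("after frame %d: sp = %d\n", i, sp);
  }
  return 0;
}
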
+ + // Load address of array of frame sizes into r4 + __ ldr(r4, Address(r5, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); + + // Load counter into r3 + __ ldr(r3, Address(r5, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); + + // Now adjust the caller's stack to make up for the extra locals + // but record the original sp so that we can save it in the skeletal interpreter + // frame and the stack walking of interpreter_sender will get the unextended sp + // value and not the "real" sp value. + + const Register sender_sp = r6; + + __ mov(sender_sp, sp); + __ ldr(rscratch1, Address(r5, + Deoptimization::UnrollBlock:: + caller_adjustment_offset_in_bytes())); + __ sub(sp, sp, rscratch1); + + // Push interpreter frames in a loop + __ mov(rscratch1, (address)0xDEADDEAD); // Make a recognizable pattern + // Initially used to place 0xDEADDEAD in rscratch2 as well - why? + __ mov(rscratch2, 0); + Label loop; + __ bind(loop); + __ ldr(rscratch1, Address(__ post(r4, wordSize))); // Load frame size + __ sub(rscratch1, rscratch1, frame::get_frame_size() * wordSize); // We'll push frame backtrace by hand + __ ldr(lr, Address(__ post(r2, wordSize))); // Load pc + __ enter(); // Save old & set new fp + __ sub(sp, sp, rscratch1); // Prolog + // This value is corrected by layout_activation_impl + __ str(rscratch2, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); + __ str(sender_sp, Address(rfp, frame::get_interpreter_frame_sender_sp_offset() * wordSize)); // Make it walkable + __ mov(sender_sp, sp); // Pass sender_sp to next frame + __ sub(r3, r3, 1); // Decrement counter + __ cbnz(r3, loop); + + // Re-push self-frame + __ ldr(lr, Address(r2)); + __ enter(); + + // Allocate a full sized register save area. We subtract frame::get_frame_size() words, + // because enter() just pushed them. + __ sub(sp, sp, (frame_size_in_words - frame::get_frame_size()) * wordSize); + + // Restore frame locals after moving the frame + if(hasFPU()) { + __ vstr_f64(d0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::fpu_state_off))); + } + __ strd(r0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::r0_off))); + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // restore return values to their stack-slots with the new SP. + // + // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) + + // Use rfp because the frames look interpreted now + // Don't need the precise return PC here, just precise enough to point into this code blob. + address the_pc = __ pc(); + __ set_last_Java_frame(sp, rfp, the_pc, rscratch1); + + __ mov(c_rarg0, rthread); + __ mov(c_rarg1, r7); // second arg: exec_mode + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); + __ bl(rscratch1); + + // Set an oopmap for the call site + // Use the same PC we used for the last java frame + oop_maps->add_gc_map(the_pc - start, + new OopMap( frame_size_in_words, 0 )); + + // Clear fp AND pc + __ reset_last_Java_frame(true); + + // Collect return values + if(hasFPU()) { + __ vldr_f64(d0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::fpu_state_off))); + } + __ ldrd(r0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::r0_off))); + // I think this is useless (throwing pc?) + // __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes())); + + // Pop self-frame. 
+ __ leave(); // Epilog + + // Jump to interpreter + __ b(lr); + + // Make sure all code is generated + masm->flush(); + + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); + +} + +uint SharedRuntime::out_preserve_stack_slots() { + return 0; +} + +#if COMPILER2_OR_JVMCI +//------------------------------generate_uncommon_trap_blob-------------------- +void SharedRuntime::generate_uncommon_trap_blob() { + // Allocate space for the code + ResourceMark rm; + // Setup code generation tools + CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + + assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); + + address start = __ pc(); + + // Push self-frame. We get here with a return address in LR + // and sp should be 16 byte aligned + // push rfp and retaddr by hand + __ enter(); + // we don't expect an arg reg save area +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + // compiler left unloaded_class_index in j_rarg0 + __ mov(c_rarg1, j_rarg0); + + // we need to set the past SP to the stack pointer of the stub frame + // and the pc to the address where this runtime call will return + // although actually any pc in this code blob will do). + Label retaddr; + __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // capture callee-saved registers as well as return values. + // + // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index); + // + // n.b. 2 gp args, 0 fp args, integral return type + + __ mov(c_rarg0, rthread); + __ mov(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap); + __ lea(rscratch1, + RuntimeAddress(CAST_FROM_FN_PTR(address, + Deoptimization::uncommon_trap))); + __ bl(rscratch1); + __ bind(retaddr); + + // Set an oopmap for the call site + OopMapSet* oop_maps = new OopMapSet(); + OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); + + // location of rfp is known implicitly by the frame sender code + + oop_maps->add_gc_map(__ pc() - start, map); + + __ reset_last_Java_frame(false); + + // move UnrollBlock* into r4 + __ mov(r4, r0); + + // Pop all the frames we must move/replace. + // + // Frame picture (youngest to oldest) + // 1: self-frame (no frame link) + // 2: deopting frame (no frame link) + // 3: caller of deopting frame (could be compiled/interpreted). + + // Pop self-frame + __ leave(); + + // Pop deoptimized frame (int) + __ ldr(r2, Address(r4, + Deoptimization::UnrollBlock:: + size_of_deoptimized_frame_offset_in_bytes())); + __ add(sp, sp, r2); + +#ifdef ASSERT + // Compilers generate code that bang the stack by as much as the + // interpreter would need. So this stack banging should never + // trigger a fault. Verify that it does not on non product builds. + if (UseStackBanging) { + // The compiled method that we are deoptimizing was popped from the stack. + // If the stack bang results in a stack overflow, we don't return to the + // method that is being deoptimized. The stack overflow exception is + // propagated to the caller of the deoptimized method. Need to get the pc + // from the caller in LR and restore FP. 
+ __ ldr(r2, Address(r4, + Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + __ ldr(lr, Address(r2, 0)); + __ ldr(rfp, Address(r4, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); + __ ldr(r1, Address(r4, + Deoptimization::UnrollBlock:: + total_frame_sizes_offset_in_bytes())); + __ bang_stack_size(r1, r2); + } +#endif + // Now is the time to restore frameptr. Need to take what was in the frame header + // since it can be real FP if previous frame was interpreted/C1 or arbitrary value if C2 + __ ldr(rfp, Address(sp, -2*wordSize)/*Address(r4, + Deoptimization::UnrollBlock::initial_info_offset_in_bytes())*/); + + // Load address of array of frame pcs into r2 (address*) + __ ldr(r2, Address(r4, + Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); + + // Load address of array of frame sizes into r5 (intptr_t*) + __ ldr(r5, Address(r4, + Deoptimization::UnrollBlock:: + frame_sizes_offset_in_bytes())); + + // Counter + __ ldr(r3, Address(r4, + Deoptimization::UnrollBlock:: + number_of_frames_offset_in_bytes())); // (int) + + // Now adjust the caller's stack to make up for the extra locals but + // record the original sp so that we can save it in the skeletal + // interpreter frame and the stack walking of interpreter_sender + // will get the unextended sp value and not the "real" sp value. + + const Register sender_sp = r7; + + __ mov(sender_sp, sp); + __ ldr(r1, Address(r4, + Deoptimization::UnrollBlock:: + caller_adjustment_offset_in_bytes())); // (int) + __ sub(sp, sp, r1); + + __ mov(rscratch1, 0); + // Push interpreter frames in a loop + Label loop; + __ bind(loop); + __ ldr(r1, __ post(r5, wordSize)); // Load frame size + __ sub(r1, r1, 2 * wordSize); // We'll push pc and rfp by hand + __ ldr(lr, __ post(r2, wordSize)); // Save return address + __ enter(); // and old rfp & set new rfp + __ sub(sp, sp, r1); // Prolog + __ str(sender_sp, Address(rfp, frame::get_interpreter_frame_sender_sp_offset() * wordSize)); // Make it walkable + // This value is corrected by layout_activation_impl + __ str(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); //zero it + __ mov(sender_sp, sp); // Pass sender_sp to next frame + __ subs(r3, r3, 1); // Decrement counter + __ b(loop, Assembler::GT); + __ ldr(lr, Address(r2, 0)); // save final return address + // Re-push self-frame + __ enter(); // & old rfp & set new rfp + + // Use rfp because the frames look interpreted now + // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. + // Don't need the precise return PC here, just precise enough to point into this code blob. + address the_pc = __ pc(); + __ set_last_Java_frame(sp, rfp, the_pc, rscratch1); + + // Call C code. Need thread but NOT official VM entry + // crud. We cannot block on this call, no GC can happen. Call should + // restore return values to their stack-slots with the new SP. + // Thread is in rdi already. + // + // BasicType unpack_frames(JavaThread* thread, int exec_mode); + // + // n.b. 
2 gp args, 0 fp args, integral return type + + // sp should already be aligned + __ mov(c_rarg0, rthread); + __ mov(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap); + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); + __ bl(rscratch1); + + // Set an oopmap for the call site + // Use the same PC we used for the last java frame + oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + + // Clear fp AND pc + __ reset_last_Java_frame(true); + + // Pop self-frame. + __ leave(); // Epilog + + // Jump to interpreter + __ b(lr); + + // Make sure all code is generated + masm->flush(); + + _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, + SimpleRuntimeFrame::framesize >> 1); +} +#endif // COMPILER2_OR_JVMCI + + +//------------------------------generate_handler_blob------ +// +// Generate a special Compile2Runtime blob that saves all registers, +// and setup oopmap. +// +SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { + ResourceMark rm; + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map; + + // Allocate space for the code. Setup code generation tools. + CodeBuffer buffer("handler_blob", 2048, 1024); + MacroAssembler* masm = new MacroAssembler(&buffer); + + address start = __ pc(); + address call_pc = NULL; + int frame_size_in_words; + bool cause_return = (poll_type == POLL_AT_RETURN); + + // If cause_return is true we are at a poll_return and there is + // the return address on the stack to the caller on the nmethod + // that is safepoint. We can leave this return on the stack and + // effectively complete the return and safepoint in the caller. + // Otherwise we push space for a return address that the safepoint + // handler will install later to make the stack walking sensible. + if (!cause_return) { + __ sub(sp, sp, wordSize); // make room for return address + } + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, !cause_return); + + // The following is basically a call_VM. However, we need the precise + // address of the call in order to generate an oopmap. Hence, we do all the + // work outselves. + + Label retaddr; + __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); + + // The return address must always be correct so that frame constructor never + // sees an invalid pc. + + if (!cause_return) { + // overwrite the return address pushed by save_live_registers + // Additionally, r5 is a callee-saved register so we can look at + // it later to determine if someone changed the return address for + // us! + __ ldr(r5, Address(rthread, JavaThread::saved_exception_pc_offset())); + __ str(r5, Address(rfp, frame::get_return_addr_offset() * wordSize)); + } + + // Do the call + __ mov(c_rarg0, rthread); + __ lea(rscratch1, RuntimeAddress(call_ptr)); + __ bl(rscratch1); + __ bind(retaddr); + + // Set an oopmap for the call site. This oopmap will map all + // oop-registers and debug-info registers as callee-saved. This + // will allow deoptimization at this safepoint to find all possible + // debug-info recordings, as well as let GC find all oops. 
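
A little further below, an ASSERT block checks that the instruction being skipped over really is the safepoint poll, by masking off the base-register field and comparing against 0xe590c000. A quick standalone check of that arithmetic; 0xe59cc000 is assumed here to be the encoding of ldr r12, [r12, #0], which is what the comment in the code names:

#include <cstdint>
#include <cstdio>

// The ASSERT code below keeps every bit of the fetched instruction except the
// base-register field (bits 16..19) and compares against 0xe590c000, i.e.
// "ldr r12, [rN]" for any base register N.
int main() {
  const uint32_t poll   = 0xe59cc000u;          // assumed encoding of ldr r12, [r12, #0]
  const uint32_t masked = poll & 0xfff0ffffu;   // same effect as bic(x, x, ~0xfff0ffff)
  std::printf("masked = %#x, matches poll pattern: %d\n",
              masked, (int)(masked == 0xe590c000u));
  return 0;
}
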
+ + oop_maps->add_gc_map( __ pc() - start, map); + + Label noException, no_adjust, bail; + + __ reset_last_Java_frame(false); + + __ maybe_isb(); + __ membar(Assembler::LoadLoad | Assembler::LoadStore); + + if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbnz(rscratch1, no_adjust); + + // If our stashed return pc was modified by the runtime we avoid touching it + __ ldr(rscratch1, Address(rfp, frame::get_return_addr_offset() * wordSize)); + __ cmp(r5, rscratch1); + __ b(no_adjust, Assembler::NE); + +#ifdef ASSERT + // Verify the correct encoding of the poll we're about to skip. + // ldr(r12, [r12, #0]); + __ ldr(rscratch1, Address(r5)); + __ bic(rscratch1, rscratch1, ~0xfff0ffff); + __ mov(rscratch2, 0xe590c000); + __ cmp(rscratch1, rscratch2); + __ b(bail, Assembler::NE); +#endif + // Adjust return pc forward to step over the safepoint poll instruction + __ add(r5, r5, NativeInstruction::arm_insn_sz); + __ str(r5, Address(rfp, frame::get_return_addr_offset() * wordSize)); + } + + __ bind(no_adjust); + + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cmp(rscratch1, 0); + + // does not kill flags + RegisterSaver::restore_live_registers(masm, cause_return); + // for !POLL_AT_RETURN the stack has return address on it + + __ b(noException, Assembler::EQ); + + // Exception pending + if (cause_return) + __ mov(r3, lr); + else + __ pop(r3); + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + // No exception case + __ bind(noException); + + if (cause_return) + __ b(lr); + else + __ pop(r15_pc); + +#ifdef ASSERT + __ bind(bail); + __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); +#endif + + // Make sure all code is generated + masm->flush(); + + // Fill-out other meta info + return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); +} + +// +// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss +// +// Generate a stub that calls into vm to find out the proper destination +// of a java call. All the argument registers are live at this point +// but since this is generic code we don't know what they are and the caller +// must do any gc of the args. +// +RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { + assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); + + // allocate space for the code + ResourceMark rm; + + //CodeBuffer buffer(name, 1000, 512); + CodeBuffer buffer(name, 2048, 512 ); // changed as error later + MacroAssembler* masm = new MacroAssembler(&buffer); + + int frame_size_in_words; + + OopMapSet *oop_maps = new OopMapSet(); + OopMap* map = NULL; + + int start = __ offset(); + + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + int frame_complete = __ offset(); + + { + Label retaddr; + __ set_last_Java_frame(sp, noreg, retaddr, rscratch1); + + __ mov(c_rarg0, rthread); + __ lea(rscratch1, RuntimeAddress(destination)); + + __ bl(rscratch1); + __ bind(retaddr); + } + + // Set an oopmap for the call site. + // We need this not only for callee-saved registers, but also for volatile + // registers that the compiler might be keeping live across a safepoint. 
+ + oop_maps->add_gc_map( __ offset() - start, map); + + __ maybe_isb(); + + // r0 contains the address we are going to jump to assuming no exception got installed + + // clear last_Java_sp + __ reset_last_Java_frame(false); + // check for pending exceptions + Label pending; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbnz(rscratch1, pending); + + // get the returned Method* + __ get_vm_result_2(rmethod, rthread); + __ str(rmethod, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::rmethod_off))); + + // r0 is where we want to jump, overwrite rscratch1 which is saved and scratch + __ str(r0, Address(sp, RegisterSaver::offset_in_bytes(RegisterSaver::rscratch1_off))); + RegisterSaver::restore_live_registers(masm); + + // We are back the the original state on entry and ready to go. + + __ b(rscratch1); + + // Pending exception after the safepoint + + __ bind(pending); + + RegisterSaver::restore_live_registers(masm); + + // exception pending => remove activation and forward to exception handler + __ mov(rscratch1, 0); + __ str(rscratch1, Address(rthread, JavaThread::vm_result_offset())); + + __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + // ------------- + // make sure all code is generated + masm->flush(); + + // return the blob + // frame_size_words or bytes?? + return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); +} + + +#if COMPILER2_OR_JVMCI +// This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame +// +//------------------------------generate_exception_blob--------------------------- +// creates exception blob at the end +// Using exception blob, this code is jumped from a compiled method. +// (see emit_exception_handler in x86_64.ad file) +// +// Given an exception pc at a call we call into the runtime for the +// handler in this method. This handler might merely restore state +// (i.e. callee save registers) unwind the frame and jump to the +// exception handler for the nmethod if there is no Java level handler +// for the nmethod. +// +// This code is entered with a jmp. +// +// Arguments: +// r0: exception oop +// r3: exception pc +// +// Results: +// r0: exception oop +// r3: exception pc in caller or ??? +// destination: exception handler of caller +// +// Note: the exception pc MUST be at a call (precise debug information) +// Registers r0, r3, r2, r4, r5, r8-r11 are not callee saved. +// + +void OptoRuntime::generate_exception_blob() { + // allocate space for code + ResourceMark rm; + int pad = VerifyThread ? 256 : 0;// Extra slop space for more verify code + + // setup code generation tools + // Measured 8/7/03 at 256 in 32bit debug build (no VerifyThread) + // Measured 8/7/03 at 528 in 32bit debug build (VerifyThread) + CodeBuffer buffer("exception_blob", 600+pad, 512); + MacroAssembler* masm = new MacroAssembler(&buffer); + + int framesize_in_words = 2; // FP + LR + int framesize_in_bytes = framesize_in_words * wordSize; + int framesize_in_slots = framesize_in_bytes / sizeof(jint); + + address start = __ pc(); + + __ str(r0, Address(rthread, JavaThread::exception_oop_offset())); + __ str(r3, Address(rthread, JavaThread::exception_pc_offset())); + + // This call does all the hard work. It checks if an exception catch + // exists in the method. + // If so, it returns the handler address. 
+ // If the nmethod has been deoptimized and it had a handler the handler + // address is the deopt blob unpack_with_exception entry. + // + // If no handler exists it prepares for stack-unwinding, restoring the callee-save + // registers of the frame being removed. + // + __ mov(lr, r3); + __ enter(); + address the_pc = __ pc(); + __ set_last_Java_frame(sp, noreg, the_pc, rscratch1); + + __ mov(r0, rthread); + + // This call can block at exit and nmethod can be deoptimized at that + // point. If the nmethod had a catch point we would jump to the + // now deoptimized catch point and fall thru the vanilla deopt + // path and lose the exception + // Sure would be simpler if this call didn't block! + __ call(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)); + + // Set an oopmap for the call site. This oopmap will only be used if we + // are unwinding the stack. Hence, all locations will be dead. + // Callee-saved registers will be the same as the frame above (i.e., + // handle_exception_stub), since they were restored when we got the + // exception. + + OopMapSet* oop_maps = new OopMapSet(); + + oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); + + __ reset_last_Java_frame(false); + + __ leave(); + + // Restore SP from its saved reg (FP) if the exception PC is a MethodHandle call site. + __ ldr(rscratch1, Address(rthread, JavaThread::is_method_handle_return_offset())); + __ cmp(rscratch1, 0); + __ mov(sp, rfp, Assembler::NE); + + // We have a handler in r0 (could be deopt blob). + __ mov(rscratch2, r0); + + // Since this may be the deopt blob we must set R3 to look like we returned + // from the original pc that threw the exception + + __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset())); + + __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset())); + __ mov(rscratch1, 0); +#ifdef ASSERT + __ str(rscratch1, Address(rthread, JavaThread::exception_handler_pc_offset())); + __ str(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); +#endif + // Clear the exception oop so GC no longer processes it as a root. + __ str(rscratch1, Address(rthread, JavaThread::exception_oop_offset())); + __ b(rscratch2); + + // ------------- + // make sure all code is generated + masm->flush(); + + _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize_in_words); +} +#endif // COMPILER2_OR_JVMCI --- /dev/null 2018-09-25 19:25:25.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/stubGenerator_aarch32.cpp 2018-09-25 19:25:25.000000000 +0300 @@ -0,0 +1,2904 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "nativeInst_aarch32.hpp" +#include "oops/instanceOop.hpp" +#include "oops/method.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/handles.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "vm_version_aarch32.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + + +// Declaration and definition of StubGenerator (no .hpp file). +// For a more detailed description of the stub routine structure +// see the comment in stubRoutines.hpp + +#undef __ +#define __ _masm-> +#define TIMES_OOP lsl(exact_log2(4)) + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +// Stub Code definitions + +class StubGenerator: public StubCodeGenerator { + private: + +#ifdef PRODUCT +#define inc_counter_np(counter) ((void)0) +#else + void inc_counter_np_(int& counter) { + __ lea(rscratch2, ExternalAddress((address)&counter)); + __ ldr(rscratch1, Address(rscratch2)); + __ add(rscratch1, rscratch1, 1); + __ str(rscratch1, Address(rscratch2)); + } +#define inc_counter_np(counter) \ + BLOCK_COMMENT("inc_counter " #counter); \ + inc_counter_np_(counter); +#endif + + // Call stubs are used to call Java from C + // + // There are only four registers available to house arguments and we're expecting eight + // the layout will be as follows: + + // c_rarg0 = call wrapper address + // c_rarg1 = result + // c_rarg2 = result type + // c_rarg3 = method + // sp -> [ entry_point + // parameters -> java params + // parameter size (in words) + // thread] (address increasing) + // + // We don't + // NEW!! layout for aarch32 so that save and restore can be collapsed into a single + // load/store + // layout of saved registers now is + // 0 [ saved lr ] <- rfp + // -1 [ saved fp ] + // -2 [ r12/rthread ] Thread passed in args + // -3 [ r10/rmethod ] NOTE omitted rfp as restored automatically + // -4 [ r9/rscratch1 ] Platform register? + // -5 [ r8/thread ] + // -6 [ r7/rcpool ] + // -7 [ r6/rlocals ] + // -8 [ r5/rbcp ] + // -9 [ r4/rdispatch ] + // -10 [ r2/res type ] + // -11 [ r1/result ] + // -12 [r0/call wrapper]<- sp (when restored from fp value) + // -13 maybe alignment + // -YY [ java arg0 ] + // ... + // -xx [ java argn ] <- sp on branch into java + // + // XXX Note we do not save floating point registers + // Only floating point registers s16-31 / d8-15 need to be saved + // these are never touched by template interpreted code. + // On a sequence such as C -> Java -> C, the C functions will save them if used. 
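
generate_call_stub below saves the registers of this layout with a single stmdb over a bit mask and derives its frame offsets from the number of bits set in that mask. A small standalone check of that bookkeeping, as a sketch only: the mask literal is copied from the stub below, while the 16-bit width and wordSize of 4 are aarch32 assumptions.

#include <bitset>
#include <cstdio>

// Sanity check of the save-area bookkeeping used by generate_call_stub below:
// the number of registers pushed by stmdb is the population count of the mask.
int main() {
  const unsigned c_save_regset = 0b0001011111110111;     // r0-r2, r4-r10, r12
  const int wordSize = 4;                                // aarch32 assumption
  const int nsaved = (int)std::bitset<16>(c_save_regset).count();  // role of __ count_bits()
  std::printf("registers saved: %d (%d bytes)\n", nsaved, nsaved * wordSize);
  return 0;
}
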
+ + address generate_call_stub(address& return_address) { + /*assert((int)frame::entry_frame_after_call_words == -(int)sp_after_call_off + 1 && + (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, + "adjust this code");*/ + const int thread_off = -frame::get_frame_size(VMFrameAPCS) * wordSize; + + StubCodeMark mark(this, "StubRoutines", "call_stub"); + address start = __ pc(); + __ reg_printf("entering call stub with { sp : %p, rfp : %p, lr : %p}\n", sp, rfp, lr); + __ enter(VMFrameAPCS); //save rfp & lr and possibly another 2 words + + const int entry_point_arg_off = 1 * wordSize, + params_arg_off = 2 * wordSize, + param_sz_arg_off = 3 * wordSize, + thread_arg_off = 4 * wordSize; + // r12 is a scratch register so we can clobber it to save thread + // which is needed at the end + __ ldr(r12, Address(rfp, thread_arg_off)); + // r0, r1, r2, r4 - r10, r12 + // we save r0 as the call_wrapper_address is needed elsewhere + // we save r1, r2 as they hold the result and it's type, + // which are needed on return + // r12 holds the thread ptr + unsigned c_save_regset = 0b0001011111110111; + int nsaved = __ count_bits(c_save_regset); + __ stmdb(sp, c_save_regset); + + // Offset from rfp to end of stack. + const int rfp_tos_offset_bytes = frame::get_offset_from_rfp_bytes() + nsaved * wordSize; + + // install Java thread in global register now we have saved + // whatever value it held + __ mov(rthread, r12); + // And method + __ mov(rmethod, c_rarg3); + +#ifdef ASSERT + // make sure we have no pending exceptions + { + Label L; + __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); + __ cmp(rscratch1, (unsigned)NULL_WORD); + __ b(L, Assembler::EQ); + __ stop("StubRoutines::call_stub: entered with pending exception"); + __ BIND(L); + } +#endif + __ ldr(rscratch2, Address(rfp, param_sz_arg_off)); + // align sp at the time we call java + __ sub(sp, sp, rscratch2, lsl(LogBytesPerWord)); + __ align_stack(); + __ add(sp, sp, rscratch2, lsl(LogBytesPerWord)); + + __ ldr(rscratch1, Address(rfp, params_arg_off)); + + BLOCK_COMMENT("pass parameters if any"); + Label parameters_done; + + __ reg_printf("call_stub param_off = %p, param_sz = %d\n", rscratch1, rscratch2); + __ cmp(rscratch2, 0); + __ b(parameters_done, Assembler::EQ); + + // r14 makes ok temp as already saved in frame header + address loop = __ pc(); + __ ldr(r14, Address(__ post(rscratch1, wordSize))); + __ subs(rscratch2, rscratch2, 1); + + // TODO remove + __ reg_printf("\tARG SP[%d] : 0x%08x\n", rscratch2, r14); + __ cmp(rscratch2, 0); + // END TODO + __ push(r14); + __ b(loop, Assembler::GT); + + __ BIND(parameters_done); + +#ifdef ASSERT + __ verify_stack_alignment(); +#endif + + BLOCK_COMMENT("call Java function"); + __ ldr(rscratch1, Address(rfp, entry_point_arg_off)); + __ reg_printf("Calling Java function with rfp = %p, sp = %p\n", rfp, sp); + __ mov(r4, sp); // set sender sp + __ bl(rscratch1); + // save current address for use by exception handling code + return_address = __ pc(); + + __ reg_printf("Returned to call_stub with rfp = %p, sp = %p\n", rfp, sp); + + // At this point rfp should be restored to the value it was set to before + // use it to set the top of stack. 
+ __ sub(sp, rfp, rfp_tos_offset_bytes); + +#ifdef ASSERT + // verify that threads correspond + __ ldr(r12, Address(rfp, thread_off)); + //rfp points to register stored in highest memory location - first on + // stack, that's the saved lr, r12 is just below that + // stored in r12 at this point + { + Label L, S; + __ cmp(rthread, r12); + __ b(S, Assembler::NE); + __ get_thread(r12); + __ cmp(rthread, r12); + __ b(L, Assembler::EQ); + __ BIND(S); + __ stop("StubRoutines::call_stub: threads must correspond"); + __ BIND(L); + } +#endif + + if(MacroAssembler::enable_debugging_static) { + // FIXME Remove this hacky debugging code + Label L; + __ ldr(rscratch2, Address(rthread, Thread::pending_exception_offset())); + __ cbnz(rscratch2, L); + // If we're returning via an exception then we shouldn't report exit, + // the exception handler will have already reported the exit and reporting + // via our progress through the call stub will result in an extra method + // being reported as exited. + __ print_method_exit(); + __ bind(L); + } + + // NOTE Horrible tricks here + // We need to preserve current r0 and r1 values as they contain the return value. + // First we discard r0 saved to stack, no longer needed. + // We have saved result and type as c_rarg1 and c_rarg2, so now we alter + // the regset to load as follows: + // c_rarg2 = result + // c_rarg3 = result_type + + assert((c_save_regset & 0xf) == 0b0111, "change me"); + __ add(sp, sp, wordSize); + const int altered_saved_regset = (~0xf & c_save_regset) | 0xc; + __ ldmia(sp, altered_saved_regset); + + // store result depending on type (everything that is not + // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) + // n.b. this assumes Java returns an integral result in r0 + // and a floating result in j_farg0 + + Label is_object, is_long, is_float, is_double, exit; + __ cmp(c_rarg3, T_OBJECT); + __ b(is_object, Assembler::EQ); + __ cmp(c_rarg3, T_LONG); + __ b(is_long, Assembler::EQ); + if(hasFPU()) { + // soft FP fall through T_INT case + __ cmp(c_rarg3, T_FLOAT); + __ b(is_float, Assembler::EQ); + } + __ cmp(c_rarg3, T_DOUBLE); + if(hasFPU()) { + __ b(is_double, Assembler::EQ); + } else { + __ b(is_long, Assembler::EQ); + } + + // handle T_INT case + __ str(r0, Address(c_rarg2)); + + __ BIND(exit); + __ leave(VMFrameAPCS); //Restore rfp, sp, lr + __ reg_printf("leaving call stub with { sp : %p, rfp : %p, lr : %p}\n", sp, rfp, lr); + // Pop arguments from stack. + //__ add(sp, sp, 4 * wordSize); + + __ b(lr); + + // handle return types different from T_INT + __ BIND(is_object); + __ mov(r1, 0); + + __ BIND(is_long); + __ strd(r0, r1, Address(c_rarg2, 0)); + __ b(exit, Assembler::AL); + + if(hasFPU()) { + __ BIND(is_float); + __ vstr_f32(f0, Address(c_rarg2, 0)); + __ b(exit, Assembler::AL); + + __ BIND(is_double); + __ vstr_f64(d0, Address(c_rarg2, 0)); + __ b(exit, Assembler::AL); + } + return start; + } + + // Return point for a Java call if there's an exception thrown in + // Java code. The exception is caught and transformed into a + // pending exception stored in JavaThread that can be tested from + // within the VM. + // + // Note: Usually the parameters are removed by the callee. In case + // of an exception crossing an activation frame boundary, that is + // not the case if the callee is compiled code => need to setup the + // rsp. 
+ // + // r0: exception oop + + // NOTE: this is used as a target from the signal handler so it + // needs an x86 prolog which returns into the current simulator + // executing the generated catch_exception code. so the prolog + // needs to install rax in a sim register and adjust the sim's + // restart pc to enter the generated code at the start position + // then return from native to simulated execution. + + address generate_catch_exception() { + const int thread_off = -frame::get_frame_size(VMFrameAPCS) * wordSize; + + StubCodeMark mark(this, "StubRoutines", "catch_exception"); + address start = __ pc(); + + // same as in generate_call_stub(): + const Address thread(rfp, thread_off); + +#ifdef ASSERT + // verify that threads correspond + { + Label L, S; + __ ldr(rscratch1, thread); + __ cmp(rthread, rscratch1); + __ b(S, Assembler::NE); + __ get_thread(rscratch1); + __ cmp(rthread, rscratch1); + __ b(L, Assembler::EQ); + __ bind(S); + __ stop("StubRoutines::catch_exception: threads must correspond"); + __ bind(L); + } +#endif + + // set pending exception + __ verify_oop(r0); + + __ str(r0, Address(rthread, Thread::pending_exception_offset())); + __ mov(rscratch1, (address)__FILE__); + __ str(rscratch1, Address(rthread, Thread::exception_file_offset())); + __ mov(rscratch1, (int)__LINE__); + __ str(rscratch1, Address(rthread, Thread::exception_line_offset())); + + // complete return to VM + assert(StubRoutines::_call_stub_return_address != NULL, + "_call_stub_return_address must have been generated before"); + __ b(StubRoutines::_call_stub_return_address); + + return start; + } + + // Continuation point for runtime calls returning with a pending + // exception. The pending exception check happened in the runtime + // or native call stub. The pending exception in Thread is + // converted into a Java-level exception. + // + // Contract with Java-level exception handlers: + // r0: exception + // r3: throwing pc + // + // NOTE: At entry of this stub, exception-pc must be in LR !! + + // NOTE: this is always used as a jump target within generated code + // so it just needs to be generated code wiht no x86 prolog + + address generate_forward_exception() { + //FIXME NOTE ON ALTERATION TO ARM32 IT WAS ASSUMED THAT rmethod + // won't be used anymore and set on entry to the handler - is this true? + + Register spare = rmethod; + + StubCodeMark mark(this, "StubRoutines", "forward exception"); + address start = __ pc(); + + // Upon entry, LR points to the return address returning into + // Java (interpreted or compiled) code; i.e., the return address + // becomes the throwing pc. + // + // Arguments pushed before the runtime call are still on the stack + // but the exception handler will reset the stack pointer -> + // ignore them. A potential result in registers can be ignored as + // well. + +#ifdef ASSERT + // make sure this code is only executed if there is a pending exception + { + Label L; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbnz(rscratch1, L); + __ stop("StubRoutines::forward exception: no pending exception (1)"); + __ bind(L); + } +#endif + + // compute exception handler into r2 + + // call the VM to find the handler address associated with the + // caller address. pass thread in r0 and caller pc (ret address) + // in r1. n.b. the caller pc is in lr, unlike x86 where it is on + // the stack. 
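
The stub that follows implements the contract sketched in these comments: the return address in lr becomes the throwing pc, a handler is looked up for that pc, and control continues at the handler with the exception in r0 and the throwing pc in r3. A toy model of that control flow, with invented function and type names:

#include <cstdio>

// Toy model of the forward-exception contract: the return address (lr) becomes
// the throwing pc, a handler is looked up for that pc, and control continues at
// the handler with the exception and the throwing pc in hand. Names invented.
typedef void (*handler_t)(void* exception, const void* throwing_pc);

static void toy_handler(void* ex, const void* pc) {
  std::printf("handling exception %p thrown at pc %p\n", ex, pc);
}

static handler_t handler_for_return_address(const void* /*ret_pc*/) {
  return toy_handler;          // the real stub asks SharedRuntime for this address
}

int main() {
  int exception = 0;
  int call_site = 0;
  const void* throwing_pc = &call_site;        // stands in for lr at stub entry
  handler_t h = handler_for_return_address(throwing_pc);
  h(&exception, throwing_pc);                  // r0 = exception, r3 = throwing pc
  return 0;
}
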
+ __ mov(c_rarg1, lr); + // lr will be trashed by the VM call so we move it to R2 + // (callee-saved) because we also need to pass it to the handler + // returned by this call. + __ mov(spare, lr); //note rscratch1 is a callee saved register + BLOCK_COMMENT("call exception_handler_for_return_address"); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, + SharedRuntime::exception_handler_for_return_address), + rthread, c_rarg1); + // we should not really care that lr is no longer the callee + // address. we saved the value the handler needs in spare so we can + // just copy it to r3. however, the C2 handler will push its own + // frame and then calls into the VM and the VM code asserts that + // the PC for the frame above the handler belongs to a compiled + // Java method. So, we restore lr here to satisfy that assert. + __ mov(lr, spare); + // setup r0 & r3 & clear pending exception + __ mov(r3, spare); + __ mov(spare, r0); + __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); + __ mov(rscratch1, 0); + __ str(rscratch1, Address(rthread, Thread::pending_exception_offset())); + +#ifdef ASSERT + // make sure exception is set + { + Label L; + __ cbnz(r0, L); + __ stop("StubRoutines::forward exception: no pending exception (2)"); + __ bind(L); + } +#endif + // continue at exception handler + // r0: exception + // r3: throwing pc + // spare: exception handler + + __ verify_oop(r0); + __ b(spare); + + return start; + } + + // Non-destructive plausibility checks for oops + // + // Arguments: + // r0: oop to verify + // rscratch1: error message + // + // Stack after saving c_rarg3: + // [tos + 0]: saved c_rarg3 + // [tos + 1]: saved c_rarg2 + // [tos + 2]: saved lr + // [tos + 3]: saved rscratch2 + // [tos + 4]: saved r1 + // [tos + 5]: saved r0 + // [tos + 6]: saved rscratch1 + address generate_verify_oop() { + StubCodeMark mark(this, "StubRoutines", "verify_oop"); + address start = __ pc(); + + Label exit, error; + + // save c_rarg2 and c_rarg3 + __ stmdb(sp, RegSet::of(c_rarg2, c_rarg3).bits()); + + __ lea(c_rarg2, ExternalAddress((address) StubRoutines::verify_oop_count_addr())); + __ ldr(c_rarg3, Address(c_rarg2)); + __ add(c_rarg3, c_rarg3, 1); + __ str(c_rarg3, Address(c_rarg2)); + + // object is in r0 + // make sure object is 'reasonable' + __ cbz(r0, exit); // if obj is NULL it is OK + + // Check if the oop is in the right area of memory + __ mov(c_rarg3, (intptr_t) Universe::verify_oop_mask()); + __ andr(c_rarg2, r0, c_rarg3); + __ mov(c_rarg3, (intptr_t) Universe::verify_oop_bits()); + + // Compare c_rarg2 and c_rarg3. We don't use a compare + // instruction here because the flags register is live. + __ eor(c_rarg2, c_rarg2, c_rarg3); + __ cbnz(c_rarg2, error); + + // make sure klass is 'reasonable', which is not zero. 
+ __ load_klass(r0, r0); // get klass + __ cbz(r0, error); // if klass is NULL it is broken + + // return if everything seems ok + __ bind(exit); + + __ ldmia(sp, RegSet::of(c_rarg2, c_rarg3).bits()); + __ b(lr); + + // handle errors + __ bind(error); + __ ldmia(sp, RegSet::of(c_rarg2, c_rarg3).bits()); + + __ pusha(); + // Save old sp + __ add(c_rarg2, sp, 14 * wordSize); + __ str(c_rarg2, Address( __ pre(sp, -wordSize))); + __ mov(c_rarg0, rscratch1); // pass address of error message + __ mov(c_rarg1, lr); // pass return address + __ mov(c_rarg2, sp); // pass address of regs on stack +#ifndef PRODUCT + assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); +#endif + BLOCK_COMMENT("call MacroAssembler::debug"); + __ mov(rscratch1, CAST_FROM_FN_PTR(address, MacroAssembler::debug32)); + __ bl(rscratch1); + __ hlt(0); + + return start; + } + + // NOTE : very strange, I changed this but I don't know why the Address:(signed extend word) was here + //void array_overlap_test(Label& L_no_overlap, Address sf) { __ b(L_no_overlap); } + void array_overlap_test(Label& L_no_overlap) { __ b(L_no_overlap); } + //no test being performed ? + + // + // Small copy: less than 4 bytes. + // + // NB: Ignores all of the bits of count which represent more than 3 + // bytes, so a caller doesn't have to mask them. + + void copy_memory_small(Register s, Register d, Register count, Register tmp, bool is_aligned, int step) { + const int granularity = uabs(step); + const bool gen_always = !is_aligned || (-4 < step && step < 0); + Label halfword, done; + + if ((granularity <= 1) || gen_always) { + __ tst(count, 1); + __ b(halfword, Assembler::EQ); + __ ldrb(tmp, step < 0 ? __ pre(s, -1) : __ post(s, 1)); + __ strb(tmp, step < 0 ? __ pre(d, -1) : __ post(d, 1)); + } + + if ((granularity <= 2) || gen_always) { + __ bind(halfword); + __ tst(count, 2); + __ b(done, Assembler::EQ); + __ ldrh(tmp, step < 0 ? __ pre(s, -2) : __ post(s, 2)); + __ strh(tmp, step < 0 ? __ pre(d, -2) : __ post(d, 2)); + } + + __ bind(done); + } + + void copy_memory_simd(Register s, Register d, + Register count, Register tmp, int step, + DoubleFloatRegSet tmp_set, size_t tmp_set_size ) { + assert(UseSIMDForMemoryOps, "should be available"); + Label simd_loop, simd_small; + + __ cmp(count, tmp_set_size); + __ b(simd_small, Assembler::LT); + + __ mov(tmp, count, __ lsr(exact_log2(tmp_set_size))); + __ sub(count, count, tmp, __ lsl(exact_log2(tmp_set_size))); + + __ bind(simd_loop); + + __ pld(Address(s, step < 0 ? -2 * tmp_set_size : tmp_set_size)); + + if (step < 0) { + __ vldmdb_f64(s, tmp_set.bits()); + __ vstmdb_f64(d, tmp_set.bits()); + } else { + __ vldmia_f64(s, tmp_set.bits()); + __ vstmia_f64(d, tmp_set.bits()); + } + + __ subs(tmp, tmp, 1); + __ b(simd_loop, Assembler::NE); + + __ bind(simd_small); + } + + // All-singing all-dancing memory copy. + // + // Copy count units of memory from s to d. The size of a unit is + // step, which can be positive or negative depending on the direction + // of copy. If is_aligned is false, we align the source address. 
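+  //
+  // A rough sketch of the strategy below (illustrative only, not the exact
+  // generated code; see the implementation for the precise conditions):
+  //
+  //   count *= granularity;                     // elements -> bytes
+  //   if (count >= 32) {
+  //     if needed, align s to a 4-byte boundary (copy_memory_small);
+  //     if (d is 4-byte aligned)
+  //       bulk-copy 16-byte LDM/STM blocks (or 64-/16-byte NEON blocks when
+  //       UseSIMDForMemoryOps is set and NEON is available);
+  //   }
+  //   copy any remaining whole words one at a time;
+  //   copy_memory_small() finishes the last 0-3 bytes.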
+ // + + void copy_memory(bool is_aligned, Register s, Register d, + Register count, int step) { + const int small_copy_size = 32; // 1 copy by ldm pays off alignment efforts and push/pop of temp set + const int granularity = uabs(step); + const Register tmp2 = rscratch2; + const Register t0 = r3; + Label small; + + assert_different_registers(s, d, count, tmp2, t0); + + __ mov(count, count, __ lsl(exact_log2(granularity))); + + if (step < 0) { + __ add(s, s, count); + __ add(d, d, count); + } + + __ cmp(count, small_copy_size); + __ b(small, Assembler::LT); + + // aligning + if (!is_aligned || (-4 < step && step < 0)) { + assert(3 <= small_copy_size, "may copy number of bytes required for alignment"); + if (step < 0) { + __ andr(tmp2, s, 3); + } else { + __ rsb(tmp2, s, 0); + __ andr(tmp2, tmp2, 3); + } + __ sub(count, count, tmp2); + copy_memory_small(s, d, tmp2, t0, is_aligned, step); + } + +#ifdef ASSERT + Label src_aligned; + __ tst(s, 3); + __ b(src_aligned, Assembler::EQ); + __ stop("src is not aligned"); + __ bind(src_aligned); +#endif + + // if destination is unaliged, copying by words is the only option + __ tst(d, 3); + __ b(small, Assembler::NE); + if (UseSIMDForMemoryOps && (VM_Version::features() & FT_AdvSIMD)) { + copy_memory_simd(s, d, count, tmp2, step, DoubleFloatRegSet::range(d0, d7), 64); + copy_memory_simd(s, d, count, tmp2, step, DoubleFloatRegSet::range(d0, d1), 16); + } else { + const RegSet tmp_set = RegSet::range(r4, r7); + const int tmp_set_size = 16; + Label ldm_loop; + + assert_different_registers(s, d, count, tmp2, r4, r5, r6, r7); + + __ cmp(count, tmp_set_size); + __ b(small, Assembler::LT); + + __ push(tmp_set, sp); + + __ mov(tmp2, count, __ lsr(exact_log2(tmp_set_size))); + __ sub(count, count, tmp2, __ lsl(exact_log2(tmp_set_size))); + + __ bind(ldm_loop); + + __ pld(Address(s, step < 0 ? -2 * tmp_set_size : tmp_set_size)); + + if (step < 0) { + __ ldmdb(s, tmp_set.bits()); + __ stmdb(d, tmp_set.bits()); + } else { + __ ldmia(s, tmp_set.bits()); + __ stmia(d, tmp_set.bits()); + } + + __ subs(tmp2, tmp2, 1); + __ b(ldm_loop, Assembler::NE); + + __ pop(tmp_set, sp); + } + + __ bind(small); + + Label words_loop, words_done; + __ cmp(count, BytesPerWord); + __ b(words_done, Assembler::LT); + + __ mov(tmp2, count, __ lsr(exact_log2(BytesPerWord))); + __ sub(count, count, tmp2, __ lsl(exact_log2(BytesPerWord))); + + __ bind(words_loop); + + Address src = step < 0 ? __ pre(s, -BytesPerWord) : __ post(s, BytesPerWord); + Address dst = step < 0 ? __ pre(d, -BytesPerWord) : __ post(d, BytesPerWord); + + __ pld(Address(s, step < 0 ? -2 * BytesPerWord : BytesPerWord)); + __ ldr(t0, src); + __ str(t0, dst); + __ subs(tmp2, tmp2, 1); + + __ b(words_loop, Assembler::NE); + + __ bind(words_done); + copy_memory_small(s, d, count, t0, is_aligned, step); + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 4-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + // Side Effects: + // disjoint_int_copy_entry is set to the no-overlap entry point + // used by generate_conjoint_int_oop_copy(). 
+ // + address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address *entry, + const char *name, bool dest_uninitialized = false) { + Register s = c_rarg0, d = c_rarg1, count = c_rarg2; + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + if (entry != NULL) { + *entry = __ pc(); + // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) + BLOCK_COMMENT("Entry:"); + } + __ enter(VMFrameAPCS); + + DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + if (aligned) { + decorators |= ARRAYCOPY_ALIGNED; + } + + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, is_oop, d, count); + + if (is_oop) { + __ push(RegSet::of(d, count), sp); + } + + // copy memory likes to voluntary use rscratch2 and r3 + copy_memory(aligned, s, d, count, size); + + if (is_oop) { + __ pop(RegSet::of(d, count), sp); + __ sub(count, count, 1); // make an inclusive end pointer + __ lea(count, Address(d, count, lsl(exact_log2(size)))); + } + + // barriers are for oop arrays only, so don't worry about s, d and count being lost before + bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, rscratch2); + + __ leave(VMFrameAPCS); + __ b(lr); + return start; + } + + // Arguments: + // aligned - true => Input and output aligned on a HeapWord == 4-byte boundary + // ignored + // is_oop - true => oop array, so generate store check code + // name - stub name string + // + // Inputs: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // + // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let + // the hardware handle it. The two dwords within qwords that span + // cache line boundaries will still be loaded and stored atomicly. + // + address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, + address *entry, const char *name, + bool dest_uninitialized = false) { + Register s = c_rarg0, d = c_rarg1, count = c_rarg2; + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + __ cmp(d, s); + __ b(nooverlap_target, Assembler::LS); + + __ enter(VMFrameAPCS); + + DecoratorSet decorators = IN_HEAP | IS_ARRAY; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + if (aligned) { + decorators |= ARRAYCOPY_ALIGNED; + } + + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, is_oop, d, count); + + if (is_oop) { + __ push(RegSet::of(d, count), sp); + } + + // copy memory likes to voluntary use rscratch2 and r3 + copy_memory(aligned, s, d, count, -size); + + if (is_oop) { + __ pop(RegSet::of(d, count), sp); + __ sub(count, count, 1); // make an inclusive end pointer + __ lea(count, Address(d, count, lsl(exact_log2(size)))); + } + + // barriers are for oop arrays only, so don't worry about s, d and count being lost before + bs->arraycopy_epilogue(_masm, decorators, is_oop, d, count, rscratch2); + + __ leave(VMFrameAPCS); + __ b(lr); + return start; + } + + // Helper for generating a dynamic type check. + // Smashes rscratch1. 
+ void generate_type_check(Register sub_klass, + Register super_check_offset, + Register super_klass, + Label& L_success) { + assert_different_registers(sub_klass, super_check_offset, super_klass); + + BLOCK_COMMENT("type_check:"); + + Label L_miss; + + __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL, + super_check_offset); + __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL); + + // Fall through on failure! + __ BIND(L_miss); + } + + // + // Generate checkcasting array copy stub + // + // Input: + // c_rarg0 - source array address + // c_rarg1 - destination array address + // c_rarg2 - element count, treated as ssize_t, can be zero + // c_rarg3 - size_t ckoff (super_check_offset) + // [sp] - oop ckval (super_klass) + // + // Output: + // r0 == 0 - success + // r0 == -1^K - failure, where K is partial transfer count + // + address generate_checkcast_copy(const char *name, address *entry, + bool dest_uninitialized = false) { + Label L_load_element, L_store_element, L_do_card_marks, L_done, L_done_pop; + + // Input registers (after setup_arg_regs) + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register count = c_rarg2; // elementscount + const Register ckoff = c_rarg3; // super_check_offset + + // Registers used as temps + const Register ckval = r4; // super_klass + const Register count_save = r5; // orig elementscount + const Register copied_oop = r6; // actual oop copied + const Register oop_klass = r7; // oop._klass + const Register start_to = lr; + + //--------------------------------------------------------------- + // Assembler stub will be used for this call to arraycopy + // if the two arrays are subtypes of Object[] but the + // destination array type is not equal to or a supertype + // of the source type. Each element must be separately + // checked. + + assert_different_registers(from, to, count, ckoff, ckval, + copied_oop, oop_klass, count_save); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + __ enter(VMFrameAPCS); // required for proper stackwalking of RuntimeStub frame + +#ifdef ASSERT + // caller guarantees that the arrays really are different + // otherwise, we would have to make conjoint checks + { Label L; + array_overlap_test(L);//, TIMES_OOP); + __ stop("checkcast_copy within a single array"); + __ bind(L); + } +#endif //ASSERT + + // Caller of this entry point must set up the argument registers. + if (entry != NULL) { + *entry = __ pc(); + BLOCK_COMMENT("Entry:"); + } + + // Empty array: Nothing to do. + __ cbz(count, L_done); + + // rscratch1 used as temp, rscratch2 can be killed by inc_counter_np + __ push(RegSet::of(count_save, copied_oop, oop_klass, ckval, rscratch1, rscratch2), sp); + __ ldr(ckval, Address(rfp, wordSize)); + +#ifdef ASSERT + BLOCK_COMMENT("assert consistent ckoff/ckval"); + // The ckoff and ckval must be mutually consistent, + // even though caller generates both. 
+ { Label L; + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + __ ldr(rscratch1, Address(ckval, sco_offset)); + __ cmp(ckoff, rscratch1); + __ b(L, Assembler::EQ); + __ stop("super_check_offset inconsistent"); + __ bind(L); + } +#endif //ASSERT + + DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST; + bool is_oop = true; + if (dest_uninitialized) { + decorators |= IS_DEST_UNINITIALIZED; + } + + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->arraycopy_prologue(_masm, decorators, is_oop, to, count); + + // save the original count + __ mov(count_save, count); + + // save destination array start address + __ mov(start_to, to); + + // Copy from low to high addresses + __ b(L_load_element); + + // ======== begin loop ======== + // (Loop is rotated; its entry is L_load_element.) + // Loop control: + // for (; count != 0; count--) { + // copied_oop = load_heap_oop(from++); + // ... generate_type_check ...; + // store_heap_oop(to++, copied_oop); + // } + __ align(OptoLoopAlignment); + + __ BIND(L_store_element); + __ store_heap_oop(__ post(to, 4), copied_oop, noreg, noreg, AS_RAW); // store the oop + __ sub(count, count, 1); + __ cbz(count, L_do_card_marks); + + // ======== loop entry is here ======== + __ BIND(L_load_element); + __ load_heap_oop(copied_oop, __ post(from, 4), noreg, noreg, AS_RAW); // load the oop + __ cbz(copied_oop, L_store_element); + + __ load_klass(oop_klass, copied_oop);// query the object klass + generate_type_check(oop_klass, ckoff, ckval, L_store_element); + // ======== end loop ======== + + // It was a real error; we must depend on the caller to finish the job. + // Register count = remaining oops, count_orig = total oops. + // Emit GC store barriers for the oops we have copied and report + // their number to the caller. 
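+    //
+    // A small worked example of this encoding (illustrative only): if 3 of
+    // 10 oops were copied before the type check failed, then K = 3 and the
+    // stub returns -1^K = ~3 = -4 in r0; the caller recovers the partial
+    // transfer count as ~r0.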
+ + __ subs(count, count_save, count); // K = partially copied oop count + __ inv(count, count); // report (-1^K) to caller + __ b(L_done_pop, Assembler::EQ); + + __ BIND(L_do_card_marks); + __ add(to, to, -heapOopSize); // make an inclusive end pointer + bs->arraycopy_epilogue(_masm, decorators, is_oop, start_to, to, rscratch1); + + __ bind(L_done_pop); + inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); + __ pop(RegSet::of(count_save, copied_oop, oop_klass, ckval, rscratch1, rscratch2), sp); + + __ bind(L_done); + __ mov(r0, count); + __ leave(VMFrameAPCS); + __ b(lr); + return start; + } + + void generate_arraycopy_stubs() { + address entry; + + // jbyte + StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_copy(sizeof(jbyte), true, false, &entry, "arrayof_jbyte_disjoint_arraycopy"); + StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_copy(sizeof(jbyte), true, false, entry, NULL, "arrayof_jbyte_arraycopy"); + StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_copy(sizeof(jbyte), false, false, &entry, "jbyte_disjoint_arraycopy"); + StubRoutines::_jbyte_arraycopy = generate_conjoint_copy(sizeof(jbyte), false, false, entry, NULL, "jbyte_arraycopy"); + // jshort + StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_copy(sizeof(jshort), true, false, &entry, "arrayof_jshort_disjoint_arraycopy"); + StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_copy(sizeof(jshort), true, false, entry, NULL, "arrayof_jshort_arraycopy"); + StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_copy(sizeof(jshort), false, false, &entry, "jshort_disjoint_arraycopy"); + StubRoutines::_jshort_arraycopy = generate_conjoint_copy(sizeof(jshort), false, false, entry, NULL, "jshort_arraycopy"); + // jint (always aligned) + StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_copy(sizeof(jint), true, false, &entry, "arrayof_jint_disjoint_arraycopy"); + StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_copy(sizeof(jint), true, false, entry, NULL, "arrayof_jint_arraycopy"); + StubRoutines::_jint_disjoint_arraycopy = StubRoutines::_arrayof_jint_disjoint_arraycopy; + StubRoutines::_jint_arraycopy = StubRoutines::_arrayof_jint_arraycopy; + // jlong (always aligned) + StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_copy(sizeof(jlong), true, false, &entry, "arrayof_jlong_disjoint_arraycopy"); + StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_copy(sizeof(jlong), true, false, entry, NULL, "arrayof_jlong_arraycopy"); + StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy; + StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy; + // OOP (always aligned) + StubRoutines::_arrayof_oop_disjoint_arraycopy = generate_disjoint_copy(sizeof(jint), true, true, &entry, "arrayof_oop_disjoint_arraycopy"); + StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_copy(sizeof(jint), true, true, entry, NULL, "arrayof_oop_arraycopy"); + StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_copy(sizeof(jint), true, true, &entry, "arrayof_oop_disjoint_arraycopy_uninit", true); + StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_copy(sizeof(jint), true, true, entry, NULL, "arrayof_oop_arraycopy_uninit", true); + StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy; + StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy; + StubRoutines::_oop_disjoint_arraycopy_uninit = 
StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
+    StubRoutines::_oop_arraycopy_uninit          = StubRoutines::_arrayof_oop_arraycopy_uninit;
+
+    StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", NULL);
+    StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, true);
+  }
+
+  void generate_math_stubs() { Unimplemented(); }
+
+  // Safefetch stubs.
+  void generate_safefetch(const char* name, int size, address* entry,
+                          address* fault_pc, address* continuation_pc) {
+    // safefetch signatures:
+    //   int      SafeFetch32(int*      adr, int      errValue);
+    //   intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue);
+    //
+    // arguments:
+    //   c_rarg0 = adr
+    //   c_rarg1 = errValue
+    //
+    // result:
+    //   r0 = *adr or errValue
+
+    StubCodeMark mark(this, "StubRoutines", name);
+
+    // Entry point, pc or function descriptor.
+    *entry = __ pc();
+
+    // Load *adr into c_rarg0, may fault.
+    __ mov(c_rarg2, c_rarg0);
+    *fault_pc = __ pc();
+    switch (size) {
+      case 4:
+        // int32_t
+        __ ldr(c_rarg0, Address(c_rarg2, 0));
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+    __ b(lr);
+    // return errValue or *adr
+    *continuation_pc = __ pc();
+    __ mov(r0, c_rarg1);
+    __ b(lr);
+  }
+
+  /**
+   * Arguments:
+   *
+   * Inputs:
+   *   c_rarg0   - int crc
+   *   c_rarg1   - byte* buf
+   *   c_rarg2   - int length
+   *
+   * Output:
+   *   r0        - int crc result
+   *
+   * Preserves:
+   *   r13
+   *
+   */
+  address generate_updateBytesCRC32(int is_crc32c) {
+    assert(!is_crc32c ? UseCRC32Intrinsics : UseCRC32CIntrinsics, "what are we doing here?");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", !is_crc32c ? "updateBytesCRC32" : "updateBytesCRC32C");
+
+    address start = __ pc();
+
+    const Register crc    = c_rarg0;  // crc
+    const Register buf    = c_rarg1;  // source java byte array address
+    const Register len    = c_rarg2;  // length
+    const Register table0 = c_rarg3;  // crc_table address
+    const Register table1 = r4;
+    const Register table2 = r5;
+    const Register table3 = lr;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    __ push(RegSet::of(table1, table2, r6, r7, rscratch1, rscratch2), sp);
+
+    __ kernel_crc32(crc, buf, len,
+                    table0, table1, table2, table3, rscratch1, rscratch2, r6, is_crc32c);
+
+    __ pop(RegSet::of(table1, table2, r6, r7, rscratch1, rscratch2), sp);
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(lr);
+
+    return start;
+  }
+
+  /**
+   * Arguments:
+   *
+   * Input:
+   *   c_rarg0   - x address
+   *   c_rarg1   - x length
+   *   c_rarg2   - y address
+   *   c_rarg3   - y length
+   *   sp[0]     - z address
+   *   sp[1]     - z length
+   */
+  address generate_multiplyToLen() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
+
+    address start = __ pc();
+    const Register x     = r0;
+    const Register xlen  = r1;
+    const Register y     = r2;
+    const Register ylen  = r3;
+
+    const Register z     = r4;
+    const Register zlen  = r5;
+
+    const Register tmp1  = r6;
+    const Register tmp2  = r7;
+    const Register tmp3  = r8;
+    const Register tmp4  = r9;
+    const Register tmp5  = r12;
+    const Register tmp6  = r14;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    __ push(RegSet::of(z, zlen, tmp1, tmp2)+RegSet::of(tmp3, tmp4, tmp5, tmp6), sp);
+    __ ldr(z, Address(rfp, 4));
+    __ ldr(zlen, Address(rfp, 8));
+    __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
+    __ pop(RegSet::of(z, zlen, tmp1, tmp2)+RegSet::of(tmp3,
tmp4, tmp5, tmp6), sp); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(lr); + + return start; + } + + /** + * Arguments: + * + * Input: + * c_rarg0 - out + * c_rarg1 - int + * c_rarg2 - offset + * c_rarg3 - len + * sp[0] - k + */ + address generate_mulAdd() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "mulAdd"); + + address start = __ pc(); + const Register out = r0; + const Register in = r1; + const Register offset = r2; + const Register len = r3; + + const Register k = r4; + + const Register tmp1 = r6; + const Register tmp2 = r7; + const Register tmp3 = r8; + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + __ push(RegSet::of(k, tmp1, tmp2, tmp3), sp); + __ ldr(k, Address(rfp, 4)); + __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3); + __ pop(RegSet::of(k, tmp1, tmp2, tmp3), sp); + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(lr); + + return start; + } + + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + + address generate_aescrypt_encryptBlock() { + assert(UseAESIntrinsics, "what are we doing here?"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); + + address start = __ pc(); + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register keylen = c_rarg3; + const Register table1 = r4; + const Register t0 = r5; + const Register t1 = r6; + const Register t2 = r7; + const Register t3 = r8; + const Register t4 = r9; + const Register t5 = r10; + const Register t6 = r11; + const Register t7 = r12; + + BLOCK_COMMENT("Entry:"); + __ enter(); + + __ push(RegSet::of(r4, r5, r6, r7, r8), sp); + __ push(RegSet::of(r9, r10, r11, r12), sp); + __ kernel_aescrypt_encryptBlock(from, to, key, keylen, table1, + t0, t1, t2, t3, t4, t5, t6, t7); + __ pop(RegSet::of(r9, r10, r11, r12), sp); + __ pop(RegSet::of(r4, r5, r6, r7, r8), sp); + + __ leave(); + __ ret(lr); + + return start; + } + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - destination byte array address + // c_rarg2 - K (key) in little endian int array + // + + address generate_aescrypt_decryptBlock() { + assert(UseAESIntrinsics, "what are we doing here?"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); + + address start = __ pc(); + + const Register from = c_rarg0; // source array address + const Register to = c_rarg1; // destination array address + const Register key = c_rarg2; // key array address + const Register keylen = c_rarg3; + const Register table1 = r4; + const Register t0 = r5; + const Register t1 = r6; + const Register t2 = r7; + const Register t3 = r8; + const Register t4 = r9; + const Register t5 = r10; + const Register t6 = r11; + const Register t7 = r12; + + BLOCK_COMMENT("Entry:"); + __ enter(); + + __ push(RegSet::of(r4, r5, r6, r7, r8), sp); + __ push(RegSet::of(r9, r10, r11, r12), sp); + __ kernel_aescrypt_decryptBlock(from, to, key, keylen, table1, + t0, t1, t2, t3, t4, t5, t6, t7); + __ pop(RegSet::of(r9, r10, r11, r12), sp); + __ pop(RegSet::of(r4, r5, r6, r7, r8), sp); + + __ leave(); + __ ret(lr); + + return start; + } + + // Arguments: + // + // Inputs: + // c_rarg0 - source 
byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+  // Output:
+  //   r0        - input length
+  //
+
+  address generate_cipherBlockChaining_encryptAESCrypt(bool len_on_stack) {
+    assert(UseAESIntrinsics && UseNeon, "what are we doing here?");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+
+    address start = __ pc();
+
+    const Register from   = c_rarg0;  // source array address
+    const Register to     = c_rarg1;  // destination array address
+    const Register key    = c_rarg2;  // key array address
+    const Register rvec   = c_rarg3;  // r byte array initialized from initvector array address
+                                      // and left with the results of the last encryption block
+    const Register len    = r4;       // src len (must be multiple of blocksize 16)
+    const Register keylen = r5;
+    const Register table  = r6;
+    const Register t0     = r7;
+    const Register t1     = r8;
+    const Register t2     = r9;
+    const Register t3     = r10;
+    const Register t4     = r11;
+    const Register t5     = r12;
+    const Register t6     = lr;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter();
+
+    __ push(RegSet::of(r4, r5, r6, r7, r8), sp);
+    __ push(RegSet::of(r9, r10, r11, r12), sp);
+    __ vstmdb_f64(sp, 0xff00); // d8-d15 are callee save registers
+
+    if (len_on_stack)
+      __ ldr(len, Address(rfp, wordSize));
+    __ kernel_aescrypt_encrypt(from, to, key, rvec, len, keylen, table,
+                               t0, t1, t2, t3, t4, t5, t6);
+
+    __ vldmia_f64(sp, 0xff00);
+    __ pop(RegSet::of(r9, r10, r11, r12), sp);
+    __ pop(RegSet::of(r4, r5, r6, r7, r8), sp);
+
+    __ leave();
+    __ ret(lr);
+
+    return start;
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+  // Output:
+  //   r0        - input length
+  //
+
+  address generate_cipherBlockChaining_decryptAESCrypt(bool len_on_stack) {
+    assert(UseAESIntrinsics && UseNeon, "what are we doing here?");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+
+    address start = __ pc();
+
+    const Register from   = c_rarg0;  // source array address
+    const Register to     = c_rarg1;  // destination array address
+    const Register key    = c_rarg2;  // key array address
+    const Register rvec   = c_rarg3;  // r byte array initialized from initvector array address
+                                      // and left with the results of the last encryption block
+    const Register len    = r4;       // src len (must be multiple of blocksize 16)
+    const Register keylen = r5;
+    const Register table  = r6;
+    const Register t0     = r7;
+    const Register t1     = r8;
+    const Register t2     = r9;
+    const Register t3     = r10;
+    const Register t4     = r11;
+    const Register t5     = r12;
+    const Register t6     = lr;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter();
+
+    __ push(RegSet::of(r4, r5, r6, r7, r8), sp);
+    __ push(RegSet::of(r9, r10, r11, r12), sp);
+    __ vstmdb_f64(sp, 0xff00); // d8-d15 are callee save registers
+
+    if (len_on_stack)
+      __ ldr(len, Address(rfp, wordSize));
+    __ kernel_aescrypt_decrypt(from, to, key, rvec, len, keylen, table,
+                               t0, t1, t2, t3, t4, t5, t6);
+
+    __ vldmia_f64(sp, 0xff00);
+    __ pop(RegSet::of(r9, r10, r11, r12), sp);
+    __ pop(RegSet::of(r4, r5, r6, r7, r8), sp);
+
+    __ leave();
+    __ ret(lr);
+
+    return start;
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - state
array + + address generate_sha_implCompress() { + assert(UseSHA1Intrinsics, "what are we doing here?"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "sha_implCompress"); + address start = __ pc(); + + const Register from = c_rarg0; // source array address + const Register state = c_rarg1; // state array address + const Register t0 = c_rarg2; + const Register t1 = c_rarg3; + const Register t2 = r4; + const Register t3 = r5; + const Register t4 = r6; + const Register t5 = r7; + const Register t6 = r8; + const Register t7 = r9; + const Register t8 = r10; + const Register t9 = r11; + const Register t10 = r12; + DoubleFloatRegSet _fToSave = DoubleFloatRegSet::range(d0, d15); + + BLOCK_COMMENT("Entry:"); + __ enter(); + + __ push(RegSet::of(r4, r5, r6, r7, r8), sp); + __ push(RegSet::of(r9, r10, r11, r12), sp); + __ vstmdb_f64(sp, _fToSave.bits()); + + __ kernel_sha_implCompress(from, state, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10); + + __ vldmia_f64(sp, _fToSave.bits(), true); + __ pop(RegSet::of(r9, r10, r11, r12), sp); + __ pop(RegSet::of(r4, r5, r6, r7, r8), sp); + + __ leave(); + __ ret(lr); + + return start; + } + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - state array + + address generate_sha256_implCompress() { + assert(UseSHA256Intrinsics, "what are we doing here?"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "sha256_implCompress"); + address start = __ pc(); + + const Register from = c_rarg0; // source array address + const Register state = c_rarg1; // state array address + const Register t0 = c_rarg2; + const Register t1 = c_rarg3; + const Register t2 = r4; + const Register t3 = r5; + const Register t4 = r6; + const Register t5 = r7; + const Register t6 = r8; + const Register t7 = r9; + const Register t8 = r10; + const Register t9 = r11; + const Register t10 = r12; + const Register t11 = lr; + DoubleFloatRegSet _fToSave1 = DoubleFloatRegSet::range(d0, d15); + DoubleFloatRegSet _fToSave2 = DoubleFloatRegSet::range(d16,d31); + + BLOCK_COMMENT("Entry:"); + __ enter(); + + __ push(RegSet::of(r4, r5, r6, r7, r8), sp); + __ push(RegSet::of(r9, r10, r11, r12, lr), sp); + __ vstmdb_f64(sp, _fToSave1.bits()); + __ vstmdb_f64(sp, _fToSave2.bits()); + + __ kernel_sha256_implCompress(from, state, t0, t1, + t2, t3, t4, t5, t6, t7, t8, t9, t10, t11); + + __ vldmia_f64(sp, _fToSave2.bits(), true); + __ vldmia_f64(sp, _fToSave1.bits(), true); + __ pop(RegSet::of(r9, r10, r11, r12, lr), sp); + __ pop(RegSet::of(r4, r5, r6, r7, r8), sp); + + __ leave(); + __ ret(lr); + + return start; + } + + // Arguments: + // + // Inputs: + // c_rarg0 - source byte array address + // c_rarg1 - state array + + address generate_sha512_implCompress() { + assert(UseSHA512Intrinsics, "what are we doing here?"); + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "sha512_implCompress"); + address start = __ pc(); + + const Register from = c_rarg0; // source array address + const Register state = c_rarg1; // state array address + const Register t0 = c_rarg2; + const Register t1 = c_rarg3; + DoubleFloatRegSet _fToSave1 = DoubleFloatRegSet::range(d0, d15); + DoubleFloatRegSet _fToSave2 = DoubleFloatRegSet::range(d16,d31); + + + BLOCK_COMMENT("Entry:"); + __ enter(); + + __ vstmdb_f64(sp, _fToSave1.bits()); + __ vstmdb_f64(sp, _fToSave2.bits()); + + __ kernel_sha512_implCompress(from, state, t0, t1); + + __ vldmia_f64(sp, _fToSave2.bits(), true); + __ vldmia_f64(sp, _fToSave1.bits(), true); + + __ 
leave(); + __ ret(lr); + + return start; + } + + // Continuation point for throwing of implicit exceptions that are + // not handled in the current activation. Fabricates an exception + // oop and initiates normal exception dispatching in this + // frame. Since we need to preserve callee-saved values (currently + // only for C2, but done for C1 as well) we need a callee-saved oop + // map and therefore have to make these stubs into RuntimeStubs + // rather than BufferBlobs. If the compiler needs all registers to + // be preserved between the fault point and the exception handler + // then it must assume responsibility for that in + // AbstractCompiler::continuation_for_implicit_null_exception or + // continuation_for_implicit_division_by_zero_exception. All other + // implicit exceptions (e.g., NullPointerException or + // AbstractMethodError on entry) are either at call sites or + // otherwise assume that stack unwinding will be initiated, so + // caller saved registers were assumed volatile in the compiler. + +#undef __ +#define __ masm-> + + address generate_throw_exception(const char* name, + address runtime_entry, + Register arg1 = noreg, + Register arg2 = noreg) { + // Information about frame layout at time of blocking runtime call. + // Note that we only have to preserve callee-saved registers since + // the compilers are responsible for supplying a continuation point + // if they expect all registers to be preserved. + // n.b. aarch32 asserts that frame::arg_reg_save_area_bytes == 0 + const int framesize = frame::get_frame_size(); + const int insts_size = 512; + const int locs_size = 64; + + CodeBuffer code(name, insts_size, locs_size); + OopMapSet* oop_maps = new OopMapSet(); + MacroAssembler* masm = new MacroAssembler(&code); + + address start = __ pc(); + + // This is an inlined and slightly modified version of call_VM + // which has the ability to fetch the return PC out of + // thread-local storage and also sets up last_Java_sp slightly + // differently than the real call_VM + + __ enter(); // Save at least FP and LR before call + + assert(is_even(framesize), "sp not 8-byte aligned"); + + int frame_complete = __ pc() - start; + + // Set up last_Java_sp and last_Java_fp + address the_pc = __ pc(); + __ set_last_Java_frame(sp, rfp, (address)NULL, rscratch1); + + // Call runtime + if (arg1 != noreg) { + assert(arg2 != c_rarg1, "clobbered"); + __ mov(c_rarg1, arg1); + } + if (arg2 != noreg) { + __ mov(c_rarg2, arg2); + } + __ mov(c_rarg0, rthread); + BLOCK_COMMENT("call runtime_entry"); + __ align_stack(); + __ mov(rscratch1, runtime_entry); + __ bl(rscratch1); + + // Generate oop map + OopMap* map = new OopMap(framesize, 0); + + oop_maps->add_gc_map(the_pc - start, map); + + __ reset_last_Java_frame(true); + __ maybe_isb(); + + __ leave(); + + // check for pending exceptions +#ifdef ASSERT + Label L; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbnz(rscratch1, L); + __ should_not_reach_here(); + __ bind(L); +#endif // ASSERT + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + + // codeBlob framesize is in words (not VMRegImpl::slot_size) + RuntimeStub* stub = + RuntimeStub::new_runtime_stub(name, + &code, + frame_complete, + framesize, + oop_maps, false); + return stub->entry_point(); + } + + class MontgomeryMultiplyGenerator : public MacroAssembler { + + Register Pa_base, Pb_base, Pn_base, Pm_base, Rlen, Ri, Rj, Pa, Pb, Pn, Pm; + FloatRegister inv, Ra, Rb, Rm, Rn, RabAB, RaBAb, s0, s1, s2, tmp; + + RegSet _toSave; + 
DoubleFloatRegSet _fToSave;
+    bool _squaring;
+
+  public:
+    MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
+      : MacroAssembler(as->code()), _squaring(squaring) {
+
+      // Register allocation
+
+      Register reg = c_rarg0;
+
+      Pa_base = reg++;       // Argument registers
+      if (squaring)
+        Pb_base = Pa_base;
+      else
+        Pb_base = reg++;
+      Pn_base = reg++;
+      Rlen = reg++;
+      Pm_base = r4;
+
+      Ri = r5;               // Inner and outer loop indexes.
+      Rj = r6;
+
+      Pa = r7;               // Pointers to the current/next digit of a, b, n, and m.
+      Pb = r8;
+      Pm = r9;
+      Pn = r12;
+
+      _toSave = RegSet::range(r4, r8) + RegSet::of(r9, r12);
+
+      // Now NEON registers
+
+      // Working registers:
+      Ra = d0;               // The current digit of a, b, n, and m.
+      Rb = d1;               // The values are stored as read, that is high and
+      Rm = d2;               // low 32-bit parts are exchanged
+      Rn = d3;
+
+      // Three registers which form a triple-precision accumulator.
+      // For sake of performance these are 128-bit and are overlapping
+      // (hence the name is s, not t). The schema is the following:
+      //    w4|w3|w2|w1|w0|  (32-bit words)
+      // s0 lo: |**|**|
+      // s0 hi: |**|**|
+      // s1 lo: |**|**|
+      // s1 hi: |**|**|
+      // s2 lo: |**|**|
+      // s2 hi: |**|**|
+      // the idea is that each of the 64-bit s registers accumulates only
+      // 32-bit numbers and hence never needs a carry operation
+
+      s0 = q2;
+      s1 = q3;
+      s2 = q4;
+
+      RabAB = q5;            // Product registers: low, high and middle parts
+      RaBAb = q6;            // of a*b and m*n. hi(A)*hi(B) is the same quad as lo(a)*lo(b)
+
+      inv = d14;
+      tmp = d15;
+
+      _fToSave = DoubleFloatRegSet::range(d8, tmp);
+    }
+
+  private:
+    void save_regs() {
+      vstmdb_f64(sp, _fToSave.bits());
+      push(_toSave, sp);
+    }
+
+    void restore_regs() {
+      pop(_toSave, sp);
+      vldmia_f64(sp, _fToSave.bits(), true);
+    }
+
+    template <typename T>
+    void unroll_2(Register count, T block) {
+      Label loop, end, odd;
+      tbnz(count, 0, odd);
+      cbz(count, end);
+      align(16);
+      bind(loop);
+      (this->*block)();
+      bind(odd);
+      (this->*block)();
+      subs(count, count, 2);
+      b(loop, Assembler::GT);
+      bind(end);
+    }
+
+    void pre1(Register i) {
+      block_comment("pre1");
+      // Pa = Pa_base;
+      // Pb = Pb_base + i;
+      // Pm = Pm_base;
+      // Pn = Pn_base + i;
+      // Ra = *Pa;
+      // Rb = *Pb;
+      // Rm = *Pm;
+      // Rn = *Pn;
+      lea(Pa, Address(Pa_base));
+      lea(Pb, Address(Pb_base, i, lsl(LogBytesPerLong), Address::SUB));
+      lea(Pm, Address(Pm_base));
+      lea(Pn, Address(Pn_base, i, lsl(LogBytesPerLong), Address::SUB));
+
+      vld1_64(Ra, Address(Pa), Assembler::ALIGN_STD);
+      vld1_64(Rb, Address(Pb), Assembler::ALIGN_STD);
+      vld1_64(Rm, Address(Pm), Assembler::ALIGN_STD);
+      vld1_64(Rn, Address(Pn), Assembler::ALIGN_STD);
+    }
+
+    // The core multiply-accumulate step of a Montgomery
+    // multiplication. The idea is to schedule operations as a
+    // pipeline so that instructions with long latencies (loads and
+    // multiplies) have time to complete before their results are
+    // used. This most benefits in-order implementations of the
+    // architecture but out-of-order ones also benefit.
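+    //
+    // For reference, the scalar multiply-accumulate primitive that step()
+    // pipelines corresponds to the MACC() used in the "In C, approximately"
+    // comments further below. A rough C sketch (an illustrative assumption,
+    // using 64-bit digits and a compiler-provided 128-bit type; not part of
+    // the generated code):
+    //
+    //   // t2:t1:t0 += a * b  (triple-precision accumulate)
+    //   void MACC(unsigned long long a, unsigned long long b,
+    //             unsigned long long &t0, unsigned long long &t1,
+    //             unsigned long long &t2) {
+    //     unsigned __int128 p   = (unsigned __int128)a * b;
+    //     unsigned __int128 lo  = (unsigned __int128)t0 + (unsigned long long)p;
+    //     t0 = (unsigned long long)lo;
+    //     unsigned __int128 mid = (unsigned __int128)t1
+    //                           + (unsigned long long)(p >> 64)
+    //                           + (unsigned long long)(lo >> 64);
+    //     t1 = (unsigned long long)mid;
+    //     t2 += (unsigned long long)(mid >> 64);
+    //   }
+    //
+    // The NEON code below keeps this accumulator in the s0/s1/s2 register
+    // triple described above rather than in three scalar words.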
+ void step() { + block_comment("step"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + sub(Pm, Pm, BytesPerLong); + add(Pn, Pn, BytesPerLong); + vmul_acc1(Rm, Rn, tmp, RabAB, RaBAb); + vld1_64(Rm, Address(Pm), Assembler::ALIGN_STD); + vld1_64(Rn, Address(Pn), Assembler::ALIGN_STD); + vmul_acc2(tmp, RabAB, RaBAb); + + // MACC(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + sub(Pa, Pa, BytesPerLong); + add(Pb, Pb, BytesPerLong); + vmul_acc1(Ra, Rb, tmp, RabAB, RaBAb); + vld1_64(Ra, Address(Pa), Assembler::ALIGN_STD); + vld1_64(Rb, Address(Pb), Assembler::ALIGN_STD); + vmul_acc2(tmp, RabAB, RaBAb); + } + + void post1() { + FloatRegister t0 = RabAB; + + block_comment("post1"); + + // MACC(Ra, Rb, t0, t1, t2); + vmul_acc1(Ra, Rb, tmp, RabAB, RaBAb); + vmul_acc2(tmp, RabAB, RaBAb); + + // *Pm = Rm = t0 * inv; + vmul_fin(t0, tmp); + vmul_simple(Rm, t0, inv, RaBAb); // RaBAb is tmp + vrev64_64_32(Rm, Rm); // write in reversed, big-endian format + vst1_64(Rm, Address(Pm), ALIGN_STD); + + // MACC(Rm, Rn, t0, t1, t2); + vmul_acc1(Rm, Rn, tmp, RabAB, RaBAb); + vmul_acc2(tmp, RabAB, RaBAb); + +#ifndef PRODUCT + // assert(t0 == 0, "broken Montgomery multiply"); + { + vmul_fin(t0, tmp); + Label ok; + push(RegSet::of(Ri, Rj), sp); + vmov_f64(Ri, Rj, t0); + orr(Ri, Ri, Rj); + cbz(Ri, ok); { + stop("broken Montgomery multiply"); + } bind(ok); + pop(RegSet::of(Ri, Rj), sp); + } +#endif + + // t0 = t1; t1 = t2; t2 = 0; + shift_t(RabAB); + } + + void pre2(Register i, Register len) { + block_comment("pre2"); + // Pa = Pa_base + i-len; + // Pb = Pb_base + len; + // Pm = Pm_base + i-len; + // Pn = Pn_base + len; + + // Rj == i-len + sub(Rj, i, len); + + lea(Pa, Address(Pa_base, Rj, lsl(LogBytesPerLong), Address::SUB)); + lea(Pb, Address(Pb_base, len, lsl(LogBytesPerLong), Address::SUB)); + lea(Pm, Address(Pm_base, Rj, lsl(LogBytesPerLong), Address::SUB)); + lea(Pn, Address(Pn_base, len, lsl(LogBytesPerLong), Address::SUB)); + + // Ra = *++Pa; + // Rb = *--Pb; + // Rm = *++Pm; + // Rn = *--Pn; + sub(Pa, Pa, BytesPerLong); + add(Pb, Pb, BytesPerLong); + sub(Pm, Pm, BytesPerLong); + add(Pn, Pn, BytesPerLong); + + vld1_64(Ra, Address(Pa), ALIGN_STD); + vld1_64(Rb, Address(Pb), ALIGN_STD); + vld1_64(Rm, Address(Pm), ALIGN_STD); + vld1_64(Rn, Address(Pn), ALIGN_STD); + } + + void post2(Register i, Register len) { + FloatRegister t0 = RabAB; + + block_comment("post2"); + + vmul_fin(t0, tmp); + + // As soon as we know the least significant digit of our result, + // store it. + // Pm_base[i-len] = t0; + sub(Rj, i, len); + lea(Rj, Address(Pm_base, Rj, lsl(LogBytesPerLong), Address::SUB)); + vrev64_64_32(t0, t0); + vst1_64(t0, Address(Rj), ALIGN_STD); + + // t0 = t1; t1 = t2; t2 = 0; + shift_t(RabAB); + } + + // A carry in t0 after Montgomery multiplication means that we + // should subtract multiples of n from our result in m. We'll + // keep doing that until there is no carry. 
ARM registers are used + // for this operation, this is faster than using NEON + void normalize(Register len, Register t0lo, Register t0hi, + Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5) { + block_comment("normalize"); + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + Label loop, post, again; + Register cnt = tmp1, i = tmp2, m = tmp3, n = tmp4, flags = tmp5; + // let them point to last 32-bit element now + add(Pn_base, Pn_base, BytesPerInt); + add(Pm_base, Pm_base, BytesPerInt); + orrs(n, t0lo, t0hi); + b(post, EQ); { + bind(again); { + mov(i, 0); + mov(cnt, len); // each loop processes 64 bits + ldr(m, Address(Pm_base)); + ldr(n, Address(Pn_base)); + cmp(n, n); // set carry flag, i.e. no borrow + mrs(flags); + align(16); + bind(loop); { + msr(flags, true, false); + sbcs(m, m, n); + str(m, Address(Pm_base, i, lsl(LogBytesPerWord), Address::SUB)); + add(i, i, 1); + ldr(n, Address(Pn_base, i, lsl(LogBytesPerWord), Address::SUB)); + ldr(m, Address(Pm_base, i, lsl(LogBytesPerWord), Address::SUB)); + sbcs(m, m, n); + mrs(flags); + str(m, Address(Pm_base, i, lsl(LogBytesPerWord), Address::SUB)); + add(i, i, 1); + ldr(n, Address(Pn_base, i, lsl(LogBytesPerWord), Address::SUB)); + ldr(m, Address(Pm_base, i, lsl(LogBytesPerWord), Address::SUB)); + sub(cnt, cnt, 1); + } cbnz(cnt, loop); + msr(flags, true, false); + sbcs(t0lo, t0lo, 0); + sbc(t0hi, t0hi, 0); + orrs(n, t0lo, t0hi); + } b(again, NE); + } bind(post); + } + + void step_squaring() { + // An extra ACC for A*B + step(); + vmul_acc2(tmp, RabAB, RaBAb, false); + } + + void last_squaring(Register i) { + Label dont; + // if ((i & 1) == 0) { + tbnz(i, 0, dont); { + // MACC(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + sub(Pa, Pa, BytesPerLong); + add(Pb, Pb, BytesPerLong); + vmul_acc1(Ra, Rb, tmp, RabAB, RaBAb); + vmul_acc2(tmp, RabAB, RaBAb); + } bind(dont); + } + + void extra_step_squaring() { + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + sub(Pm, Pm, BytesPerLong); + add(Pn, Pn, BytesPerLong); + vmul_acc1(Rm, Rn, tmp, RabAB, RaBAb); + vld1_64(Rm, Address(Pm), Assembler::ALIGN_STD); + vld1_64(Rn, Address(Pn), Assembler::ALIGN_STD); + vmul_acc2(tmp, RabAB, RaBAb); + } + + void post1_squaring() { + FloatRegister t0 = RabAB; + + // *Pm = Rm = t0 * inv; + vmul_fin(t0, tmp); + vmul_simple(Rm, t0, inv, RaBAb); // RaBAb is tmp + vrev64_64_32(Rm, Rm); + vst1_64(Rm, Address(Pm), ALIGN_STD); + + // MACC(Rm, Rn, t0, t1, t2); + vmul_acc1(Rm, Rn, tmp, RabAB, RaBAb); + vmul_acc2(tmp, RabAB, RaBAb); + +#ifndef PRODUCT + // assert(t0 == 0, "broken Montgomery multiply"); + { + vmul_fin(t0, tmp); + Label ok; + push(RegSet::of(Ri, Rj), sp); + vmov_f64(Ri, Rj, t0); + orr(Ri, Ri, Rj); + cbz(Ri, ok); { + stop("broken Montgomery square"); + } bind(ok); + pop(RegSet::of(Ri, Rj), sp); + } +#endif + + // t0 = t1; t1 = t2; t2 = 0; + shift_t(RabAB); + } + + /** + * Initializes the accumulators + */ + void vmul_init() { + vmov_128_32(s0, 0); + vmov_128_32(s1, 0); + vmov_128_32(s2, 0); + } + + /** + * Multiplies unsigned 64-bit a by unsigned 64-bit b accumulating the + * result into temp array (s0-s2). temp array is not converged into + * resulting number. See vmul_fin. + * Performance critical part. 
+ * @param a first operand + * @param b second operand + */ + void vmul_acc1(FloatRegister a, FloatRegister b, FloatRegister tmp, FloatRegister RabAB, FloatRegister RaBAb) { + vrev64_64_32(tmp, b); + vmull_32u(RabAB, a, b); + vmull_32u(RaBAb, a, tmp); + } + + void vmul_acc2(FloatRegister tmp, FloatRegister RabAB, FloatRegister RaBAb, bool trn_aBAb = true) { + // words 2-0 of accumulator + vaddw_32u(s0, s0, RabAB->successor(FloatRegisterImpl::DOUBLE)); + if (trn_aBAb) { + // words 3-1 of accumulator. phase 1 + vtrn_64_32(RaBAb, RaBAb->successor(FloatRegisterImpl::DOUBLE)); + } + // words 4-2 of accumulator + vaddw_32u(s2, s2, RabAB); + // words 3-1 of accumulator. phase 2 + vpadal_128_u32(s1, RaBAb); + } + + /** + * Simple unsigned 64-bit multiply a by b. + * Least significant 64 bits of result are written into register res, + * the rest are discarded. + * @param res 64-bit result + * @param a 64-bit operand + * @param b 64-bit operand + * @param tmp 128-bit temporary register + */ + void vmul_simple(FloatRegister res, FloatRegister a, FloatRegister b, FloatRegister tmp) { + FloatRegister tmp2 = tmp->successor(FloatRegisterImpl::DOUBLE); + vmull_32u(tmp, a, b); + vrev64_64_32(tmp2, b); + vmul_64_32(tmp2, a, tmp2); + vpaddl_64_u32(tmp2, tmp2); + vshl_64_64(tmp2, tmp2, 32); + vadd_64_64(res, tmp, tmp2); + } + + /** + * Converges the temp array and returns least significant 64 bits of the result. + * @param t0 the register to write the least significant 64 bits of result + * @param tmp 64-bit temporary register + */ + void vmul_fin(FloatRegister t0, FloatRegister tmp1) { + FloatRegister abLow = s0; + FloatRegister abHigh = s0->successor(FloatRegisterImpl::DOUBLE); + FloatRegister aBAbLow = s1; + + // words 0 and 1 + vshr_64_u64(tmp1, abLow, 32); + vadd_64_64(tmp1, tmp1, abHigh); + vadd_64_64(tmp1, tmp1, aBAbLow); + vmov_64(t0, abLow); + vsli_64_64(t0, tmp1, 32); + } + + /** + * Performs t0 = t1; t1 = t2; t2 = 0; represented as s0-s2. + * @param tmp 128-bit register + */ + void shift_t(FloatRegister tmp) { + FloatRegister s0hi = s0->successor(FloatRegisterImpl::DOUBLE); + FloatRegister s1hi = s1->successor(FloatRegisterImpl::DOUBLE); + FloatRegister s2hi = s2->successor(FloatRegisterImpl::DOUBLE); + FloatRegister tmphi = tmp->successor(FloatRegisterImpl::DOUBLE); + vshr_64_u64(s0, s0, 32); + vaddl_32u(tmp, s1, s0hi); + vadd_64_64(s0, s0, tmp); + vshr_64_u64(s0, s0, 32); + vadd_64_64(tmphi, s0, tmphi); + vaddl_32u(s0, s1hi, s2); + vadd_64_64(s0, s0, tmphi); + vmov_64(s1, s2hi); + vmov_64_32(s1hi, 0); + vmov_128_32(s2, 0); + } + + public: + /** + * Fast Montgomery multiplication. The derivation of the + * algorithm is in A Cryptographic Library for the Motorola + * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. 
+ * + * Arguments: + * + * Inputs for multiplication: + * c_rarg0 - int64 array elements a + * c_rarg1 - int64 array elements b + * c_rarg2 - int64 array elements n (the modulus) + * c_rarg3 - int64 length + * [sp] - int64 inv + * [sp+8] - int64 array elements m (the result) + * + */ + address generate_multiply() { + Label nothing; + align(CodeEntryAlignment); + address entry = pc(); + + cbz(Rlen, nothing); + + enter(); + + // Push all call-saved registers + save_regs(); + + // load inv and m array pointer + add(Ri, rfp, 4); + vld1_64(inv, Address(Ri), ALIGN_STD); + ldr(Pm_base, Address(Ri, BytesPerLong)); + + lsr(Rlen, Rlen, 1); // length in longwords = len/2 + + // let Px_base point on last 64-bit element of an array + add(Pa_base, Pa_base, Rlen, lsl(LogBytesPerLong)); + sub(Pa_base, Pa_base, BytesPerLong); + if (!_squaring) { + add(Pb_base, Pb_base, Rlen, lsl(LogBytesPerLong)); + sub(Pb_base, Pb_base, BytesPerLong); + } + add(Pn_base, Pn_base, Rlen, lsl(LogBytesPerLong)); + sub(Pn_base, Pn_base, BytesPerLong); + add(Pm_base, Pm_base, Rlen, lsl(LogBytesPerLong)); + sub(Pm_base, Pm_base, BytesPerLong); + +#ifndef PRODUCT + // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + { + // Pn, Pm and s0 are used as a temporary + vld1_64(Rn, Address(Pn_base), Assembler::ALIGN_STD); + vrev64_64_32(Rn, Rn); + vmul_simple(tmp, Rn, inv, s0); + vmov_f64(Pm, Pn, tmp); + andr(Pm, Pm, Pn); + cmn(Pm, 1); + Label ok; + b(ok, EQ); { + stop("broken inverse in Montgomery multiply"); + } bind(ok); + } +#endif + + vmul_init(); + + block_comment("for (int i = 0; i < len; i++) {"); + mov(Ri, 0); { + Label loop, end; + cmp(Ri, Rlen); + b(end, Assembler::GE); + + bind(loop); + pre1(Ri); + + block_comment(" for (j = i; j; j--) {"); { + mov(Rj, Ri); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step); + } block_comment(" } // j"); + + post1(); + add(Ri, Ri, 1); + cmp(Ri, Rlen); + b(loop, Assembler::LT); + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + mov(Ri, Rlen); { + Label loop, end; + cmp(Ri, Rlen, lsl(1)); + b(end, Assembler::GE); + + bind(loop); + pre2(Ri, Rlen); + + block_comment(" for (j = len*2-i-1; j; j--) {"); { + lsl(Rj, Rlen, 1); + sub(Rj, Rj, Ri); + sub(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step); + } block_comment(" } // j"); + + post2(Ri, Rlen); + add(Ri, Ri, 1); + cmp(Ri, Rlen, lsl(1)); + b(loop, Assembler::LT); + bind(end); + } + block_comment("} // i"); + + FloatRegister t0 = RabAB; // use as temporary + vmul_fin(t0, tmp); + vmov_f64(Pa, Pb, t0); + normalize(Rlen, Pa, Pb, Pm, Pn, Ri, Rj, Pa_base); + + restore_regs(); + leave(); + bind(nothing); + ret(lr); + + return entry; + } + // In C, approximately: + + // void + // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[], + // unsigned long Pn_base[], unsigned long Pm_base[], + // unsigned long inv, int len) { + // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + // unsigned long *Pa, *Pb, *Pn, *Pm; + // unsigned long Ra, Rb, Rn, Rm; + + // int i; + + // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); + + // for (i = 0; i < len; i++) { + // int j; + + // Pa = Pa_base; + // Pb = Pb_base + i; + // Pm = Pm_base; + // Pn = Pn_base + i; + + // Ra = *Pa; + // Rb = *Pb; + // Rm = *Pm; + // Rn = *Pn; + + // int iters = i; + // for (j = 0; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + // assert(Rm == 
Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + + // assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // *Pm = Rm = t0 * inv; + // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + + // assert(t0 == 0, "broken Montgomery multiply"); + + // t0 = t1; t1 = t2; t2 = 0; + // } + + // for (i = len; i < 2*len; i++) { + // int j; + + // Pa = Pa_base + i-len; + // Pb = Pb_base + len; + // Pm = Pm_base + i-len; + // Pn = Pn_base + len; + + // Ra = *++Pa; + // Rb = *--Pb; + // Rm = *++Pm; + // Rn = *--Pn; + + // int iters = len*2-i-1; + // for (j = i-len+1; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be"); + // MACC(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + + // Pm_base[i-len] = t0; + // t0 = t1; t1 = t2; t2 = 0; + // } + + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + // } + + /** + * Fast Montgomery squaring. This uses asymptotically 25% fewer + * multiplies than Montgomery multiplication so it should be up to + * 25% faster. However, its loop control is more complex and it + * may actually run slower on some machines. + * + * Arguments: + * + * Inputs: + * c_rarg0 - int64 array elements a + * c_rarg1 - int64 array elements n (the modulus) + * c_rarg2 - int length + * [sp] - int inv + * [sp+8] - int array elements m (the result) + * + */ + address generate_square() { + align(CodeEntryAlignment); + address entry = pc(); + + enter(); + + save_regs(); + + // load inv and m array pointer + add(Ri, rfp, 4); + vld1_64(inv, Address(Ri), ALIGN_STD); + ldr(Pm_base, Address(Ri, BytesPerLong)); + + lsr(Rlen, Rlen, 1); // length in longwords = len/2 + + // let Px_base point on last 64-bit element of an array + add(Pa_base, Pa_base, Rlen, lsl(LogBytesPerLong)); + sub(Pa_base, Pa_base, BytesPerLong); + add(Pn_base, Pn_base, Rlen, lsl(LogBytesPerLong)); + sub(Pn_base, Pn_base, BytesPerLong); + add(Pm_base, Pm_base, Rlen, lsl(LogBytesPerLong)); + sub(Pm_base, Pm_base, BytesPerLong); + +#ifndef PRODUCT + // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); + { + // Pn, Pm and s0 are used as a temporary + vld1_64(Rn, Address(Pn_base), Assembler::ALIGN_STD); + vrev64_64_32(Rn, Rn); + vmul_simple(tmp, Rn, inv, s0); + vmov_f64(Pm, Pn, tmp); + andr(Pm, Pm, Pn); + cmn(Pm, 1); + Label ok; + b(ok, EQ); { + stop("broken inverse in Montgomery square"); + } bind(ok); + } +#endif + + vmul_init(); + + block_comment("for (int i = 0; i < len; i++) {"); + mov(Ri, 0); { + Label loop, end; + bind(loop); + cmp(Ri, Rlen); + b(end, GE); + + pre1(Ri); + + block_comment("for (j = (i+1)/2; j; j--) {"); { + add(Rj, Ri, 1); + lsr(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); + } block_comment(" } // j"); + + last_squaring(Ri); + + block_comment(" for (j = i/2; j; j--) {"); { + lsr(Rj, Ri, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); + } block_comment(" } // j"); + + post1_squaring(); + add(Ri, Ri, 1); + cmp(Ri, Rlen); + b(loop, LT); + + bind(end); + block_comment("} // i"); + } + + block_comment("for (int i = len; i < 2*len; i++) {"); + mov(Ri, Rlen); { + Label loop, end; + bind(loop); + cmp(Ri, Rlen, lsl(1)); + b(end, GE); + + pre2(Ri, Rlen); + + block_comment(" for (j = (2*len-i-1)/2; j; j--) {"); { + lsl(Rj, Rlen, 1); + sub(Rj, 
Rj, Ri); + sub(Rj, Rj, 1); + lsr(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring); + } block_comment(" } // j"); + + last_squaring(Ri); + + block_comment(" for (j = (2*len-i)/2; j; j--) {"); { + lsl(Rj, Rlen, 1); + sub(Rj, Rj, Ri); + lsr(Rj, Rj, 1); + unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring); + } block_comment(" } // j"); + + post2(Ri, Rlen); + add(Ri, Ri, 1); + cmp(Ri, Rlen, lsl(1)); + + b(loop, LT); + bind(end); + block_comment("} // i"); + } + + FloatRegister t0 = RabAB; // use as temporary + vmul_fin(t0, tmp); + vmov_f64(Pa, Pb, t0); + normalize(Rlen, Pa, Pb, Pm, Pn, Ri, Rj, Pa_base); + + restore_regs(); + leave(); + ret(lr); + + return entry; + } + // In C, approximately: + + // void + // montgomery_square(unsigned long Pa_base[], unsigned long Pn_base[], + // unsigned long Pm_base[], unsigned long inv, int len) { + // unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator + // unsigned long *Pa, *Pb, *Pn, *Pm; + // unsigned long Ra, Rb, Rn, Rm; + + // int i; + + // assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply"); + + // for (i = 0; i < len; i++) { + // int j; + + // Pa = Pa_base; + // Pb = Pa_base + i; + // Pm = Pm_base; + // Pn = Pn_base + i; + + // Ra = *Pa; + // Rb = *Pb; + // Rm = *Pm; + // Rn = *Pn; + + // int iters = (i+1)/2; + // for (j = 0; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pa_base[i-j], "must be"); + // MACC2(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + // if ((i & 1) == 0) { + // assert(Ra == Pa_base[j], "must be"); + // MACC(Ra, Ra, t0, t1, t2); + // } + // iters = i/2; + // assert(iters == i-j, "must be"); + // for (; iters--; j++) { + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + + // *Pm = Rm = t0 * inv; + // assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + + // assert(t0 == 0, "broken Montgomery multiply"); + + // t0 = t1; t1 = t2; t2 = 0; + // } + + // for (i = len; i < 2*len; i++) { + // int start = i-len+1; + // int end = start + (len - start)/2; + // int j; + + // Pa = Pa_base + i-len; + // Pb = Pa_base + len; + // Pm = Pm_base + i-len; + // Pn = Pn_base + len; + + // Ra = *++Pa; + // Rb = *--Pb; + // Rm = *++Pm; + // Rn = *--Pn; + + // int iters = (2*len-i-1)/2; + // assert(iters == end-start, "must be"); + // for (j = start; iters--; j++) { + // assert(Ra == Pa_base[j] && Rb == Pa_base[i-j], "must be"); + // MACC2(Ra, Rb, t0, t1, t2); + // Ra = *++Pa; + // Rb = *--Pb; + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + // if ((i & 1) == 0) { + // assert(Ra == Pa_base[j], "must be"); + // MACC(Ra, Ra, t0, t1, t2); + // } + // iters = (2*len-i)/2; + // assert(iters == len-j, "must be"); + // for (; iters--; j++) { + // assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be"); + // MACC(Rm, Rn, t0, t1, t2); + // Rm = *++Pm; + // Rn = *--Pn; + // } + // Pm_base[i-len] = t0; + // t0 = t1; t1 = t2; t2 = 0; + // } + + // while (t0) + // t0 = sub(Pm_base, Pn_base, t0, len); + // } + }; + + // Initialization + void generate_initial() { + // Generate initial stubs and initializes the entry points + + // entry points that exist in all platforms Note: This is code + // that could be shared among different 
platforms - however the + // benefit seems to be smaller than the disadvantage of having a + // much more complicated generator structure. See also comment in + // stubRoutines.hpp. + + StubRoutines::_forward_exception_entry = generate_forward_exception(); + + StubRoutines::_call_stub_entry = + generate_call_stub(StubRoutines::_call_stub_return_address); + + // is referenced by megamorphic call + StubRoutines::_catch_exception_entry = generate_catch_exception(); + + // Build this early so it's available for the interpreter. + StubRoutines::_throw_StackOverflowError_entry = + generate_throw_exception("StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime::throw_StackOverflowError)); + StubRoutines::_throw_delayed_StackOverflowError_entry = + generate_throw_exception("delayed StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime::throw_delayed_StackOverflowError)); + if (UseCRC32Intrinsics) { + // set table address before stub generation, which uses it + StubRoutines::_crc_table_adr = (address)StubRoutines::aarch32::_crc_table; + StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(false); + } + + if (UseCRC32CIntrinsics) { + // set table address before stub generation, which uses it + StubRoutines::_crc32c_table_addr = (address)StubRoutines::aarch32::_crc32c_table; + StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32(true); + } + + if (UseAESIntrinsics) { + // set table addresses before stub generation, which uses them + StubRoutines::_aes_table_te_addr = (address)StubRoutines::aarch32::_aes_te_table; + StubRoutines::_aes_table_td_addr = (address)StubRoutines::aarch32::_aes_td_table; + + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); + + if (UseNeon) { + // AES CBC implementation uses NEON instructions + StubRoutines::_cipherBlockChaining_encryptAESCrypt_special = generate_cipherBlockChaining_encryptAESCrypt(false); + StubRoutines::_cipherBlockChaining_decryptAESCrypt_special = generate_cipherBlockChaining_decryptAESCrypt(false); + StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(true); + StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt(true); + } + } + + if (UseSHA1Intrinsics) { + StubRoutines::_sha1_table_addr = (address)StubRoutines::aarch32::_sha1_table; + StubRoutines::_sha1_implCompress = generate_sha_implCompress(); + } + if (UseSHA256Intrinsics) { + StubRoutines::_sha256_table_addr = (address)StubRoutines::aarch32::_sha256_table; + StubRoutines::_sha256_implCompress = generate_sha256_implCompress(); + } + if (UseSHA512Intrinsics) { + StubRoutines::_sha512_table_addr = (address)StubRoutines::aarch32::_sha512_table; + StubRoutines::_sha512_implCompress = generate_sha512_implCompress(); + } + + NativeCall::init(); + } +#undef __ +#define __ _masm-> + +#ifdef COMPILER2 + address generate_idiv_irem_stub(const char *name, bool want_mod) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + + address start = __ pc(); + + BLOCK_COMMENT("Entry:"); + __ enter(); // required for proper stackwalking of RuntimeStub frame + // C2 knows this kills rscratch1 and rscratch2, so it does not save them + + __ divide(r0, r1, r2, 32, want_mod); + + __ leave(); // required for proper stackwalking of RuntimeStub frame + __ ret(lr); + + return start; + } + + // Support for uint StubRoutine::Arm::partial_subtype_check( Klass sub,
Klass super ); + // Arguments : + // + // ret : R0, returned + // icc/xcc: set as R0 (depending on wordSize) + // sub : R1, argument, not changed + // super: R2, argument, not changed + // raddr: LR, blown by call + address generate_partial_subtype_check() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "partial_subtype_check"); + address start = __ pc(); + + // based on SPARC check_klass_subtype_[fast|slow]_path (without CompressedOops) + + // R0 used as tmp_reg (in addition to return reg) + Register sub_klass = r1; + Register super_klass = r2; + Register tmp_reg2 = r3; + Register tmp_reg3 = r4; + +// inc_counter_np kills rscratch1 and rscratch2 +#define saved_set RegSet::of(tmp_reg2, tmp_reg3, rscratch1, rscratch2) + + Label L_loop, L_fail; + + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + + // fast check should be redundant + + // slow check + { + __ push(saved_set, sp); + + // a couple of useful fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + + // Do a linear scan of the secondary super-klass chain. + // This code is rarely used, so simplicity is a virtue here. + + inc_counter_np(SharedRuntime::_partial_subtype_ctr); + + Register scan_temp = tmp_reg2; + Register count_temp = tmp_reg3; + + // We will consult the secondary-super array. + __ ldr(scan_temp, Address(sub_klass, ss_offset)); + + Register search_key = super_klass; + + // Load the array length. + __ ldr(count_temp, Address(scan_temp, Array::length_offset_in_bytes())); + __ add(scan_temp, scan_temp, Array::base_offset_in_bytes()); + + __ add(count_temp, count_temp, 1); + + // Top of search loop + __ bind(L_loop); + // Notes: + // scan_temp starts at the array elements + // count_temp is 1+size + __ subs(count_temp, count_temp, 1); + __ b(L_fail, Assembler::EQ); // not found in the array + + // Load next super to check + // In the array of super classes elements are pointer sized. + int element_size = wordSize; + __ ldr(r0, __ post(scan_temp, element_size)); + + // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list + __ subs(r0, r0, search_key); // set R0 to 0 on success (and flags to eq) + + // A miss means we are NOT a subtype and need to keep looping + __ b(L_loop, Assembler::NE); + + // Falling out the bottom means we found a hit; we ARE a subtype + + // Success. Cache the super we found and proceed in triumph. 
+ __ str(super_klass, Address(sub_klass, sc_offset)); + + // Return success + // R0 is already 0 and flags are already set to eq + __ pop(saved_set, sp); + __ ret(lr); + + // Return failure + __ bind(L_fail); + __ movs_i(r0, 1); // sets the flags + __ pop(saved_set, sp); + __ ret(lr); + } + return start; + } +#undef saved_set + + address generate_string_compress_neon() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "string_compress_neon"); + address start = __ pc(); + + Register src = r2; + Register dst = r1; + Register len = r3; + Register t = r9; + Register t2 = r12; + FloatRegister a1 = d0; + FloatRegister a2 = d1; + FloatRegister b1 = d2; + FloatRegister b2 = d3; + Register result = r0; + + Label Lloop2, Lset_result; + + __ sub(len, len, 8+16); + __ vld1_64(a1, a2, __ post(src, 16), Assembler::ALIGN_STD); + __ bind(Lloop2); { + __ vld1_64(b1, __ post(src, 8), Assembler::ALIGN_STD); + __ vuzp_64_8(a1, a2); // a1 now has lower bytes, a2 upper + __ vld1_64(b2, __ post(src, 8), Assembler::ALIGN_STD); + __ vmov_f64(t, t2, a2); + __ vst1_64(a1, __ post(dst, 8), Assembler::ALIGN_STD); + __ orrs(t, t, t2); + __ b(Lset_result, Assembler::NE); + + __ vld1_64(a1, __ post(src, 8), Assembler::ALIGN_STD); + __ vuzp_64_8(b1, b2); // b1 now has lower bytes, b2 upper + __ vld1_64(a2, __ post(src, 8), Assembler::ALIGN_STD); + __ vmov_f64(t, t2, b2); + __ vst1_64(b1, __ post(dst, 8), Assembler::ALIGN_STD); + __ orrs(t, t, t2); + __ b(Lset_result, Assembler::NE); + __ subs(len, len, 16); + __ b(Lloop2, Assembler::GE); + } + + __ vuzp_64_8(a1, a2); // a1 now has lower bytes, a2 upper + __ vmov_f64(t, t2, a2); + __ vst1_64(a1, __ post(dst, 8), Assembler::ALIGN_STD); + __ orrs(t, t, t2); + __ b(Lset_result, Assembler::NE); + __ adds(len, len, 16); + __ ret(lr); // leaves Z-flag to check for per-char slow case + + __ bind(Lset_result); + __ movs_i(result, 0, Assembler::NE); // sets Z flag + __ ret(lr); + + return start; + } + + address generate_string_inflate_neon() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "string_inflate_neon"); + address start = __ pc(); + + Register src = r0; + Register dst = r1; + Register len = r2; + FloatRegister a1 = d0; + + Label Lloop2; + + __ sub(len, len, 16); + __ bind(Lloop2); { + __ vld1_64(d0, __ post(src, 8), Assembler::ALIGN_STD); + __ vmovl_8u(q0, d0); + __ vst1_64(d0, d1, __ post(dst, 16), Assembler::ALIGN_STD); + __ vld1_64(d0, __ post(src, 8), Assembler::ALIGN_STD); + __ vmovl_8u(q0, d0); + __ vst1_64(d0, d1, __ post(dst, 16), Assembler::ALIGN_STD); + __ subs(len, len, 16); + __ b(Lloop2, Assembler::HS); + } + + __ adds(len, len, 16); // sets Z flag to check in intrinsic + __ ret(lr); + + return start; + } + + void generate_c2_stubs() { + StubRoutines::aarch32::_idiv_entry = + generate_idiv_irem_stub("idiv_c2_stub", false); + StubRoutines::aarch32::_irem_entry = + generate_idiv_irem_stub("irem_c2_stub", true); + StubRoutines::aarch32::_partial_subtype_check = + generate_partial_subtype_check(); + if (VM_Version::features() & FT_AdvSIMD) { + StubRoutines::aarch32::_string_compress_neon = + generate_string_compress_neon(); + StubRoutines::aarch32::_string_inflate_neon = + generate_string_inflate_neon(); + } + } +#endif + + void generate_all() { + // support for verify_oop (must happen after universe_init) + StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); + StubRoutines::_throw_AbstractMethodError_entry = + generate_throw_exception("AbstractMethodError throw_exception", + 
CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_AbstractMethodError)); + + StubRoutines::_throw_IncompatibleClassChangeError_entry = + generate_throw_exception("IncompatibleClassChangeError throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_IncompatibleClassChangeError)); + + StubRoutines::_throw_NullPointerException_at_call_entry = + generate_throw_exception("NullPointerException at call throw_exception", + CAST_FROM_FN_PTR(address, + SharedRuntime:: + throw_NullPointerException_at_call)); + + // arraycopy stubs used by compilers + generate_arraycopy_stubs(); + +#ifdef COMPILER2 + if (UseMultiplyToLenIntrinsic) { + StubRoutines::_multiplyToLen = generate_multiplyToLen(); + StubRoutines::_mulAdd = generate_mulAdd(); + } +#endif + + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); + StubRoutines::_montgomeryMultiply = g.generate_multiply(); + } + + if (UseMontgomerySquareIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomerySquare"); + MontgomeryMultiplyGenerator g(_masm, /*squaring*/true); + StubRoutines::_montgomerySquare = g.generate_square(); + } + + // Safefetch stubs. + generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, + &StubRoutines::_safefetch32_fault_pc, + &StubRoutines::_safefetch32_continuation_pc); + generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, + &StubRoutines::_safefetchN_fault_pc, + &StubRoutines::_safefetchN_continuation_pc); + +#ifdef COMPILER2 + generate_c2_stubs(); +#endif + } + + public: + StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { + if (all) { + generate_all(); + } else { + generate_initial(); + } + + } +}; // end class declaration + +void StubGenerator_generate(CodeBuffer* code, bool all) { + StubGenerator g(code, all); +} --- /dev/null 2018-09-25 19:25:26.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/stubRoutines_aarch32.cpp 2018-09-25 19:25:26.000000000 +0300 @@ -0,0 +1,797 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "utilities/globalDefinitions.hpp" + +// Implementation of the platform-specific part of StubRoutines - for +// a description of how to extend it, see the stubRoutines.hpp file. + +#ifdef COMPILER2 +address StubRoutines::aarch32::_idiv_entry = NULL; +address StubRoutines::aarch32::_irem_entry = NULL; +address StubRoutines::aarch32::_partial_subtype_check = NULL; +address StubRoutines::aarch32::_string_compress_neon = NULL; +address StubRoutines::aarch32::_string_inflate_neon = NULL; +#endif +/** + * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h + */ +juint StubRoutines::aarch32::_crc_table[] + ATTRIBUTE_ALIGNED(4096) = +{ + // Table 0 + 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, + 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, + 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, + 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, + 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, + 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, + 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, + 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, + 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, + 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, + 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, + 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, + 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, + 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, + 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, + 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, + 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, + 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, + 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, + 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, + 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, + 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, + 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, + 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, + 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, + 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, + 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, + 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, + 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, + 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, + 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, + 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, + 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, + 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, + 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, + 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, + 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, + 0xb2bd0b28UL, 
0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, + 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, + 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, + 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, + 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, + 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, + 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, + 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, + 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, + 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, + 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, + 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, + 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, + 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, + 0x2d02ef8dUL, + + // Table 1 + 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, + 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, + 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, + 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, + 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, + 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, + 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, + 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, + 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, + 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, + 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, + 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, + 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, + 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, + 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, + 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, + 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, + 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, + 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, + 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, + 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, + 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, + 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, + 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, + 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, + 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, + 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, + 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, + 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, + 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, + 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, + 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, + 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, + 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, + 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, + 
0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, + 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, + 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, + 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, + 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, + 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, + 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, + 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, + 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, + 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, + 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, + 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, + 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, + 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, + 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, + 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, + 0x9324fd72UL, + + // Table 2 + 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, + 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, + 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, + 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, + 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, + 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, + 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, + 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, + 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, + 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, + 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, + 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, + 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, + 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, + 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, + 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, + 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, + 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, + 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, + 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, + 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, + 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, + 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, + 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, + 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, + 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, + 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, + 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, + 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, + 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, + 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, + 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, + 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 
0xde71f5bcUL, + 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, + 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, + 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, + 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, + 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, + 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, + 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, + 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, + 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, + 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, + 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, + 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, + 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, + 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, + 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, + 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, + 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, + 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, + 0xbe9834edUL, + + // Table 3 + 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, + 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, + 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, + 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, + 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, + 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, + 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, + 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, + 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, + 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, + 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, + 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, + 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, + 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, + 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, + 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, + 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, + 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, + 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, + 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, + 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, + 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, + 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, + 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, + 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, + 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, + 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, + 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, + 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, + 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, + 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 
0xc048ae2eUL, 0xd2fd01c0UL, + 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, + 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, + 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, + 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, + 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, + 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, + 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, + 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, + 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, + 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, + 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, + 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, + 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, + 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, + 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, + 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, + 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, + 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, + 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, + 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, + 0xde0506f1UL, + // Constants for Neon CRC32 implementation, 128-bit operation + // k3 = 0xba8ccbe8 = x^160 mod poly - bit reversed + // k4 = 0xa06a2517 = x^128 mod poly - bit reversed + // poly = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0 + 0 + 0xba8ccbe8, 0xa06a2517, // k4:k3 + 0x8cbae8cb, 0x6aa01725, // byte swap + 0xcbe8ba8c, 0x2517a06a, // word swap + 0xe8cb8cba, 0x17256aa0, // byte swap of word swap +}; + +/** + * CRC32C constants lookup table + */ + +juint StubRoutines::aarch32::_crc32c_table[] = +{ + // Table 0 + 0x00000000UL, 0xf26b8303UL, 0xe13b70f7UL, 0x1350f3f4UL, 0xc79a971fUL, + 0x35f1141cUL, 0x26a1e7e8UL, 0xd4ca64ebUL, 0x8ad958cfUL, 0x78b2dbccUL, + 0x6be22838UL, 0x9989ab3bUL, 0x4d43cfd0UL, 0xbf284cd3UL, 0xac78bf27UL, + 0x5e133c24UL, 0x105ec76fUL, 0xe235446cUL, 0xf165b798UL, 0x030e349bUL, + 0xd7c45070UL, 0x25afd373UL, 0x36ff2087UL, 0xc494a384UL, 0x9a879fa0UL, + 0x68ec1ca3UL, 0x7bbcef57UL, 0x89d76c54UL, 0x5d1d08bfUL, 0xaf768bbcUL, + 0xbc267848UL, 0x4e4dfb4bUL, 0x20bd8edeUL, 0xd2d60dddUL, 0xc186fe29UL, + 0x33ed7d2aUL, 0xe72719c1UL, 0x154c9ac2UL, 0x061c6936UL, 0xf477ea35UL, + 0xaa64d611UL, 0x580f5512UL, 0x4b5fa6e6UL, 0xb93425e5UL, 0x6dfe410eUL, + 0x9f95c20dUL, 0x8cc531f9UL, 0x7eaeb2faUL, 0x30e349b1UL, 0xc288cab2UL, + 0xd1d83946UL, 0x23b3ba45UL, 0xf779deaeUL, 0x05125dadUL, 0x1642ae59UL, + 0xe4292d5aUL, 0xba3a117eUL, 0x4851927dUL, 0x5b016189UL, 0xa96ae28aUL, + 0x7da08661UL, 0x8fcb0562UL, 0x9c9bf696UL, 0x6ef07595UL, 0x417b1dbcUL, + 0xb3109ebfUL, 0xa0406d4bUL, 0x522bee48UL, 0x86e18aa3UL, 0x748a09a0UL, + 0x67dafa54UL, 0x95b17957UL, 0xcba24573UL, 0x39c9c670UL, 0x2a993584UL, + 0xd8f2b687UL, 0x0c38d26cUL, 0xfe53516fUL, 0xed03a29bUL, 0x1f682198UL, + 0x5125dad3UL, 0xa34e59d0UL, 0xb01eaa24UL, 0x42752927UL, 0x96bf4dccUL, + 0x64d4cecfUL, 0x77843d3bUL, 0x85efbe38UL, 0xdbfc821cUL, 0x2997011fUL, + 0x3ac7f2ebUL, 0xc8ac71e8UL, 0x1c661503UL, 0xee0d9600UL, 0xfd5d65f4UL, + 0x0f36e6f7UL, 0x61c69362UL, 0x93ad1061UL, 0x80fde395UL, 0x72966096UL, + 0xa65c047dUL, 0x5437877eUL, 0x4767748aUL, 0xb50cf789UL, 
0xeb1fcbadUL, + 0x197448aeUL, 0x0a24bb5aUL, 0xf84f3859UL, 0x2c855cb2UL, 0xdeeedfb1UL, + 0xcdbe2c45UL, 0x3fd5af46UL, 0x7198540dUL, 0x83f3d70eUL, 0x90a324faUL, + 0x62c8a7f9UL, 0xb602c312UL, 0x44694011UL, 0x5739b3e5UL, 0xa55230e6UL, + 0xfb410cc2UL, 0x092a8fc1UL, 0x1a7a7c35UL, 0xe811ff36UL, 0x3cdb9bddUL, + 0xceb018deUL, 0xdde0eb2aUL, 0x2f8b6829UL, 0x82f63b78UL, 0x709db87bUL, + 0x63cd4b8fUL, 0x91a6c88cUL, 0x456cac67UL, 0xb7072f64UL, 0xa457dc90UL, + 0x563c5f93UL, 0x082f63b7UL, 0xfa44e0b4UL, 0xe9141340UL, 0x1b7f9043UL, + 0xcfb5f4a8UL, 0x3dde77abUL, 0x2e8e845fUL, 0xdce5075cUL, 0x92a8fc17UL, + 0x60c37f14UL, 0x73938ce0UL, 0x81f80fe3UL, 0x55326b08UL, 0xa759e80bUL, + 0xb4091bffUL, 0x466298fcUL, 0x1871a4d8UL, 0xea1a27dbUL, 0xf94ad42fUL, + 0x0b21572cUL, 0xdfeb33c7UL, 0x2d80b0c4UL, 0x3ed04330UL, 0xccbbc033UL, + 0xa24bb5a6UL, 0x502036a5UL, 0x4370c551UL, 0xb11b4652UL, 0x65d122b9UL, + 0x97baa1baUL, 0x84ea524eUL, 0x7681d14dUL, 0x2892ed69UL, 0xdaf96e6aUL, + 0xc9a99d9eUL, 0x3bc21e9dUL, 0xef087a76UL, 0x1d63f975UL, 0x0e330a81UL, + 0xfc588982UL, 0xb21572c9UL, 0x407ef1caUL, 0x532e023eUL, 0xa145813dUL, + 0x758fe5d6UL, 0x87e466d5UL, 0x94b49521UL, 0x66df1622UL, 0x38cc2a06UL, + 0xcaa7a905UL, 0xd9f75af1UL, 0x2b9cd9f2UL, 0xff56bd19UL, 0x0d3d3e1aUL, + 0x1e6dcdeeUL, 0xec064eedUL, 0xc38d26c4UL, 0x31e6a5c7UL, 0x22b65633UL, + 0xd0ddd530UL, 0x0417b1dbUL, 0xf67c32d8UL, 0xe52cc12cUL, 0x1747422fUL, + 0x49547e0bUL, 0xbb3ffd08UL, 0xa86f0efcUL, 0x5a048dffUL, 0x8ecee914UL, + 0x7ca56a17UL, 0x6ff599e3UL, 0x9d9e1ae0UL, 0xd3d3e1abUL, 0x21b862a8UL, + 0x32e8915cUL, 0xc083125fUL, 0x144976b4UL, 0xe622f5b7UL, 0xf5720643UL, + 0x07198540UL, 0x590ab964UL, 0xab613a67UL, 0xb831c993UL, 0x4a5a4a90UL, + 0x9e902e7bUL, 0x6cfbad78UL, 0x7fab5e8cUL, 0x8dc0dd8fUL, 0xe330a81aUL, + 0x115b2b19UL, 0x020bd8edUL, 0xf0605beeUL, 0x24aa3f05UL, 0xd6c1bc06UL, + 0xc5914ff2UL, 0x37faccf1UL, 0x69e9f0d5UL, 0x9b8273d6UL, 0x88d28022UL, + 0x7ab90321UL, 0xae7367caUL, 0x5c18e4c9UL, 0x4f48173dUL, 0xbd23943eUL, + 0xf36e6f75UL, 0x0105ec76UL, 0x12551f82UL, 0xe03e9c81UL, 0x34f4f86aUL, + 0xc69f7b69UL, 0xd5cf889dUL, 0x27a40b9eUL, 0x79b737baUL, 0x8bdcb4b9UL, + 0x988c474dUL, 0x6ae7c44eUL, 0xbe2da0a5UL, 0x4c4623a6UL, 0x5f16d052UL, + 0xad7d5351UL, + + // Table 1 + 0x00000000UL, 0x13a29877UL, 0x274530eeUL, 0x34e7a899UL, 0x4e8a61dcUL, + 0x5d28f9abUL, 0x69cf5132UL, 0x7a6dc945UL, 0x9d14c3b8UL, 0x8eb65bcfUL, + 0xba51f356UL, 0xa9f36b21UL, 0xd39ea264UL, 0xc03c3a13UL, 0xf4db928aUL, + 0xe7790afdUL, 0x3fc5f181UL, 0x2c6769f6UL, 0x1880c16fUL, 0x0b225918UL, + 0x714f905dUL, 0x62ed082aUL, 0x560aa0b3UL, 0x45a838c4UL, 0xa2d13239UL, + 0xb173aa4eUL, 0x859402d7UL, 0x96369aa0UL, 0xec5b53e5UL, 0xfff9cb92UL, + 0xcb1e630bUL, 0xd8bcfb7cUL, 0x7f8be302UL, 0x6c297b75UL, 0x58ced3ecUL, + 0x4b6c4b9bUL, 0x310182deUL, 0x22a31aa9UL, 0x1644b230UL, 0x05e62a47UL, + 0xe29f20baUL, 0xf13db8cdUL, 0xc5da1054UL, 0xd6788823UL, 0xac154166UL, + 0xbfb7d911UL, 0x8b507188UL, 0x98f2e9ffUL, 0x404e1283UL, 0x53ec8af4UL, + 0x670b226dUL, 0x74a9ba1aUL, 0x0ec4735fUL, 0x1d66eb28UL, 0x298143b1UL, + 0x3a23dbc6UL, 0xdd5ad13bUL, 0xcef8494cUL, 0xfa1fe1d5UL, 0xe9bd79a2UL, + 0x93d0b0e7UL, 0x80722890UL, 0xb4958009UL, 0xa737187eUL, 0xff17c604UL, + 0xecb55e73UL, 0xd852f6eaUL, 0xcbf06e9dUL, 0xb19da7d8UL, 0xa23f3fafUL, + 0x96d89736UL, 0x857a0f41UL, 0x620305bcUL, 0x71a19dcbUL, 0x45463552UL, + 0x56e4ad25UL, 0x2c896460UL, 0x3f2bfc17UL, 0x0bcc548eUL, 0x186eccf9UL, + 0xc0d23785UL, 0xd370aff2UL, 0xe797076bUL, 0xf4359f1cUL, 0x8e585659UL, + 0x9dface2eUL, 0xa91d66b7UL, 0xbabffec0UL, 0x5dc6f43dUL, 0x4e646c4aUL, + 0x7a83c4d3UL, 0x69215ca4UL, 0x134c95e1UL, 
0x00ee0d96UL, 0x3409a50fUL, + 0x27ab3d78UL, 0x809c2506UL, 0x933ebd71UL, 0xa7d915e8UL, 0xb47b8d9fUL, + 0xce1644daUL, 0xddb4dcadUL, 0xe9537434UL, 0xfaf1ec43UL, 0x1d88e6beUL, + 0x0e2a7ec9UL, 0x3acdd650UL, 0x296f4e27UL, 0x53028762UL, 0x40a01f15UL, + 0x7447b78cUL, 0x67e52ffbUL, 0xbf59d487UL, 0xacfb4cf0UL, 0x981ce469UL, + 0x8bbe7c1eUL, 0xf1d3b55bUL, 0xe2712d2cUL, 0xd69685b5UL, 0xc5341dc2UL, + 0x224d173fUL, 0x31ef8f48UL, 0x050827d1UL, 0x16aabfa6UL, 0x6cc776e3UL, + 0x7f65ee94UL, 0x4b82460dUL, 0x5820de7aUL, 0xfbc3faf9UL, 0xe861628eUL, + 0xdc86ca17UL, 0xcf245260UL, 0xb5499b25UL, 0xa6eb0352UL, 0x920cabcbUL, + 0x81ae33bcUL, 0x66d73941UL, 0x7575a136UL, 0x419209afUL, 0x523091d8UL, + 0x285d589dUL, 0x3bffc0eaUL, 0x0f186873UL, 0x1cbaf004UL, 0xc4060b78UL, + 0xd7a4930fUL, 0xe3433b96UL, 0xf0e1a3e1UL, 0x8a8c6aa4UL, 0x992ef2d3UL, + 0xadc95a4aUL, 0xbe6bc23dUL, 0x5912c8c0UL, 0x4ab050b7UL, 0x7e57f82eUL, + 0x6df56059UL, 0x1798a91cUL, 0x043a316bUL, 0x30dd99f2UL, 0x237f0185UL, + 0x844819fbUL, 0x97ea818cUL, 0xa30d2915UL, 0xb0afb162UL, 0xcac27827UL, + 0xd960e050UL, 0xed8748c9UL, 0xfe25d0beUL, 0x195cda43UL, 0x0afe4234UL, + 0x3e19eaadUL, 0x2dbb72daUL, 0x57d6bb9fUL, 0x447423e8UL, 0x70938b71UL, + 0x63311306UL, 0xbb8de87aUL, 0xa82f700dUL, 0x9cc8d894UL, 0x8f6a40e3UL, + 0xf50789a6UL, 0xe6a511d1UL, 0xd242b948UL, 0xc1e0213fUL, 0x26992bc2UL, + 0x353bb3b5UL, 0x01dc1b2cUL, 0x127e835bUL, 0x68134a1eUL, 0x7bb1d269UL, + 0x4f567af0UL, 0x5cf4e287UL, 0x04d43cfdUL, 0x1776a48aUL, 0x23910c13UL, + 0x30339464UL, 0x4a5e5d21UL, 0x59fcc556UL, 0x6d1b6dcfUL, 0x7eb9f5b8UL, + 0x99c0ff45UL, 0x8a626732UL, 0xbe85cfabUL, 0xad2757dcUL, 0xd74a9e99UL, + 0xc4e806eeUL, 0xf00fae77UL, 0xe3ad3600UL, 0x3b11cd7cUL, 0x28b3550bUL, + 0x1c54fd92UL, 0x0ff665e5UL, 0x759baca0UL, 0x663934d7UL, 0x52de9c4eUL, + 0x417c0439UL, 0xa6050ec4UL, 0xb5a796b3UL, 0x81403e2aUL, 0x92e2a65dUL, + 0xe88f6f18UL, 0xfb2df76fUL, 0xcfca5ff6UL, 0xdc68c781UL, 0x7b5fdfffUL, + 0x68fd4788UL, 0x5c1aef11UL, 0x4fb87766UL, 0x35d5be23UL, 0x26772654UL, + 0x12908ecdUL, 0x013216baUL, 0xe64b1c47UL, 0xf5e98430UL, 0xc10e2ca9UL, + 0xd2acb4deUL, 0xa8c17d9bUL, 0xbb63e5ecUL, 0x8f844d75UL, 0x9c26d502UL, + 0x449a2e7eUL, 0x5738b609UL, 0x63df1e90UL, 0x707d86e7UL, 0x0a104fa2UL, + 0x19b2d7d5UL, 0x2d557f4cUL, 0x3ef7e73bUL, 0xd98eedc6UL, 0xca2c75b1UL, + 0xfecbdd28UL, 0xed69455fUL, 0x97048c1aUL, 0x84a6146dUL, 0xb041bcf4UL, + 0xa3e32483UL, + + // Table 2 + 0x00000000UL, 0xa541927eUL, 0x4f6f520dUL, 0xea2ec073UL, 0x9edea41aUL, + 0x3b9f3664UL, 0xd1b1f617UL, 0x74f06469UL, 0x38513ec5UL, 0x9d10acbbUL, + 0x773e6cc8UL, 0xd27ffeb6UL, 0xa68f9adfUL, 0x03ce08a1UL, 0xe9e0c8d2UL, + 0x4ca15aacUL, 0x70a27d8aUL, 0xd5e3eff4UL, 0x3fcd2f87UL, 0x9a8cbdf9UL, + 0xee7cd990UL, 0x4b3d4beeUL, 0xa1138b9dUL, 0x045219e3UL, 0x48f3434fUL, + 0xedb2d131UL, 0x079c1142UL, 0xa2dd833cUL, 0xd62de755UL, 0x736c752bUL, + 0x9942b558UL, 0x3c032726UL, 0xe144fb14UL, 0x4405696aUL, 0xae2ba919UL, + 0x0b6a3b67UL, 0x7f9a5f0eUL, 0xdadbcd70UL, 0x30f50d03UL, 0x95b49f7dUL, + 0xd915c5d1UL, 0x7c5457afUL, 0x967a97dcUL, 0x333b05a2UL, 0x47cb61cbUL, + 0xe28af3b5UL, 0x08a433c6UL, 0xade5a1b8UL, 0x91e6869eUL, 0x34a714e0UL, + 0xde89d493UL, 0x7bc846edUL, 0x0f382284UL, 0xaa79b0faUL, 0x40577089UL, + 0xe516e2f7UL, 0xa9b7b85bUL, 0x0cf62a25UL, 0xe6d8ea56UL, 0x43997828UL, + 0x37691c41UL, 0x92288e3fUL, 0x78064e4cUL, 0xdd47dc32UL, 0xc76580d9UL, + 0x622412a7UL, 0x880ad2d4UL, 0x2d4b40aaUL, 0x59bb24c3UL, 0xfcfab6bdUL, + 0x16d476ceUL, 0xb395e4b0UL, 0xff34be1cUL, 0x5a752c62UL, 0xb05bec11UL, + 0x151a7e6fUL, 0x61ea1a06UL, 0xc4ab8878UL, 0x2e85480bUL, 0x8bc4da75UL, + 0xb7c7fd53UL, 0x12866f2dUL, 
0xf8a8af5eUL, 0x5de93d20UL, 0x29195949UL, + 0x8c58cb37UL, 0x66760b44UL, 0xc337993aUL, 0x8f96c396UL, 0x2ad751e8UL, + 0xc0f9919bUL, 0x65b803e5UL, 0x1148678cUL, 0xb409f5f2UL, 0x5e273581UL, + 0xfb66a7ffUL, 0x26217bcdUL, 0x8360e9b3UL, 0x694e29c0UL, 0xcc0fbbbeUL, + 0xb8ffdfd7UL, 0x1dbe4da9UL, 0xf7908ddaUL, 0x52d11fa4UL, 0x1e704508UL, + 0xbb31d776UL, 0x511f1705UL, 0xf45e857bUL, 0x80aee112UL, 0x25ef736cUL, + 0xcfc1b31fUL, 0x6a802161UL, 0x56830647UL, 0xf3c29439UL, 0x19ec544aUL, + 0xbcadc634UL, 0xc85da25dUL, 0x6d1c3023UL, 0x8732f050UL, 0x2273622eUL, + 0x6ed23882UL, 0xcb93aafcUL, 0x21bd6a8fUL, 0x84fcf8f1UL, 0xf00c9c98UL, + 0x554d0ee6UL, 0xbf63ce95UL, 0x1a225cebUL, 0x8b277743UL, 0x2e66e53dUL, + 0xc448254eUL, 0x6109b730UL, 0x15f9d359UL, 0xb0b84127UL, 0x5a968154UL, + 0xffd7132aUL, 0xb3764986UL, 0x1637dbf8UL, 0xfc191b8bUL, 0x595889f5UL, + 0x2da8ed9cUL, 0x88e97fe2UL, 0x62c7bf91UL, 0xc7862defUL, 0xfb850ac9UL, + 0x5ec498b7UL, 0xb4ea58c4UL, 0x11abcabaUL, 0x655baed3UL, 0xc01a3cadUL, + 0x2a34fcdeUL, 0x8f756ea0UL, 0xc3d4340cUL, 0x6695a672UL, 0x8cbb6601UL, + 0x29faf47fUL, 0x5d0a9016UL, 0xf84b0268UL, 0x1265c21bUL, 0xb7245065UL, + 0x6a638c57UL, 0xcf221e29UL, 0x250cde5aUL, 0x804d4c24UL, 0xf4bd284dUL, + 0x51fcba33UL, 0xbbd27a40UL, 0x1e93e83eUL, 0x5232b292UL, 0xf77320ecUL, + 0x1d5de09fUL, 0xb81c72e1UL, 0xccec1688UL, 0x69ad84f6UL, 0x83834485UL, + 0x26c2d6fbUL, 0x1ac1f1ddUL, 0xbf8063a3UL, 0x55aea3d0UL, 0xf0ef31aeUL, + 0x841f55c7UL, 0x215ec7b9UL, 0xcb7007caUL, 0x6e3195b4UL, 0x2290cf18UL, + 0x87d15d66UL, 0x6dff9d15UL, 0xc8be0f6bUL, 0xbc4e6b02UL, 0x190ff97cUL, + 0xf321390fUL, 0x5660ab71UL, 0x4c42f79aUL, 0xe90365e4UL, 0x032da597UL, + 0xa66c37e9UL, 0xd29c5380UL, 0x77ddc1feUL, 0x9df3018dUL, 0x38b293f3UL, + 0x7413c95fUL, 0xd1525b21UL, 0x3b7c9b52UL, 0x9e3d092cUL, 0xeacd6d45UL, + 0x4f8cff3bUL, 0xa5a23f48UL, 0x00e3ad36UL, 0x3ce08a10UL, 0x99a1186eUL, + 0x738fd81dUL, 0xd6ce4a63UL, 0xa23e2e0aUL, 0x077fbc74UL, 0xed517c07UL, + 0x4810ee79UL, 0x04b1b4d5UL, 0xa1f026abUL, 0x4bdee6d8UL, 0xee9f74a6UL, + 0x9a6f10cfUL, 0x3f2e82b1UL, 0xd50042c2UL, 0x7041d0bcUL, 0xad060c8eUL, + 0x08479ef0UL, 0xe2695e83UL, 0x4728ccfdUL, 0x33d8a894UL, 0x96993aeaUL, + 0x7cb7fa99UL, 0xd9f668e7UL, 0x9557324bUL, 0x3016a035UL, 0xda386046UL, + 0x7f79f238UL, 0x0b899651UL, 0xaec8042fUL, 0x44e6c45cUL, 0xe1a75622UL, + 0xdda47104UL, 0x78e5e37aUL, 0x92cb2309UL, 0x378ab177UL, 0x437ad51eUL, + 0xe63b4760UL, 0x0c158713UL, 0xa954156dUL, 0xe5f54fc1UL, 0x40b4ddbfUL, + 0xaa9a1dccUL, 0x0fdb8fb2UL, 0x7b2bebdbUL, 0xde6a79a5UL, 0x3444b9d6UL, + 0x91052ba8UL, + + // Table 3 + 0x00000000UL, 0xdd45aab8UL, 0xbf672381UL, 0x62228939UL, 0x7b2231f3UL, + 0xa6679b4bUL, 0xc4451272UL, 0x1900b8caUL, 0xf64463e6UL, 0x2b01c95eUL, + 0x49234067UL, 0x9466eadfUL, 0x8d665215UL, 0x5023f8adUL, 0x32017194UL, + 0xef44db2cUL, 0xe964b13dUL, 0x34211b85UL, 0x560392bcUL, 0x8b463804UL, + 0x924680ceUL, 0x4f032a76UL, 0x2d21a34fUL, 0xf06409f7UL, 0x1f20d2dbUL, + 0xc2657863UL, 0xa047f15aUL, 0x7d025be2UL, 0x6402e328UL, 0xb9474990UL, + 0xdb65c0a9UL, 0x06206a11UL, 0xd725148bUL, 0x0a60be33UL, 0x6842370aUL, + 0xb5079db2UL, 0xac072578UL, 0x71428fc0UL, 0x136006f9UL, 0xce25ac41UL, + 0x2161776dUL, 0xfc24ddd5UL, 0x9e0654ecUL, 0x4343fe54UL, 0x5a43469eUL, + 0x8706ec26UL, 0xe524651fUL, 0x3861cfa7UL, 0x3e41a5b6UL, 0xe3040f0eUL, + 0x81268637UL, 0x5c632c8fUL, 0x45639445UL, 0x98263efdUL, 0xfa04b7c4UL, + 0x27411d7cUL, 0xc805c650UL, 0x15406ce8UL, 0x7762e5d1UL, 0xaa274f69UL, + 0xb327f7a3UL, 0x6e625d1bUL, 0x0c40d422UL, 0xd1057e9aUL, 0xaba65fe7UL, + 0x76e3f55fUL, 0x14c17c66UL, 0xc984d6deUL, 0xd0846e14UL, 0x0dc1c4acUL, + 0x6fe34d95UL, 
0xb2a6e72dUL, 0x5de23c01UL, 0x80a796b9UL, 0xe2851f80UL, + 0x3fc0b538UL, 0x26c00df2UL, 0xfb85a74aUL, 0x99a72e73UL, 0x44e284cbUL, + 0x42c2eedaUL, 0x9f874462UL, 0xfda5cd5bUL, 0x20e067e3UL, 0x39e0df29UL, + 0xe4a57591UL, 0x8687fca8UL, 0x5bc25610UL, 0xb4868d3cUL, 0x69c32784UL, + 0x0be1aebdUL, 0xd6a40405UL, 0xcfa4bccfUL, 0x12e11677UL, 0x70c39f4eUL, + 0xad8635f6UL, 0x7c834b6cUL, 0xa1c6e1d4UL, 0xc3e468edUL, 0x1ea1c255UL, + 0x07a17a9fUL, 0xdae4d027UL, 0xb8c6591eUL, 0x6583f3a6UL, 0x8ac7288aUL, + 0x57828232UL, 0x35a00b0bUL, 0xe8e5a1b3UL, 0xf1e51979UL, 0x2ca0b3c1UL, + 0x4e823af8UL, 0x93c79040UL, 0x95e7fa51UL, 0x48a250e9UL, 0x2a80d9d0UL, + 0xf7c57368UL, 0xeec5cba2UL, 0x3380611aUL, 0x51a2e823UL, 0x8ce7429bUL, + 0x63a399b7UL, 0xbee6330fUL, 0xdcc4ba36UL, 0x0181108eUL, 0x1881a844UL, + 0xc5c402fcUL, 0xa7e68bc5UL, 0x7aa3217dUL, 0x52a0c93fUL, 0x8fe56387UL, + 0xedc7eabeUL, 0x30824006UL, 0x2982f8ccUL, 0xf4c75274UL, 0x96e5db4dUL, + 0x4ba071f5UL, 0xa4e4aad9UL, 0x79a10061UL, 0x1b838958UL, 0xc6c623e0UL, + 0xdfc69b2aUL, 0x02833192UL, 0x60a1b8abUL, 0xbde41213UL, 0xbbc47802UL, + 0x6681d2baUL, 0x04a35b83UL, 0xd9e6f13bUL, 0xc0e649f1UL, 0x1da3e349UL, + 0x7f816a70UL, 0xa2c4c0c8UL, 0x4d801be4UL, 0x90c5b15cUL, 0xf2e73865UL, + 0x2fa292ddUL, 0x36a22a17UL, 0xebe780afUL, 0x89c50996UL, 0x5480a32eUL, + 0x8585ddb4UL, 0x58c0770cUL, 0x3ae2fe35UL, 0xe7a7548dUL, 0xfea7ec47UL, + 0x23e246ffUL, 0x41c0cfc6UL, 0x9c85657eUL, 0x73c1be52UL, 0xae8414eaUL, + 0xcca69dd3UL, 0x11e3376bUL, 0x08e38fa1UL, 0xd5a62519UL, 0xb784ac20UL, + 0x6ac10698UL, 0x6ce16c89UL, 0xb1a4c631UL, 0xd3864f08UL, 0x0ec3e5b0UL, + 0x17c35d7aUL, 0xca86f7c2UL, 0xa8a47efbUL, 0x75e1d443UL, 0x9aa50f6fUL, + 0x47e0a5d7UL, 0x25c22ceeUL, 0xf8878656UL, 0xe1873e9cUL, 0x3cc29424UL, + 0x5ee01d1dUL, 0x83a5b7a5UL, 0xf90696d8UL, 0x24433c60UL, 0x4661b559UL, + 0x9b241fe1UL, 0x8224a72bUL, 0x5f610d93UL, 0x3d4384aaUL, 0xe0062e12UL, + 0x0f42f53eUL, 0xd2075f86UL, 0xb025d6bfUL, 0x6d607c07UL, 0x7460c4cdUL, + 0xa9256e75UL, 0xcb07e74cUL, 0x16424df4UL, 0x106227e5UL, 0xcd278d5dUL, + 0xaf050464UL, 0x7240aedcUL, 0x6b401616UL, 0xb605bcaeUL, 0xd4273597UL, + 0x09629f2fUL, 0xe6264403UL, 0x3b63eebbUL, 0x59416782UL, 0x8404cd3aUL, + 0x9d0475f0UL, 0x4041df48UL, 0x22635671UL, 0xff26fcc9UL, 0x2e238253UL, + 0xf36628ebUL, 0x9144a1d2UL, 0x4c010b6aUL, 0x5501b3a0UL, 0x88441918UL, + 0xea669021UL, 0x37233a99UL, 0xd867e1b5UL, 0x05224b0dUL, 0x6700c234UL, + 0xba45688cUL, 0xa345d046UL, 0x7e007afeUL, 0x1c22f3c7UL, 0xc167597fUL, + 0xc747336eUL, 0x1a0299d6UL, 0x782010efUL, 0xa565ba57UL, 0xbc65029dUL, + 0x6120a825UL, 0x0302211cUL, 0xde478ba4UL, 0x31035088UL, 0xec46fa30UL, + 0x8e647309UL, 0x5321d9b1UL, 0x4a21617bUL, 0x9764cbc3UL, 0xf54642faUL, + 0x2803e842UL, + // Constants for Neon CRC32C implementation, 128-bit operation + // k3 = 0x790606ff = x^160 mod poly - bit reversed + // k4 = 0x18b8ea18 = x^128 mod poly - bit reversed + // poly = x^32 + x^28 + x^27 + x^26 + x^25 + x^23 + x^22 + x^20 + x^19 + x^18 + x^14 + x^13 + x^11 + x^10 + x^9 + x^8 + x^6 + x^0 + 0 + 0x790606ff, 0x18b8ea18, // k4:k3 + 0x0679ff06, 0xb81818ea, // byte swap + 0x06ff7906, 0xea1818b8, // word swap + 0xff060679, 0x18eab818, // byte swap of word swap +}; + + +/** + * AES_TE[] table for AES encryption + */ +juint StubRoutines::aarch32::_aes_te_table[] + __attribute__ ((aligned(2048))) = +{ + //T + 0xc66363a5UL, 0xf87c7c84UL, 0xee777799UL, 0xf67b7b8dUL, + 0xfff2f20dUL, 0xd66b6bbdUL, 0xde6f6fb1UL, 0x91c5c554UL, + 0x60303050UL, 0x02010103UL, 0xce6767a9UL, 0x562b2b7dUL, + 0xe7fefe19UL, 0xb5d7d762UL, 0x4dababe6UL, 0xec76769aUL, + 0x8fcaca45UL, 0x1f82829dUL, 
0x89c9c940UL, 0xfa7d7d87UL, + 0xeffafa15UL, 0xb25959ebUL, 0x8e4747c9UL, 0xfbf0f00bUL, + 0x41adadecUL, 0xb3d4d467UL, 0x5fa2a2fdUL, 0x45afafeaUL, + 0x239c9cbfUL, 0x53a4a4f7UL, 0xe4727296UL, 0x9bc0c05bUL, + 0x75b7b7c2UL, 0xe1fdfd1cUL, 0x3d9393aeUL, 0x4c26266aUL, + 0x6c36365aUL, 0x7e3f3f41UL, 0xf5f7f702UL, 0x83cccc4fUL, + 0x6834345cUL, 0x51a5a5f4UL, 0xd1e5e534UL, 0xf9f1f108UL, + 0xe2717193UL, 0xabd8d873UL, 0x62313153UL, 0x2a15153fUL, + 0x0804040cUL, 0x95c7c752UL, 0x46232365UL, 0x9dc3c35eUL, + 0x30181828UL, 0x379696a1UL, 0x0a05050fUL, 0x2f9a9ab5UL, + 0x0e070709UL, 0x24121236UL, 0x1b80809bUL, 0xdfe2e23dUL, + 0xcdebeb26UL, 0x4e272769UL, 0x7fb2b2cdUL, 0xea75759fUL, + 0x1209091bUL, 0x1d83839eUL, 0x582c2c74UL, 0x341a1a2eUL, + 0x361b1b2dUL, 0xdc6e6eb2UL, 0xb45a5aeeUL, 0x5ba0a0fbUL, + 0xa45252f6UL, 0x763b3b4dUL, 0xb7d6d661UL, 0x7db3b3ceUL, + 0x5229297bUL, 0xdde3e33eUL, 0x5e2f2f71UL, 0x13848497UL, + 0xa65353f5UL, 0xb9d1d168UL, 0x00000000UL, 0xc1eded2cUL, + 0x40202060UL, 0xe3fcfc1fUL, 0x79b1b1c8UL, 0xb65b5bedUL, + 0xd46a6abeUL, 0x8dcbcb46UL, 0x67bebed9UL, 0x7239394bUL, + 0x944a4adeUL, 0x984c4cd4UL, 0xb05858e8UL, 0x85cfcf4aUL, + 0xbbd0d06bUL, 0xc5efef2aUL, 0x4faaaae5UL, 0xedfbfb16UL, + 0x864343c5UL, 0x9a4d4dd7UL, 0x66333355UL, 0x11858594UL, + 0x8a4545cfUL, 0xe9f9f910UL, 0x04020206UL, 0xfe7f7f81UL, + 0xa05050f0UL, 0x783c3c44UL, 0x259f9fbaUL, 0x4ba8a8e3UL, + 0xa25151f3UL, 0x5da3a3feUL, 0x804040c0UL, 0x058f8f8aUL, + 0x3f9292adUL, 0x219d9dbcUL, 0x70383848UL, 0xf1f5f504UL, + 0x63bcbcdfUL, 0x77b6b6c1UL, 0xafdada75UL, 0x42212163UL, + 0x20101030UL, 0xe5ffff1aUL, 0xfdf3f30eUL, 0xbfd2d26dUL, + 0x81cdcd4cUL, 0x180c0c14UL, 0x26131335UL, 0xc3ecec2fUL, + 0xbe5f5fe1UL, 0x359797a2UL, 0x884444ccUL, 0x2e171739UL, + 0x93c4c457UL, 0x55a7a7f2UL, 0xfc7e7e82UL, 0x7a3d3d47UL, + 0xc86464acUL, 0xba5d5de7UL, 0x3219192bUL, 0xe6737395UL, + 0xc06060a0UL, 0x19818198UL, 0x9e4f4fd1UL, 0xa3dcdc7fUL, + 0x44222266UL, 0x542a2a7eUL, 0x3b9090abUL, 0x0b888883UL, + 0x8c4646caUL, 0xc7eeee29UL, 0x6bb8b8d3UL, 0x2814143cUL, + 0xa7dede79UL, 0xbc5e5ee2UL, 0x160b0b1dUL, 0xaddbdb76UL, + 0xdbe0e03bUL, 0x64323256UL, 0x743a3a4eUL, 0x140a0a1eUL, + 0x924949dbUL, 0x0c06060aUL, 0x4824246cUL, 0xb85c5ce4UL, + 0x9fc2c25dUL, 0xbdd3d36eUL, 0x43acacefUL, 0xc46262a6UL, + 0x399191a8UL, 0x319595a4UL, 0xd3e4e437UL, 0xf279798bUL, + 0xd5e7e732UL, 0x8bc8c843UL, 0x6e373759UL, 0xda6d6db7UL, + 0x018d8d8cUL, 0xb1d5d564UL, 0x9c4e4ed2UL, 0x49a9a9e0UL, + 0xd86c6cb4UL, 0xac5656faUL, 0xf3f4f407UL, 0xcfeaea25UL, + 0xca6565afUL, 0xf47a7a8eUL, 0x47aeaee9UL, 0x10080818UL, + 0x6fbabad5UL, 0xf0787888UL, 0x4a25256fUL, 0x5c2e2e72UL, + 0x381c1c24UL, 0x57a6a6f1UL, 0x73b4b4c7UL, 0x97c6c651UL, + 0xcbe8e823UL, 0xa1dddd7cUL, 0xe874749cUL, 0x3e1f1f21UL, + 0x964b4bddUL, 0x61bdbddcUL, 0x0d8b8b86UL, 0x0f8a8a85UL, + 0xe0707090UL, 0x7c3e3e42UL, 0x71b5b5c4UL, 0xcc6666aaUL, + 0x904848d8UL, 0x06030305UL, 0xf7f6f601UL, 0x1c0e0e12UL, + 0xc26161a3UL, 0x6a35355fUL, 0xae5757f9UL, 0x69b9b9d0UL, + 0x17868691UL, 0x99c1c158UL, 0x3a1d1d27UL, 0x279e9eb9UL, + 0xd9e1e138UL, 0xebf8f813UL, 0x2b9898b3UL, 0x22111133UL, + 0xd26969bbUL, 0xa9d9d970UL, 0x078e8e89UL, 0x339494a7UL, + 0x2d9b9bb6UL, 0x3c1e1e22UL, 0x15878792UL, 0xc9e9e920UL, + 0x87cece49UL, 0xaa5555ffUL, 0x50282878UL, 0xa5dfdf7aUL, + 0x038c8c8fUL, 0x59a1a1f8UL, 0x09898980UL, 0x1a0d0d17UL, + 0x65bfbfdaUL, 0xd7e6e631UL, 0x844242c6UL, 0xd06868b8UL, + 0x824141c3UL, 0x299999b0UL, 0x5a2d2d77UL, 0x1e0f0f11UL, + 0x7bb0b0cbUL, 0xa85454fcUL, 0x6dbbbbd6UL, 0x2c16163aUL, + //S + 0x63UL, 0x7cUL, 0x77UL, 0x7bUL, 0xf2UL, 0x6bUL, 0x6fUL, 0xc5UL, + 0x30UL, 0x01UL, 0x67UL, 
0x2bUL, 0xfeUL, 0xd7UL, 0xabUL, 0x76UL, + 0xcaUL, 0x82UL, 0xc9UL, 0x7dUL, 0xfaUL, 0x59UL, 0x47UL, 0xf0UL, + 0xadUL, 0xd4UL, 0xa2UL, 0xafUL, 0x9cUL, 0xa4UL, 0x72UL, 0xc0UL, + 0xb7UL, 0xfdUL, 0x93UL, 0x26UL, 0x36UL, 0x3fUL, 0xf7UL, 0xccUL, + 0x34UL, 0xa5UL, 0xe5UL, 0xf1UL, 0x71UL, 0xd8UL, 0x31UL, 0x15UL, + 0x04UL, 0xc7UL, 0x23UL, 0xc3UL, 0x18UL, 0x96UL, 0x05UL, 0x9aUL, + 0x07UL, 0x12UL, 0x80UL, 0xe2UL, 0xebUL, 0x27UL, 0xb2UL, 0x75UL, + 0x09UL, 0x83UL, 0x2cUL, 0x1aUL, 0x1bUL, 0x6eUL, 0x5aUL, 0xa0UL, + 0x52UL, 0x3bUL, 0xd6UL, 0xb3UL, 0x29UL, 0xe3UL, 0x2fUL, 0x84UL, + 0x53UL, 0xd1UL, 0x00UL, 0xedUL, 0x20UL, 0xfcUL, 0xb1UL, 0x5bUL, + 0x6aUL, 0xcbUL, 0xbeUL, 0x39UL, 0x4aUL, 0x4cUL, 0x58UL, 0xcfUL, + 0xd0UL, 0xefUL, 0xaaUL, 0xfbUL, 0x43UL, 0x4dUL, 0x33UL, 0x85UL, + 0x45UL, 0xf9UL, 0x02UL, 0x7fUL, 0x50UL, 0x3cUL, 0x9fUL, 0xa8UL, + 0x51UL, 0xa3UL, 0x40UL, 0x8fUL, 0x92UL, 0x9dUL, 0x38UL, 0xf5UL, + 0xbcUL, 0xb6UL, 0xdaUL, 0x21UL, 0x10UL, 0xffUL, 0xf3UL, 0xd2UL, + 0xcdUL, 0x0cUL, 0x13UL, 0xecUL, 0x5fUL, 0x97UL, 0x44UL, 0x17UL, + 0xc4UL, 0xa7UL, 0x7eUL, 0x3dUL, 0x64UL, 0x5dUL, 0x19UL, 0x73UL, + 0x60UL, 0x81UL, 0x4fUL, 0xdcUL, 0x22UL, 0x2aUL, 0x90UL, 0x88UL, + 0x46UL, 0xeeUL, 0xb8UL, 0x14UL, 0xdeUL, 0x5eUL, 0x0bUL, 0xdbUL, + 0xe0UL, 0x32UL, 0x3aUL, 0x0aUL, 0x49UL, 0x06UL, 0x24UL, 0x5cUL, + 0xc2UL, 0xd3UL, 0xacUL, 0x62UL, 0x91UL, 0x95UL, 0xe4UL, 0x79UL, + 0xe7UL, 0xc8UL, 0x37UL, 0x6dUL, 0x8dUL, 0xd5UL, 0x4eUL, 0xa9UL, + 0x6cUL, 0x56UL, 0xf4UL, 0xeaUL, 0x65UL, 0x7aUL, 0xaeUL, 0x08UL, + 0xbaUL, 0x78UL, 0x25UL, 0x2eUL, 0x1cUL, 0xa6UL, 0xb4UL, 0xc6UL, + 0xe8UL, 0xddUL, 0x74UL, 0x1fUL, 0x4bUL, 0xbdUL, 0x8bUL, 0x8aUL, + 0x70UL, 0x3eUL, 0xb5UL, 0x66UL, 0x48UL, 0x03UL, 0xf6UL, 0x0eUL, + 0x61UL, 0x35UL, 0x57UL, 0xb9UL, 0x86UL, 0xc1UL, 0x1dUL, 0x9eUL, + 0xe1UL, 0xf8UL, 0x98UL, 0x11UL, 0x69UL, 0xd9UL, 0x8eUL, 0x94UL, + 0x9bUL, 0x1eUL, 0x87UL, 0xe9UL, 0xceUL, 0x55UL, 0x28UL, 0xdfUL, + 0x8cUL, 0xa1UL, 0x89UL, 0x0dUL, 0xbfUL, 0xe6UL, 0x42UL, 0x68UL, + 0x41UL, 0x99UL, 0x2dUL, 0x0fUL, 0xb0UL, 0x54UL, 0xbbUL, 0x16UL, + //rcon + 0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL, + 0x10000000UL, 0x20000000UL, 0x40000000UL, 0x80000000UL, + 0x1B000000UL, 0x36000000UL, 0UL, 0UL, + 0UL, 0UL, 0UL, 0UL +}; + + +/** + * AES_TD[] table for AES decryption + */ +juint StubRoutines::aarch32::_aes_td_table[] + __attribute__ ((aligned(2048))) = +{ + //T + 0x51f4a750UL, 0x7e416553UL, 0x1a17a4c3UL, 0x3a275e96UL, + 0x3bab6bcbUL, 0x1f9d45f1UL, 0xacfa58abUL, 0x4be30393UL, + 0x2030fa55UL, 0xad766df6UL, 0x88cc7691UL, 0xf5024c25UL, + 0x4fe5d7fcUL, 0xc52acbd7UL, 0x26354480UL, 0xb562a38fUL, + 0xdeb15a49UL, 0x25ba1b67UL, 0x45ea0e98UL, 0x5dfec0e1UL, + 0xc32f7502UL, 0x814cf012UL, 0x8d4697a3UL, 0x6bd3f9c6UL, + 0x038f5fe7UL, 0x15929c95UL, 0xbf6d7aebUL, 0x955259daUL, + 0xd4be832dUL, 0x587421d3UL, 0x49e06929UL, 0x8ec9c844UL, + 0x75c2896aUL, 0xf48e7978UL, 0x99583e6bUL, 0x27b971ddUL, + 0xbee14fb6UL, 0xf088ad17UL, 0xc920ac66UL, 0x7dce3ab4UL, + 0x63df4a18UL, 0xe51a3182UL, 0x97513360UL, 0x62537f45UL, + 0xb16477e0UL, 0xbb6bae84UL, 0xfe81a01cUL, 0xf9082b94UL, + 0x70486858UL, 0x8f45fd19UL, 0x94de6c87UL, 0x527bf8b7UL, + 0xab73d323UL, 0x724b02e2UL, 0xe31f8f57UL, 0x6655ab2aUL, + 0xb2eb2807UL, 0x2fb5c203UL, 0x86c57b9aUL, 0xd33708a5UL, + 0x302887f2UL, 0x23bfa5b2UL, 0x02036abaUL, 0xed16825cUL, + 0x8acf1c2bUL, 0xa779b492UL, 0xf307f2f0UL, 0x4e69e2a1UL, + 0x65daf4cdUL, 0x0605bed5UL, 0xd134621fUL, 0xc4a6fe8aUL, + 0x342e539dUL, 0xa2f355a0UL, 0x058ae132UL, 0xa4f6eb75UL, + 0x0b83ec39UL, 0x4060efaaUL, 0x5e719f06UL, 0xbd6e1051UL, + 0x3e218af9UL, 0x96dd063dUL, 
0xdd3e05aeUL, 0x4de6bd46UL, + 0x91548db5UL, 0x71c45d05UL, 0x0406d46fUL, 0x605015ffUL, + 0x1998fb24UL, 0xd6bde997UL, 0x894043ccUL, 0x67d99e77UL, + 0xb0e842bdUL, 0x07898b88UL, 0xe7195b38UL, 0x79c8eedbUL, + 0xa17c0a47UL, 0x7c420fe9UL, 0xf8841ec9UL, 0x00000000UL, + 0x09808683UL, 0x322bed48UL, 0x1e1170acUL, 0x6c5a724eUL, + 0xfd0efffbUL, 0x0f853856UL, 0x3daed51eUL, 0x362d3927UL, + 0x0a0fd964UL, 0x685ca621UL, 0x9b5b54d1UL, 0x24362e3aUL, + 0x0c0a67b1UL, 0x9357e70fUL, 0xb4ee96d2UL, 0x1b9b919eUL, + 0x80c0c54fUL, 0x61dc20a2UL, 0x5a774b69UL, 0x1c121a16UL, + 0xe293ba0aUL, 0xc0a02ae5UL, 0x3c22e043UL, 0x121b171dUL, + 0x0e090d0bUL, 0xf28bc7adUL, 0x2db6a8b9UL, 0x141ea9c8UL, + 0x57f11985UL, 0xaf75074cUL, 0xee99ddbbUL, 0xa37f60fdUL, + 0xf701269fUL, 0x5c72f5bcUL, 0x44663bc5UL, 0x5bfb7e34UL, + 0x8b432976UL, 0xcb23c6dcUL, 0xb6edfc68UL, 0xb8e4f163UL, + 0xd731dccaUL, 0x42638510UL, 0x13972240UL, 0x84c61120UL, + 0x854a247dUL, 0xd2bb3df8UL, 0xaef93211UL, 0xc729a16dUL, + 0x1d9e2f4bUL, 0xdcb230f3UL, 0x0d8652ecUL, 0x77c1e3d0UL, + 0x2bb3166cUL, 0xa970b999UL, 0x119448faUL, 0x47e96422UL, + 0xa8fc8cc4UL, 0xa0f03f1aUL, 0x567d2cd8UL, 0x223390efUL, + 0x87494ec7UL, 0xd938d1c1UL, 0x8ccaa2feUL, 0x98d40b36UL, + 0xa6f581cfUL, 0xa57ade28UL, 0xdab78e26UL, 0x3fadbfa4UL, + 0x2c3a9de4UL, 0x5078920dUL, 0x6a5fcc9bUL, 0x547e4662UL, + 0xf68d13c2UL, 0x90d8b8e8UL, 0x2e39f75eUL, 0x82c3aff5UL, + 0x9f5d80beUL, 0x69d0937cUL, 0x6fd52da9UL, 0xcf2512b3UL, + 0xc8ac993bUL, 0x10187da7UL, 0xe89c636eUL, 0xdb3bbb7bUL, + 0xcd267809UL, 0x6e5918f4UL, 0xec9ab701UL, 0x834f9aa8UL, + 0xe6956e65UL, 0xaaffe67eUL, 0x21bccf08UL, 0xef15e8e6UL, + 0xbae79bd9UL, 0x4a6f36ceUL, 0xea9f09d4UL, 0x29b07cd6UL, + 0x31a4b2afUL, 0x2a3f2331UL, 0xc6a59430UL, 0x35a266c0UL, + 0x744ebc37UL, 0xfc82caa6UL, 0xe090d0b0UL, 0x33a7d815UL, + 0xf104984aUL, 0x41ecdaf7UL, 0x7fcd500eUL, 0x1791f62fUL, + 0x764dd68dUL, 0x43efb04dUL, 0xccaa4d54UL, 0xe49604dfUL, + 0x9ed1b5e3UL, 0x4c6a881bUL, 0xc12c1fb8UL, 0x4665517fUL, + 0x9d5eea04UL, 0x018c355dUL, 0xfa877473UL, 0xfb0b412eUL, + 0xb3671d5aUL, 0x92dbd252UL, 0xe9105633UL, 0x6dd64713UL, + 0x9ad7618cUL, 0x37a10c7aUL, 0x59f8148eUL, 0xeb133c89UL, + 0xcea927eeUL, 0xb761c935UL, 0xe11ce5edUL, 0x7a47b13cUL, + 0x9cd2df59UL, 0x55f2733fUL, 0x1814ce79UL, 0x73c737bfUL, + 0x53f7cdeaUL, 0x5ffdaa5bUL, 0xdf3d6f14UL, 0x7844db86UL, + 0xcaaff381UL, 0xb968c43eUL, 0x3824342cUL, 0xc2a3405fUL, + 0x161dc372UL, 0xbce2250cUL, 0x283c498bUL, 0xff0d9541UL, + 0x39a80171UL, 0x080cb3deUL, 0xd8b4e49cUL, 0x6456c190UL, + 0x7bcb8461UL, 0xd532b670UL, 0x486c5c74UL, 0xd0b85742UL, +//S + 0x52UL, 0x09UL, 0x6aUL, 0xd5UL, 0x30UL, 0x36UL, 0xa5UL, 0x38UL, + 0xbfUL, 0x40UL, 0xa3UL, 0x9eUL, 0x81UL, 0xf3UL, 0xd7UL, 0xfbUL, + 0x7cUL, 0xe3UL, 0x39UL, 0x82UL, 0x9bUL, 0x2fUL, 0xffUL, 0x87UL, + 0x34UL, 0x8eUL, 0x43UL, 0x44UL, 0xc4UL, 0xdeUL, 0xe9UL, 0xcbUL, + 0x54UL, 0x7bUL, 0x94UL, 0x32UL, 0xa6UL, 0xc2UL, 0x23UL, 0x3dUL, + 0xeeUL, 0x4cUL, 0x95UL, 0x0bUL, 0x42UL, 0xfaUL, 0xc3UL, 0x4eUL, + 0x08UL, 0x2eUL, 0xa1UL, 0x66UL, 0x28UL, 0xd9UL, 0x24UL, 0xb2UL, + 0x76UL, 0x5bUL, 0xa2UL, 0x49UL, 0x6dUL, 0x8bUL, 0xd1UL, 0x25UL, + 0x72UL, 0xf8UL, 0xf6UL, 0x64UL, 0x86UL, 0x68UL, 0x98UL, 0x16UL, + 0xd4UL, 0xa4UL, 0x5cUL, 0xccUL, 0x5dUL, 0x65UL, 0xb6UL, 0x92UL, + 0x6cUL, 0x70UL, 0x48UL, 0x50UL, 0xfdUL, 0xedUL, 0xb9UL, 0xdaUL, + 0x5eUL, 0x15UL, 0x46UL, 0x57UL, 0xa7UL, 0x8dUL, 0x9dUL, 0x84UL, + 0x90UL, 0xd8UL, 0xabUL, 0x00UL, 0x8cUL, 0xbcUL, 0xd3UL, 0x0aUL, + 0xf7UL, 0xe4UL, 0x58UL, 0x05UL, 0xb8UL, 0xb3UL, 0x45UL, 0x06UL, + 0xd0UL, 0x2cUL, 0x1eUL, 0x8fUL, 0xcaUL, 0x3fUL, 0x0fUL, 0x02UL, + 0xc1UL, 0xafUL, 0xbdUL, 0x03UL, 
0x01UL, 0x13UL, 0x8aUL, 0x6bUL, + 0x3aUL, 0x91UL, 0x11UL, 0x41UL, 0x4fUL, 0x67UL, 0xdcUL, 0xeaUL, + 0x97UL, 0xf2UL, 0xcfUL, 0xceUL, 0xf0UL, 0xb4UL, 0xe6UL, 0x73UL, + 0x96UL, 0xacUL, 0x74UL, 0x22UL, 0xe7UL, 0xadUL, 0x35UL, 0x85UL, + 0xe2UL, 0xf9UL, 0x37UL, 0xe8UL, 0x1cUL, 0x75UL, 0xdfUL, 0x6eUL, + 0x47UL, 0xf1UL, 0x1aUL, 0x71UL, 0x1dUL, 0x29UL, 0xc5UL, 0x89UL, + 0x6fUL, 0xb7UL, 0x62UL, 0x0eUL, 0xaaUL, 0x18UL, 0xbeUL, 0x1bUL, + 0xfcUL, 0x56UL, 0x3eUL, 0x4bUL, 0xc6UL, 0xd2UL, 0x79UL, 0x20UL, + 0x9aUL, 0xdbUL, 0xc0UL, 0xfeUL, 0x78UL, 0xcdUL, 0x5aUL, 0xf4UL, + 0x1fUL, 0xddUL, 0xa8UL, 0x33UL, 0x88UL, 0x07UL, 0xc7UL, 0x31UL, + 0xb1UL, 0x12UL, 0x10UL, 0x59UL, 0x27UL, 0x80UL, 0xecUL, 0x5fUL, + 0x60UL, 0x51UL, 0x7fUL, 0xa9UL, 0x19UL, 0xb5UL, 0x4aUL, 0x0dUL, + 0x2dUL, 0xe5UL, 0x7aUL, 0x9fUL, 0x93UL, 0xc9UL, 0x9cUL, 0xefUL, + 0xa0UL, 0xe0UL, 0x3bUL, 0x4dUL, 0xaeUL, 0x2aUL, 0xf5UL, 0xb0UL, + 0xc8UL, 0xebUL, 0xbbUL, 0x3cUL, 0x83UL, 0x53UL, 0x99UL, 0x61UL, + 0x17UL, 0x2bUL, 0x04UL, 0x7eUL, 0xbaUL, 0x77UL, 0xd6UL, 0x26UL, + 0xe1UL, 0x69UL, 0x14UL, 0x63UL, 0x55UL, 0x21UL, 0x0cUL, 0x7dUL +}; + +/** + * SHA256[] table for SHA256 Digest + */ +juint StubRoutines::aarch32::_sha1_table[] + __attribute__ ((aligned(1024))) = +{ + //k + 0x5A827999UL, 0x6ED9EBA1UL, 0x8F1BBCDCUL, 0xCA62C1D6UL +}; + +/** + * SHA256[] table for SHA256 Digest + */ +juint StubRoutines::aarch32::_sha256_table[] + __attribute__ ((aligned(1024))) = +{ + //k + 0x428A2F98UL, 0x71374491UL, 0xB5C0FBCFUL, 0xE9B5DBA5UL, + 0x3956C25BUL, 0x59F111F1UL, 0x923F82A4UL, 0xAB1C5ED5UL, + 0xD807AA98UL, 0x12835B01UL, 0x243185BEUL, 0x550C7DC3UL, + 0x72BE5D74UL, 0x80DEB1FEUL, 0x9BDC06A7UL, 0xC19BF174UL, + 0xE49B69C1UL, 0xEFBE4786UL, 0x0FC19DC6UL, 0x240CA1CCUL, + 0x2DE92C6FUL, 0x4A7484AAUL, 0x5CB0A9DCUL, 0x76F988DAUL, + 0x983E5152UL, 0xA831C66DUL, 0xB00327C8UL, 0xBF597FC7UL, + 0xC6E00BF3UL, 0xD5A79147UL, 0x06CA6351UL, 0x14292967UL, + 0x27B70A85UL, 0x2E1B2138UL, 0x4D2C6DFCUL, 0x53380D13UL, + 0x650A7354UL, 0x766A0ABBUL, 0x81C2C92EUL, 0x92722C85UL, + 0xA2BFE8A1UL, 0xA81A664BUL, 0xC24B8B70UL, 0xC76C51A3UL, + 0xD192E819UL, 0xD6990624UL, 0xF40E3585UL, 0x106AA070UL, + 0x19A4C116UL, 0x1E376C08UL, 0x2748774CUL, 0x34B0BCB5UL, + 0x391C0CB3UL, 0x4ED8AA4AUL, 0x5B9CCA4FUL, 0x682E6FF3UL, + 0x748F82EEUL, 0x78A5636FUL, 0x84C87814UL, 0x8CC70208UL, + 0x90BEFFFAUL, 0xA4506CEBUL, 0xBEF9A3F7UL, 0xC67178F2UL +}; + +/** + * SHA512[] table for SHA512 Digest + */ +julong StubRoutines::aarch32::_sha512_table[] + __attribute__ ((aligned(1024))) = +{ + //k + 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL, 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL, + 0x3956c25bf348b538UL, 0x59f111f1b605d019UL, 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL, + 0xd807aa98a3030242UL, 0x12835b0145706fbeUL, 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL, + 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL, 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL, + 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL, 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL, + 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL, 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL, + 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL, 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL, + 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL, 0x06ca6351e003826fUL, 0x142929670a0e6e70UL, + 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL, 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL, + 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL, 0x81c2c92e47edaee6UL, 0x92722c851482353bUL, + 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL, 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL, + 0xd192e819d6ef5218UL, 0xd69906245565a910UL, 
0xf40e35855771202aUL, 0x106aa07032bbd1b8UL, + 0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL, 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL, + 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL, 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL, + 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL, 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL, + 0x90befffa23631e28UL, 0xa4506cebde82bde9UL, 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL, + 0xca273eceea26619cUL, 0xd186b8c721c0c207UL, 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL, + 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, 0x113f9804bef90daeUL, 0x1b710b35131c471bUL, + 0x28db77f523047d84UL, 0x32caab7b40c72493UL, 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL, + 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL +}; + +address StubRoutines::_cipherBlockChaining_encryptAESCrypt_special = NULL; +address StubRoutines::_cipherBlockChaining_decryptAESCrypt_special = NULL; +address StubRoutines::_aes_table_te_addr = NULL; +address StubRoutines::_aes_table_td_addr = NULL; + +address StubRoutines::_sha1_table_addr = NULL; +address StubRoutines::_sha256_table_addr = NULL; +address StubRoutines::_sha512_table_addr = NULL; --- /dev/null 2018-09-25 19:25:27.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/stubRoutines_aarch32.hpp 2018-09-25 19:25:27.000000000 +0300 @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_STUBROUTINES_AARCH32_HPP +#define CPU_AARCH32_VM_STUBROUTINES_AARCH32_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to +// extend it. + +// n.b. if we are notifying entry/exit to the simulator then the call +// stub does a notify at normal return placing +// call_stub_return_address one instruction beyond the notify. the +// latter address is sued by the stack unwind code when doign an +// exception return. 
+static bool returns_to_call_stub(address return_pc) { + return return_pc == _call_stub_return_address; +} + +enum platform_dependent_constants { + code_size1 = 19000, // simply increase if too small (assembler will crash if too small) + code_size2 = 22000 // simply increase if too small (assembler will crash if too small) +}; + +class aarch32 { + friend class StubGenerator; + + private: +#ifdef COMPILER2 + static address _idiv_entry; + static address _irem_entry; + static address _partial_subtype_check; + static address _string_compress_neon; + static address _string_inflate_neon; +#endif + + public: + +#ifdef COMPILER2 + static address idiv_entry() { + return _idiv_entry; + } + + static address irem_entry() { + return _irem_entry; + } + + static address partial_subtype_check() { + return _partial_subtype_check; + } + + static address string_compress_neon() { + return _string_compress_neon; + } + + static address string_inflate_neon() { + return _string_inflate_neon; + } +#endif + + private: + static juint _crc_table[]; + static juint _crc32c_table[]; + + private: + static juint _aes_te_table[]; + static juint _aes_td_table[]; + + private: + static juint _sha1_table[]; + static juint _sha256_table[]; + static julong _sha512_table[]; +}; + + + static address _cipherBlockChaining_encryptAESCrypt_special; + static address _cipherBlockChaining_decryptAESCrypt_special; + + static address _aes_table_te_addr; + static address _aes_table_td_addr; + + static address _sha1_table_addr; + static address _sha256_table_addr; + static address _sha512_table_addr; + +public: + static address cipherBlockChaining_encryptAESCrypt_special() { return _cipherBlockChaining_encryptAESCrypt_special; } + static address cipherBlockChaining_decryptAESCrypt_special() { return _cipherBlockChaining_decryptAESCrypt_special; } + static address aes_table_te_addr() { return _aes_table_te_addr; } + static address aes_table_td_addr() { return _aes_table_td_addr; } + + static address sha1_table_addr() { return _sha1_table_addr; } + static address sha256_table_addr() { return _sha256_table_addr; } + static address sha512_table_addr() { return _sha512_table_addr; } + +#endif // CPU_AARCH32_VM_STUBROUTINES_AARCH32_HPP --- /dev/null 2018-09-25 19:25:28.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/templateInterpreterGenerator_aarch32.cpp 2018-09-25 19:25:28.000000000 +0300 @@ -0,0 +1,2312 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interp_masm_aarch32.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/bytecodeTracer.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/templateInterpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" + +#include + +#ifndef PRODUCT +#include "oops/method.hpp" +#include "vm_version_aarch32.hpp" +#endif // !PRODUCT + +// Size of interpreter code. Increase if too small. Interpreter will +// fail with a guarantee ("not enough space for interpreter generation"); +// if too small. +// Run with +PrintInterpreter to get the VM to print out the size. +// Max size with JVMTI +int TemplateInterpreter::InterpreterCodeSize = 200 * 1024; + +#define __ _masm-> + +//----------------------------------------------------------------------------- + +extern "C" void entry(CodeBuffer*); + +//----------------------------------------------------------------------------- + +address TemplateInterpreterGenerator::generate_slow_signature_handler() { + address entry = __ pc(); + + // The sp should be aligned on entry to the bottom of where the integer args + // need to be copied to. + + // rmethod + // rlocals + // c_rarg3: first stack arg - wordSize + + __ mov(c_rarg3, sp); + __ sub(sp, sp, 22 * wordSize); + __ str(lr, sp); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::slow_signature_handler), + rmethod, rlocals, c_rarg3); + + // r0: result handler + + // Stack layout: + // rsp: return address <- sp (lowest addr) + // 1 float/double identifiers with the following structure: + // 16 bit - 2 bits per word free/in use indication (0==in use) + // 8 bits - 1 bit per word, double/float indication (0==double) + // 4 integer args (if static first is unused) + // 8 double args (defined by ARM calling convention spec) + // stack args <- sp (on entry) + // garbage + // expression stack bottom + // bcp (NULL) + // ... + // If this changes, update interpreterRt_aarch32.cpp slowpath! 
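+  // The float/double identifier word described above is decoded below under
+  // HARD_FLOAT_CC: bit (16 + i) selects double vs. float pair for FP slot i
+  // (0 == double), while bits (2*i) and (2*i + 1) mark the corresponding
+  // argument words as free (0 == in use).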
+ + // Restore LR + __ ldr(lr, sp); + +#ifdef HARD_FLOAT_CC + // Do FP first so we can use c_rarg3 as temp + __ ldr(c_rarg3, Address(sp, wordSize)); // float/double identifiers + + { + Label fp_done; + // each iteration covers either single double register or up to 2 float registers + for (int i = 0; i < Argument::n_float_register_parameters_c; i++) { + Label d, done; + + __ tst(c_rarg3, 1 << i+16); + __ b(d, __ EQ); + __ tst(c_rarg3, 1 << i*2); + __ b(fp_done, __ NE); + __ vldr_f32(as_FloatRegister(i*2), Address(sp, (6 + 2 * i) * wordSize)); + __ tst(c_rarg3, 1 << i*2+1); + __ vldr_f32(as_FloatRegister(i*2+1), Address(sp, (7 + 2 * i) * wordSize), __ EQ); + __ b(done); + __ bind(d); + __ vldr_f64(as_DoubleFloatRegister(i), Address(sp, (6 + 2 * i) * wordSize)); + __ bind(done); + } + __ bind(fp_done); + } +#endif // HARD_FLOAT_CC + + // c_rarg0 contains the result from the call of + // InterpreterRuntime::slow_signature_handler so we don't touch it + // here. It will be loaded with the JNIEnv* later. + __ ldr(c_rarg1, Address(sp, 2 * wordSize)); + __ ldrd(c_rarg2, c_rarg3, Address(sp, 3 * wordSize)); + + __ add(sp, sp, 22 * wordSize); + __ b(lr); + + return entry; +} + + +// +// Various method entries +// + +address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { + // rmethod: Method* + // r4: sender sp + // sp: args + + //if (!InlineIntrinsics) return NULL; // Generate a vanilla entry + // FIXME currently ignoring this flag and inlining anyway + + // These don't need a safepoint check because they aren't virtually + // callable. We won't enter these intrinsics from compiled code. + // If in the future we added an intrinsic which was virtually callable + // we'd have to worry about how to safepoint so that this code is used. 
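+  // Overview of the cases below: abs and sqrt are computed inline with VFP
+  // instructions when an FPU is present; all other kinds (and the soft-float
+  // fallback for abs/sqrt) load the operand(s) into core registers or d0/d1
+  // and fall through to generate_transcendental_entry, which calls the
+  // matching SharedRuntime::d* routine.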
+ + // mathematical functions inlined by compiler + // (interpreter must provide identical implementation + // in order to avoid monotonicity bugs when switching + // from interpreter to compiler in the middle of some + // computation) + // + // stack: + // [ arg ] <-- sp + // [ arg ] + // retaddr in lr + + address entry_point = NULL; + Register continuation = lr; + bool transcendental_entry = false; + + switch (kind) { + case Interpreter::java_lang_math_abs: + entry_point = __ pc(); + if(hasFPU()) { + __ vldr_f64(d0, Address(sp)); + __ vabs_f64(d0, d0); + } else { + __ ldrd(r0, Address(sp)); + transcendental_entry = true; + } + break; + case Interpreter::java_lang_math_sqrt: + entry_point = __ pc(); + if(hasFPU()) { + __ vldr_f64(d0, Address(sp)); + __ vsqrt_f64(d0, d0); + } else { + __ ldrd(r0, Address(sp)); + transcendental_entry = true; + } + break; + case Interpreter::java_lang_math_sin : + case Interpreter::java_lang_math_cos : + case Interpreter::java_lang_math_tan : + case Interpreter::java_lang_math_log : + case Interpreter::java_lang_math_log10 : + case Interpreter::java_lang_math_exp : + entry_point = __ pc(); + transcendental_entry = true; +#ifndef HARD_FLOAT_CC + __ ldrd(r0, Address(sp)); +#else + __ vldr_f64(d0, Address(sp)); +#endif //HARD_FLOAT_CC + break; + case Interpreter::java_lang_math_pow : + entry_point = __ pc(); + transcendental_entry = true; +#ifndef HARD_FLOAT_CC + __ ldrd(r0, Address(sp, 2*Interpreter::stackElementSize)); + __ ldrd(r2, Address(sp)); +#else + __ vldr_f64(d0, Address(sp, 2*Interpreter::stackElementSize)); + __ vldr_f64(d1, Address(sp)); +#endif //HARD_FLOAT_CC + break; + case Interpreter::java_lang_math_fmaD : + case Interpreter::java_lang_math_fmaF : + if (UseFMA) { + __ unimplemented(); + } + break; + default: + ShouldNotReachHere(); + } + + if (entry_point) { + __ mov(sp, r4); + + if(transcendental_entry) { + __ mov(r4, lr); + continuation = r4; + generate_transcendental_entry(kind); +#ifndef HARD_FLOAT_CC + if(hasFPU()) { + __ vmov_f64(d0, r0, r1); + } +#endif + } + + __ b(continuation); + } + + return entry_point; +} + + // double trigonometrics and transcendentals + // static jdouble dsin(jdouble x); + // static jdouble dcos(jdouble x); + // static jdouble dtan(jdouble x); + // static jdouble dlog(jdouble x); + // static jdouble dlog10(jdouble x); + // static jdouble dexp(jdouble x); + // static jdouble dpow(jdouble x, jdouble y); + +void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind) { + address fn = NULL; + switch (kind) { +#ifdef __SOFTFP__ + case Interpreter::java_lang_math_abs: + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dabs); + break; + case Interpreter::java_lang_math_sqrt: + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt); + break; +#endif //__SOFTFP__ + case Interpreter::java_lang_math_sin : + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); + break; + case Interpreter::java_lang_math_cos : + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); + break; + case Interpreter::java_lang_math_tan : + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); + break; + case Interpreter::java_lang_math_log : + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog); + break; + case Interpreter::java_lang_math_log10 : + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); + break; + case Interpreter::java_lang_math_exp : + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); + break; + case Interpreter::java_lang_math_pow : + fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); + 
break; + default: + ShouldNotReachHere(); + } + __ align_stack(); + __ mov(rscratch1, fn); + __ bl(rscratch1); +} + +// Abstract method entry +// Attempt to execute abstract method. Throw exception +address TemplateInterpreterGenerator::generate_abstract_entry(void) { + // rmethod: Method* + // r13: sender SP + + address entry_point = __ pc(); + + // abstract method entry + + // pop return address, reset last_sp to NULL + __ empty_expression_stack(); + __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + + // throw exception + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_AbstractMethodErrorWithMethod), + rmethod); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + return entry_point; +} + +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { + address entry = __ pc(); + +#ifdef ASSERT + { + Label L; + __ ldr(rscratch1, Address(rfp, + frame::get_interpreter_frame_monitor_block_top_offset() * + wordSize)); + __ mov(rscratch2, sp); + __ cmp(rscratch1, rscratch2); // maximal rsp for current rfp (stack + // grows negative) + __ b(L, Assembler::HS); // check if frame is complete + __ stop ("interpreter frame not set up"); + __ bind(L); + } +#endif // ASSERT + // Restore bcp under the assumption that the current frame is still + // interpreted + __ restore_bcp(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // throw exception + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_StackOverflowError)); + return entry; +} + +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // setup parameters + + // ??? convention: expect aberrant index in register r2 + // ??? convention: expect array in register r3 + __ mov(c_rarg1, r3); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + throw_ArrayIndexOutOfBoundsException), + c_rarg1, c_rarg2); + return entry; +} + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() { + address entry = __ pc(); + + // object is at TOS + __ pop(c_rarg1); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + throw_ClassCastException), + c_rarg1); + return entry; +} + +address TemplateInterpreterGenerator::generate_exception_handler_common( + const char* name, const char* message, bool pass_oop) { + assert(!pass_oop || message == NULL, "either oop or message but not both"); + address entry = __ pc(); + if (pass_oop) { + // object is at TOS + __ pop(c_rarg2); + } + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // FIXME shouldn't it be in rest of generate_* ? + // rdispatch assumed to cache dispatch table. This code can be called from + // signal handler, so it can't assume execption caller preserved the register, + // so restore it here + __ get_dispatch(); + // FIXME shouldn't get_method be here ? 
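+  // Depending on pass_oop, the code below either hands the popped TOS object
+  // to InterpreterRuntime::create_klass_exception or passes the optional
+  // message string to InterpreterRuntime::create_exception; both leave the
+  // exception oop in r0 before branching to the throw_exception entry.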
+ // setup parameters + __ lea(c_rarg1, Address((address)name)); + if (pass_oop) { + __ call_VM(r0, CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + create_klass_exception), + c_rarg1, c_rarg2); + } else { + // kind of lame ExternalAddress can't take NULL because + // external_word_Relocation will assert. + if (message != NULL) { + __ lea(c_rarg2, Address((address)message)); + } else { + __ mov(c_rarg2, NULL_WORD); + } + __ call_VM(r0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), + c_rarg1, c_rarg2); + } + // throw exception + __ b(address(Interpreter::throw_exception_entry())); + return entry; +} + +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { + address entry = __ pc(); + + __ print_method_exit(); + __ reg_printf("A. return_entry : 0x%08x%08x\n", r1, r0); + + // Restore stack bottom in case i2c adjusted stack + __ ldr(sp, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); + // and NULL it as marker that sp is now tos until next java call + __ mov(rscratch1, 0); + __ str(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); + __ reg_printf("B. return_entry : 0x%08x%08x\n", r1, r0); + __ restore_bcp(); + __ restore_locals(); + __ restore_constant_pool_cache(); + __ get_method(rmethod); + __ reg_printf("C. return_entry : 0x%08x%08x\n", r1, r0); + + if (state == atos) { + Register obj = r0; + Register mdp = r1; + Register tmp = r2; + __ ldr(mdp, Address(rmethod, Method::method_data_offset())); + __ profile_return_type(mdp, obj, tmp); + } + + // Pop N words from the stack + __ get_cache_and_index_at_bcp(r3, r2, 1, index_size); + __ reg_printf("D. return_entry : 0x%08x%08x\n", r1, r0); + __ ldr(r3, Address(r3, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ andr(r3, r3, ConstantPoolCacheEntry::parameter_size_mask); + + __ add(sp, sp, r3, lsl(2)); + + // Restore machine SP + /*__ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2); + __ ldr(rscratch2, + Address(rfp, frame::get_interpreter_frame_initial_sp_offset() * wordSize)); + __ sub(rscratch1, rscratch2, rscratch1, lsl(2)); + __ bic(sp, rscratch1, 0xf);*/ + + __ check_and_handle_popframe(rthread); + __ check_and_handle_earlyret(rthread); + + __ get_dispatch(); + __ reg_printf("E. 
return_entry : 0x%08x%08x\n", r1, r0); + __ dispatch_next(state, step); + + return entry; +} + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, + int step, + address continuation) { + address entry = __ pc(); + __ restore_bcp(); + __ restore_locals(); + __ restore_constant_pool_cache(); + __ get_method(rmethod); + + __ get_dispatch(); + + // Calculate stack limit + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2); + __ ldr(rscratch2, + Address(rfp, frame::get_interpreter_frame_initial_sp_offset() * wordSize)); + __ sub(rscratch1, rscratch2, rscratch1, lsl(2)); + __ bic(sp, rscratch1, 0xf); + + // Restore expression stack pointer + __ ldr(sp, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); + // NULL last_sp until next java call + __ mov(rscratch1, 0); + __ str(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); + + // handle exceptions + { + Label L; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, L); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + if (continuation == NULL) { + __ dispatch_next(state, step); + } else { + __ jump_to_entry(continuation); + } + return entry; +} + + +address TemplateInterpreterGenerator::generate_result_handler_for( + BasicType type) { + address entry = __ pc(); + switch (type) { + case T_BOOLEAN: __ c2bool(r0); break; + case T_CHAR : __ uxth(r0, r0); break; + case T_BYTE : __ sxtb(r0, r0); break; + case T_SHORT : __ sxth(r0, r0); break; + case T_INT : /* nothing to do */ break; + case T_LONG : /* nothing to do */ break; + case T_VOID : /* nothing to do */ break; + case T_FLOAT : +#ifndef HARD_FLOAT_CC + if(hasFPU()) { + __ vmov_f32(d0, r0); + } +#endif + break; + case T_DOUBLE : +#ifndef HARD_FLOAT_CC + if(hasFPU()) { + __ vmov_f64(d0, r0, r1); + } +#endif + break; + case T_OBJECT : + // retrieve result from frame + __ reg_printf("In object result handler\n"); + __ ldr(r0, Address(rfp, frame::get_interpreter_frame_oop_temp_offset()*wordSize)); + // and verify it + __ verify_oop(r0); + break; + default : ShouldNotReachHere(); + } + __ b(lr); // return from result handler + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for( + TosState state, + address runtime_entry) { + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ membar(Assembler::AnyAny); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + return entry; +} + +// Helpers for commoning out cases in the various type of method entries. +// + + +// increment invocation count & check for overflow +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test +// +// rmethod: method +// +void TemplateInterpreterGenerator::generate_counter_incr( + Label* overflow, + Label* profile_method, + Label* profile_method_continue) { + Label done; + // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. + if (TieredCompilation) { + int increment = InvocationCounter::count_increment; + Label no_mdo; + if (ProfileInterpreter) { + // Are we profiling? 
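+      // If an MDO is attached, the invocation counter in the MDO is bumped
+      // and tested against MethodData::invoke_mask_offset(); otherwise we
+      // fall through to no_mdo and use the MethodCounters-based counter.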
+ __ ldr(r0, Address(rmethod, Method::method_data_offset())); + __ cbz(r0, no_mdo); + // Increment counter in the MDO + const Address mdo_invocation_counter(r0, in_bytes(MethodData::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + const Address mask(r0, in_bytes(MethodData::invoke_mask_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, rscratch1, rscratch2, false, Assembler::EQ, overflow); + __ b(done); + } + __ bind(no_mdo); + // Increment counter in MethodCounters + const Address invocation_counter(rscratch2, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + const Address mask(rscratch2, in_bytes(MethodCounters::invoke_mask_offset())); + __ get_method_counters(rmethod, rscratch2, done); + __ increment_mask_and_jump(invocation_counter, increment, mask, rscratch1, rscratch2, false, Assembler::EQ, overflow); + __ bind(done); + } else { // not TieredCompilation + const Address backedge_counter(rscratch2, + MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset()); + const Address invocation_counter(rscratch2, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + + __ get_method_counters(rmethod, rscratch2, done); + + if (ProfileInterpreter) { // %%% Merge this into MethodData* + __ ldr(r1, Address(rscratch2, MethodCounters::interpreter_invocation_counter_offset())); + __ add(r1, r1, 1); + __ str(r1, Address(rscratch2, MethodCounters::interpreter_invocation_counter_offset())); + } + // Update standard invocation counters + __ ldr(r1, invocation_counter); + __ ldr(r0, backedge_counter); + + __ add(r1, r1, InvocationCounter::count_increment); + __ mov(rscratch1, InvocationCounter::count_mask_value); + __ andr(r0, r0, rscratch1); + + __ str(r1, invocation_counter); + __ add(r0, r0, r1); // add both counters + + // profile_method is non-null only for interpreted method so + // profile_method != NULL == !native_call + + if (ProfileInterpreter && profile_method != NULL) { + // Test to see if we should create a method data oop + __ ldr(rscratch2, Address(rmethod, Method::method_counters_offset())); + __ ldr(rscratch2, Address(rscratch2, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); + __ cmp(r0, rscratch2); + __ b(*profile_method_continue, Assembler::LT); + + // if no method data exists, go to profile_method + __ test_method_data_pointer(rscratch2, *profile_method); + } + + { + __ ldr(rscratch2, Address(rmethod, Method::method_counters_offset())); + __ ldr(rscratch2, Address(rscratch2, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); + __ cmp(r0, rscratch2); + __ b(*overflow, Assembler::HS); + } + __ bind(done); + } +} + +void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { + + // Asm interpreter on entry + // On return (i.e. jump to entry_point) [ back to invocation of interpreter ] + // Everything as it was on entry + + // InterpreterRuntime::frequency_counter_overflow takes two + // arguments, the first (thread) is passed by call_VM, the second + // indicates if the counter overflow occurs at a backwards branch + // (NULL bcp). We pass zero for it. The call returns the address + // of the verified entry point for the method or NULL if the + // compilation did not complete (either went background or bailed + // out). 
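+  // Passing zero in c_rarg1 is the "NULL bcp" case mentioned above: the
+  // overflow happened on method entry, not at a backwards branch.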
+ __ mov(c_rarg1, 0); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), + c_rarg1); + + __ b(do_continue); +} + +// See if we've got enough room on the stack for locals plus overhead +// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError +// without going through the signal handler, i.e., reserved and yellow zones +// will not be made usable. The shadow zone must suffice to handle the +// overflow. +// The expression stack grows down incrementally, so the normal guard +// page mechanism will work for that. +// +// NOTE: Since the additional locals are also always pushed (wasn't +// obvious in generate_method_entry) so the guard should work for them +// too. +// +// Args: +// r3: number of additional locals this frame needs (what we must check) +// rmethod: Method* +// +// Kills: +// r0 +void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { + + // monitor entry size: see picture of stack set + // (generate_method_entry) and frame_amd64.hpp + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + // total overhead size: entry_size + (saved rbp through expr stack + // bottom). be sure to change this if you add/subtract anything + // to/from the overhead area + const int overhead_size = + -(frame::get_interpreter_frame_initial_sp_offset() * wordSize) + entry_size; + + const int page_size = os::vm_page_size(); + + Label after_frame_check; + + // see if the frame is greater than one page in size. If so, + // then we need to verify there is enough stack space remaining + // for the additional locals. + // + __ mov(rscratch1, (page_size - overhead_size) / Interpreter::stackElementSize); + __ cmp(r3, rscratch1); + __ b(after_frame_check, Assembler::LS); + + // compute rsp as if this were going to be the last frame on + // the stack before the red zone + + // locals + overhead, in bytes + __ mov(r0, overhead_size); + __ add(r0, r0, r3, lsl(Interpreter::logStackElementSize)); // 1 slot per parameter. + + const Address stack_limit(rthread, JavaThread::stack_overflow_limit_offset()); + __ ldr(rscratch1, stack_limit); + +#ifdef ASSERT + Label limit_okay; + // Verify that thread stack limit is non-zero. + __ cbnz(rscratch1, limit_okay); + __ stop("stack overflow limit is zero"); + __ bind(limit_okay); +#endif + + // Add stack limit to locals. + __ add(r0, r0, rscratch1); + + // Check against the current stack bottom. + __ cmp(sp, r0); + __ b(after_frame_check, Assembler::HI); + + // Remove the incoming args, peeling the machine SP back to where it + // was in the caller. This is not strictly necessary, but unless we + // do so the stack frame may have a garbage FP; this ensures a + // correct call stack that we can always unwind. + __ mov(sp, r4); + + // Note: the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. 
+ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); + + // all done with frame size check + __ bind(after_frame_check); +} + +// Allocate monitor and lock method (asm interpreter) +// +// Args: +// rmethod: Method* +// rlocals: locals +// +// Kills: +// r0 +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) +// rscratch1, rscratch2 (scratch regs) +void TemplateInterpreterGenerator::lock_method(void) { + // synchronize method + const Address access_flags(rmethod, Method::access_flags_offset()); + const Address monitor_block_top( + rfp, + frame::get_interpreter_frame_monitor_block_top_offset() * wordSize); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + +#ifdef ASSERT + { + Label L; + __ ldr(r0, access_flags); + __ tst(r0, JVM_ACC_SYNCHRONIZED); + __ b(L, Assembler::NE); + __ stop("method doesn't need synchronization"); + __ bind(L); + } +#endif // ASSERT + + // get synchronization object + { + Label done; + __ ldr(r0, access_flags); + __ tst(r0, JVM_ACC_STATIC); + // get receiver (assume this is frequent case) + __ ldr(r0, Address(rlocals, Interpreter::local_offset_in_bytes(0))); + __ b(done, Assembler::EQ); + __ load_mirror(r0, rmethod, r1); + +#ifdef ASSERT + { + Label L; + __ cbnz(r0, L); + __ stop("synchronization object is NULL"); + __ bind(L); + } +#endif // ASSERT + + __ bind(done); + } + + // add space for monitor & lock + __ sub(sp, sp, entry_size); // add space for a monitor entry + __ mov(rscratch1, sp); + __ str(rscratch1, monitor_block_top); // set new monitor block top + // store object + __ str(r0, Address(sp, BasicObjectLock::obj_offset_in_bytes())); + __ mov(c_rarg1, sp); // object address + __ lock_object(c_rarg1); +} + +// Generate a fixed interpreter frame. This is identical setup for +// interpreted methods and for native methods hence the shared code. +// +// Args: +// lr: return address +// rmethod: Method* +// rlocals: pointer to locals +// stack_pointer: previous sp +// r4 contains the sender sp +void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + // initialize fixed part of activation frame + __ reg_printf("About to print native entry, rmethod = %p\n", rmethod); + __ print_method_entry(rmethod, native_call); + + const int int_frame_size = 10; + const int common_frame_size = int_frame_size + frame::get_frame_size(); + const int frame_size = native_call ? 
common_frame_size + 2 : common_frame_size; + + if (native_call) { + // add 2 zero-initialized slots for native calls + __ sub(sp, sp, 2 * wordSize); + __ mov(rbcp, 0); + __ strd(rbcp, rbcp, Address(sp)); + } else { + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); // get ConstMethod + __ add(rbcp, rscratch1, in_bytes(ConstMethod::codes_offset())); // get codebase + } + + __ enter(); + __ sub(sp, sp, int_frame_size * wordSize); + + __ strd(sp, rbcp, Address(sp)); + + if (ProfileInterpreter) { + Label method_data_continue; + __ ldr(rscratch1, Address(rmethod, Method::method_data_offset())); + __ cbz(rscratch1, method_data_continue); + __ lea(rscratch1, Address(rscratch1, in_bytes(MethodData::data_offset()))); + __ bind(method_data_continue); + __ strd(rscratch1, rmethod, Address(sp, 6 * wordSize)); // save Method* and mdp (method data pointer) + } else { + __ mov(rscratch1, 0); + __ strd(rscratch1, rmethod, Address(sp, 6 * wordSize)); // save Method* (no mdp) + } + + // Get mirror and store it in the frame as GC root for this Method* + __ load_mirror(rscratch1, rmethod, rcpool); + __ mov(rscratch2, 0); + __ strd(rscratch1, rscratch2, Address(sp, 4 * wordSize)); + + __ ldr(rcpool, Address(rmethod, Method::const_offset())); + __ ldr(rcpool, Address(rcpool, ConstMethod::constants_offset())); + __ ldr(rcpool, Address(rcpool, ConstantPool::cache_offset_in_bytes())); + __ strd(rlocals, rcpool, Address(sp, 2 * wordSize)); + + __ reg_printf("Three-quarters through\n"); + // set sender sp + // leave last_sp as null + __ mov(rscratch1, 0); + // r4 contains the sender sp + __ strd(rscratch1, r4, Address(sp, 8 * wordSize)); + + // Move SP out of the way + /*if (! native_call) { + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2); + __ sub(rscratch1, sp, rscratch1, lsl(2)); + __ bic(sp, rscratch1, 0xf); + }*/ + // FIXME This code moves the sp to after the end of the stack - if this is what's happening + // some calls out of the VM may need to be patched + __ reg_printf("Fully through\n"); +} + +// End of helpers + +// Various method entries +//------------------------------------------------------------------------------------------------------------------------ +// +// + +// Method entry for java.lang.ref.Reference.get. +address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { + // Code: _aload_0, _getfield, _areturn + // parameter size = 1 + // + // The code that gets generated by this routine is split into 2 parts: + // 1. The "intrinsified" code for G1 (or any SATB based GC), + // 2. The slow path - which is an expansion of the regular method entry. + // + // Notes:- + // * In the G1 code we do not check whether we need to block for + // a safepoint. If G1 is enabled then we must execute the specialized + // code for Reference.get (except when the Reference object is null) + // so that we can log the value in the referent field with an SATB + // update buffer. + // If the code for the getfield template is modified so that the + // G1 pre-barrier code is executed when the current method is + // Reference.get() then going through the normal method entry + // will be fine. + // * The G1 code can, however, check the receiver object (the instance + // of java.lang.Reference) and jump to the slow path if null. 
If the + // Reference object is null then we obviously cannot fetch the referent + // and so we don't need to call the G1 pre-barrier. Thus we can use the + // regular method entry code to generate the NPE. + // + // This code is based on generate_accessor_entry. + // + // rmethod: Method* + // r13: senderSP must preserve for slow path, set SP to it on fast path + + // LR is live. It must be saved around calls. + + address entry = __ pc(); + + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + Label slow_path; + const Register local_0 = c_rarg0; + // Check if local 0 != NULL + // If the receiver is null then it is OK to jump to the slow path. + __ ldr(local_0, Address(sp, 0)); + __ cbz(local_0, slow_path); + + // Load the value of the referent field. + const Address field_address(local_0, referent_offset); + BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bs->load_word_at(_masm, IN_HEAP | ON_WEAK_OOP_REF, T_OBJECT, local_0, field_address, /*tmp1*/ rscratch2, /*tmp2*/ rscratch1); + + // areturn + __ mov(sp, r4); // set sp to sender sp + __ b(lr); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); + return entry; +} + +void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { + // Bang each page in the shadow zone. We can't assume it's been done for + // an interpreter frame with greater than a page of locals, so each page + // needs to be checked. Only true for non-native. + if (UseStackBanging) { + const int n_shadow_pages = JavaThread::stack_shadow_zone_size() / os::vm_page_size(); + const int start_page = native_call ? n_shadow_pages : 1; + const int page_size = os::vm_page_size(); + __ mov(rscratch1, 0); + for (int pages = start_page; pages <= n_shadow_pages ; pages++) { + __ sub(rscratch2, sp, pages*page_size); + __ str(rscratch1, Address(rscratch2)); + } + } +} + + +// Interpreter stub for calling a native method. (asm interpreter) +// This sets up a somewhat different looking stack for calling the +// native method than the typical interpreter frame setup. +address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // r1: Method* + // r4: sender sp + + address entry_point = __ pc(); + __ reg_printf("entering generate_native_entry, lr = %p, rfp = %p\n\tRBCP = %p\n", lr, rfp, rbcp); + + const Address constMethod (rmethod, Method::const_offset()); + const Address access_flags (rmethod, Method::access_flags_offset()); + const Address size_of_parameters(r2, ConstMethod:: + size_of_parameters_offset()); + + // get parameter size (always needed) + __ ldr(r2, constMethod); + __ load_unsigned_short(r2, size_of_parameters); + + // Native calls don't need the stack size check since they have no + // expression stack and the arguments are already on the stack and + // we only add a handful of words to the stack. 
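+  // Parameters sit on the caller's expression stack, so the first parameter
+  // (local 0) is at the highest address; rlocals is computed below as
+  // sp + size_of_parameters * wordSize - wordSize to point at it.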
+ + // rmethod: Method* + // r2: size of parameters + // r4: sender sp + + // for natives the size of locals is zero + + // compute beginning of parameters (rlocals) + __ add(rlocals, sp, r2, lsl(2)); + __ sub(rlocals, rlocals, wordSize); + __ reg_printf("(start of parameters) rlocals = %p, nparams = %d\n", rlocals, r2); + + // initialize fixed part of activation frame + generate_fixed_frame(true); + __ reg_printf("pushed new fixed frame, lr = %p, rfp = %p\n", lr, rfp); + + Register locals_sp = r4; // the overwrites rdispatch, we can restore at end + // !! If this canges, change the end of arguements in interpreterRT_aarch32.cpp + //__ mov(r4, sp); //Save top of arguments + + // make sure method is native & not abstract +#ifdef ASSERT + __ ldr(r0, access_flags); + { + Label L; + __ tst(r0, JVM_ACC_NATIVE); + __ b(L, Assembler::NE); + __ stop("tried to execute non-native method as native"); + __ bind(L); + } + { + Label L; + __ tst(r0, JVM_ACC_ABSTRACT); + __ b(L, Assembler::EQ); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception + // handler would try to exit the monitor of synchronized methods + // which hasn't been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation + // will check this flag. + + const Address do_not_unlock_if_synchronized(rthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + __ mov(rscratch2, true); + __ strb(rscratch2, do_not_unlock_if_synchronized); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; + __ bind(continue_after_compile); + + bang_stack_shadow_pages(true); + // Note rscratch1 will contain zero here due to bang_stack_shadow_pages + // reset the _do_not_unlock_if_synchronized flag + //__ mov(rscratch1, 0); + __ strb(rscratch1, do_not_unlock_if_synchronized); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. 
+ if (synchronized) { + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ ldr(r0, access_flags); + __ tst(r0, JVM_ACC_SYNCHRONIZED); + __ b(L, Assembler::EQ); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // start execution +#ifdef ASSERT + { + Label L; + const Address monitor_block_top(rfp, + frame::get_interpreter_frame_monitor_block_top_offset() * wordSize); + __ ldr(rscratch1, monitor_block_top); + __ cmp(sp, rscratch1); + __ b(L, Assembler::EQ); + __ stop("broken stack frame setup in interpreter"); + __ bind(L); + } +#endif + + // jvmti support + __ notify_method_entry(); + + const Register result_handler = rlocals; + //This is recomputed for the new function and result_handler is not written until + // after the function has been called + + // allocate space for parameters + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ load_unsigned_short(rscratch1, Address(rscratch1, ConstMethod::size_of_parameters_offset())); + + __ sub(sp, sp, rscratch1, lsl(Interpreter::logStackElementSize + 1)); + // This +1 is a hack to double the amount of space allocated for parameters, this is likely far + // more than needed as in the worst case when parameters have to be placed on the stack they would be aligned + // as follows LONG | INT | EMPTY | LONG ... This would only increase the space used by a half. + __ align_stack(); + __ mov(locals_sp, sp); + __ reg_printf("Stack Pointer on arg copy, sp = %p, locals_sp = %p, rlocals = %p\n", sp, locals_sp, rlocals); + + // get signature handler + { + Label L; + __ ldr(rscratch1, Address(rmethod, Method::signature_handler_offset())); + __ cmp(rscratch1, 0); + __ b(L, Assembler::NE); + __ reg_printf("Prepare_native_call, locals_sp = %p, rlocals = %p\n", locals_sp, rlocals); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), rmethod); + __ reg_printf("Finished prepare_native_call, locals_sp = %p, rlocals = %p\n", locals_sp, rlocals); + __ ldr(rscratch1, Address(rmethod, Method::signature_handler_offset())); + __ bind(L); + } + + // call signature handler + assert(InterpreterRuntime::SignatureHandlerGenerator::from() == rlocals, + "adjust this code"); + assert(InterpreterRuntime::SignatureHandlerGenerator::to() == locals_sp, + "adjust this code"); + assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == rscratch1, + "adjust this code"); + + // The generated handlers do not touch rmethod (the method). + // However, large signatures cannot be cached and are generated + // each time here. The slow-path generator can do a GC on return, + // so we must reload it after the call. 
+ __ reg_printf("**BEFORE**\nrlocals = %p,locals_sp = %p, sp = %p\n", rlocals, locals_sp, sp); + __ reg_printf("About to call the Method::signature_handler = %p\n", rscratch1); + __ bl(rscratch1); + __ reg_printf("**AFTER**\nr0 : %p, r1 : %p, r2 : %p\n", r0, r1, r2); + __ reg_printf("r3 : %p, sp : %p\n", r3, sp); + __ get_method(rmethod); // slow path can do a GC, reload rmethod + + + + // result handler is in r0 + // set result handler + __ mov(result_handler, r0); + // pass mirror handle if static call + { + Label L; + __ ldr(rscratch1, Address(rmethod, Method::access_flags_offset())); + __ tst(rscratch1, JVM_ACC_STATIC); + __ b(L, Assembler::EQ); + // get mirror + __ load_mirror(rscratch1, rmethod, r1); + // copy mirror into activation frame + __ str(rscratch1, Address(rfp, frame::get_interpreter_frame_oop_temp_offset() * wordSize)); + // pass handle to mirror + __ add(c_rarg1, rfp, frame::get_interpreter_frame_oop_temp_offset() * wordSize); + __ bind(L); + } + + // get native function entry point in r14 + Register native_entry_point = r14; + + { + Label L; + __ ldr(native_entry_point, Address(rmethod, Method::native_function_offset())); + address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); + __ mov(rscratch2, unsatisfied); + __ ldr(rscratch2, rscratch2); + __ reg_printf("QWERTY native_entry_point = %p, unsatisfied_link_entry_point = %p\n", native_entry_point, rscratch2); + __ cmp(native_entry_point, rscratch2); + __ b(L, Assembler::NE); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), rmethod); + __ get_method(rmethod); + __ ldr(native_entry_point, Address(rmethod, Method::native_function_offset())); + __ bind(L); + } + + // pass JNIEnv + __ add(c_rarg0, rthread, in_bytes(JavaThread::jni_environment_offset())); + + // It is enough that the pc() points into the right code + // segment. It does not have to be the correct return pc. + __ set_last_Java_frame(sp, rfp, (address)NULL, rscratch1); + + // change thread state +#ifdef ASSERT + { + Label L; + __ ldr(rscratch1, Address(rthread, JavaThread::thread_state_offset())); + __ cmp(rscratch1, _thread_in_Java); + __ b(L, Assembler::EQ); + __ stop("Wrong thread state in native stub"); + __ bind(L); + } +#endif + + // Change state to native + __ mov(rscratch1, _thread_in_native); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ dmb(Assembler::ISH); + __ str(rscratch1, Address(rscratch2)); + + __ reg_printf("Calling native method, lr = %p & rmethod = %p\n", lr, rmethod); + // Call the native method. + /*__ reg_printf("**ONCALL**\nr0 : %p\nr1 : %p\nr2 : %p\n", r0, r1, r2); + __ reg_printf("r3 : %p\n\nr4 : %p\nrloc : %p\n", r3, r4, rlocals);*/ + __ reg_printf("Stack Pointer on entry to native, sp = %p\n", sp); + __ bl(native_entry_point); + __ reg_printf("Returned from native, lr = %p, r1 = %p, r0 = %p\n", lr, r1, r0); + __ maybe_isb(); + __ get_method(rmethod); + // result potentially in r0, or v0 + + // make room for the pushes we're about to do + //__ sub(rscratch1, sp, 4 * wordSize); + //__ bic(sp, rscratch1, 0xf); + // NOTE: The order of these pushes is known to frame::interpreter_frame_result + // in order to extract the result of a method call. If the order of these + // pushes change or anything else is added to the stack then the code in + // interpreter_frame_result must also change. + __ reg_printf("Before push dtos, ltos. 
sp = %p\n", sp); + __ push(dtos); + __ push(ltos); + + // change thread state + __ mov(rscratch1, _thread_in_native_trans); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ dmb(Assembler::ISH); + __ str(rscratch1, Address(rscratch2)); + __ reg_printf("before os::is_MP\n"); + if (os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ membar(Assembler::AnyAny); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. + __ serialize_memory(rthread, rscratch2); + } + } + __ reg_printf("after os::is_MP\n"); + // check for safepoint operation in progress and/or pending suspend requests + { + Label L, Continue; + __ safepoint_poll_acquire(L); + __ ldr(rscratch2, Address(rthread, JavaThread::suspend_flags_offset())); + __ cbz(rscratch2, Continue); + __ bind(L); + + // Don't use call_VM as it will see a possible pending exception + // and forward it and never return here preventing us from + // clearing _last_native_pc down below. So we do a runtime call by + // hand. + // + __ mov(c_rarg0, rthread); + __ mov(rscratch2, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); + //__ blrt(rscratch2, 1, 0, 0); + __ bl(rscratch2); + __ maybe_isb(); + __ get_method(rmethod); + __ bind(Continue); + } + __ reg_printf("finished safepoint check\n"); + // change thread state + __ mov(rscratch1, _thread_in_Java); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ dmb(Assembler::ISH); + __ str(rscratch1, Address(rscratch2)); + + // reset_last_Java_frame + __ reset_last_Java_frame(true); + + __ mov(rscratch1, 0); + if (CheckJNICalls) { + // clear_pending_jni_exception_check + __ str(rscratch1, Address(rthread, JavaThread::pending_jni_exception_check_fn_offset())); + } + + // reset handle block + __ ldr(rscratch2, Address(rthread, JavaThread::active_handles_offset())); + __ str(rscratch1, Address(rscratch2, JNIHandleBlock::top_offset_in_bytes())); + + // If result is an oop unbox and store it in frame where gc will see it + // and result handler will pick it up + __ reg_printf("finished checking last_Java_frame\n"); + { + Label no_oop, not_weak, store_result; + //__ bkpt(345); + //__ adr(rscratch2, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); + __ mov(rscratch2, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); + __ reg_printf("Comparing rscratch2 = %p and result_handler = %p\n", rscratch2, result_handler); + + __ cmp(rscratch2, result_handler); + __ b(no_oop, Assembler::NE); + __ reg_printf("It's an oop.\n"); + // Unbox oop result, e.g. JNIHandles::resolve result. 
+ __ pop(ltos); + __ resolve_jobject(r0, rthread, rscratch2); + __ str(r0, Address(rfp, frame::get_interpreter_frame_oop_temp_offset()*wordSize)); + // keep stack depth as expected by pushing oop which will eventually be discarded + __ push(ltos); + __ bind(no_oop); + } + + { + Label no_reguard; + __ lea(rscratch1, Address(rthread, in_bytes(JavaThread::stack_guard_state_offset()))); + __ ldrb(rscratch1, Address(rscratch1)); + __ cmp(rscratch1, JavaThread::stack_guard_yellow_reserved_disabled); + __ b(no_reguard, Assembler::NE); + + __ pusha(); // XXX only save smashed registers + __ mov(c_rarg0, rthread); + __ mov(rscratch2, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + __ bl(rscratch2); + __ popa(); // XXX only restore smashed registers + __ bind(no_reguard); + } + __ reg_printf("Restoring java-ish things\n"); + // The method register is junk from after the thread_in_native transition + // until here. Also can't call_VM until the bcp has been + // restored. Need bcp for throwing exception below so get it now. + __ get_method(rmethod); + __ get_dispatch(); // used to save sp in for args + // restore bcp to have legal interpreter frame, i.e., bci == 0 <=> + // rbcp == code_base() + __ ldr(rbcp, Address(rmethod, Method::const_offset())); // get ConstMethod* + __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset())); // get codebase + // handle exceptions (exception handling will handle unlocking!) + { + Label L; + __ reg_printf("Checking pending exceptions\n"); + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, L); + // Note: At some point we may want to unify this with the code + // used in call_VM_base(); i.e., we should use the + // StubRoutines::forward_exception code. For now this doesn't work + // here because the rsp is not correctly set at this point. + __ reg_printf("Calling vm to throw_pending_exception\n"); + + // Need to restore lr? - introduced on aarch32 port + //__ ldr(lr, Address(rfp, frame::get_return_addr_offset())); + + __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + // do unlocking if necessary + { + Label L; + __ reg_printf("testing if we need to unlock\n"); + __ ldr(rscratch1, Address(rmethod, Method::access_flags_offset())); + __ tst(rscratch1, JVM_ACC_SYNCHRONIZED); + __ b(L, Assembler::EQ); + // the code below should be shared with interpreter macro + // assembler implementation + { + Label unlock; + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object + // has not been unlocked by an explicit monitorexit bytecode. 
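+      // The method's monitor is the first BasicObjectLock just below the
+      // frame's initial SP; a NULL obj field there means it was already
+      // unlocked, which is reported as an IllegalMonitorStateException.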
+ + // monitor expect in c_rarg1 for slow unlock path + __ lea (c_rarg1, Address(rfp, // address of first monitor + (intptr_t)(frame::get_interpreter_frame_initial_sp_offset() * + wordSize - sizeof(BasicObjectLock)))); + + __ ldr(rscratch1, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + __ reg_printf("Checking if we are already unlocked\n"); + __ cbnz(rscratch1, unlock); + + // Entry already unlocked, need to throw exception + __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + __ bind(unlock); + __ reg_printf("Doing unlock\n"); + __ unlock_object(c_rarg1); + } + __ bind(L); + } + __ reg_printf("finished unlocking\n"); + // jvmti support + // Note: This must happen _after_ handling/throwing any exceptions since + // the exception handler code notifies the runtime of method exits + // too. If this happens before, method entry/exit notifications are + // not properly paired (was bug - gri 11/22/99). + __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); + + // restore potential result in r0:d0, call result handler to + // restore potential result in ST0 & handle result + __ reg_printf("Before pop dtos, ltos. sp = %p\n", sp); + __ pop(ltos); + __ pop(dtos); + + __ reg_printf("Calling result handler, r1 = %p, r0 = %p\n", r1, r0); + __ bl(result_handler); + __ reg_printf("Finished result_handler\n RFP NOW = %p, r0 = %p\n", rfp, r0); + + // remove activation restore sp to sender_sp + __ ldr(rscratch1, Address(rfp, + frame::get_interpreter_frame_sender_sp_offset() * + wordSize)); // get sender sp + // remove frame anchor & restore sp + __ leave(); + + __ mov(sp, rscratch1); // Native frame so two extra fields + __ reg_printf("Returning to Java execution, restored frame = %p, lr = %p\n\tRBCP = %p\n", rfp, lr, rbcp); + __ b(lr); + + if (inc_counter) { + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); + } + + return entry_point; +} + +address TemplateInterpreterGenerator::generate_CRC32_update_entry() { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rmethod: Method* + // sp: args + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + __ safepoint_poll(slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. + + // Load parameters + const Register crc = c_rarg0; // crc + const Register val = c_rarg1; // source java byte value + const Register tbl = c_rarg2; // scratch + + // Arguments are reversed on java expression stack + __ ldr(val, Address(sp, 0)); // byte value + __ ldr(crc, Address(sp, wordSize)); // Initial CRC + + __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); + __ inv(crc, crc); + __ update_byte_crc32(crc, val, tbl); + __ inv(crc, crc); // result in c_rarg0 + + __ mov(sp, r4); + __ ret(lr); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + + return entry; + } + return NULL; +} + +address TemplateInterpreterGenerator::generate_CRC32_updateBytes_inner(AbstractInterpreter::MethodKind kind, int is_crc32c) { + if (!is_crc32c ? 
UseCRC32Intrinsics : UseCRC32CIntrinsics) { + address entry = __ pc(); + + // rmethod,: Method* + // sp: senderSP must preserved for slow path + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + __ safepoint_poll(slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. + + // Load parameters + const Register crc = c_rarg0; // crc + const Register buf = c_rarg1; // source java byte array address + const Register len = c_rarg2; // length + const Register off = len; // offset (never overlaps with 'len') + const Register tmp = rscratch1;// tmp register used to load end in case crc32c + + // Arguments are reversed on java expression stack + // Calculate address of start element + if (!is_crc32c ? kind == Interpreter::java_util_zip_CRC32_updateByteBuffer : + kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { + __ ldr(buf, Address(sp, 2*wordSize)); // long buf + __ ldr(off, Address(sp, wordSize)); // offset + __ add(buf, buf, off); // + offset + __ ldr(crc, Address(sp, 4*wordSize)); // Initial CRC + } else { + __ ldr(buf, Address(sp, 2*wordSize)); // byte[] array + __ add(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ ldr(off, Address(sp, wordSize)); // offset + __ add(buf, buf, off); // + offset + __ ldr(crc, Address(sp, 3*wordSize)); // Initial CRC + } + + // Can now load 'len' since we're finished with 'off' + if (!is_crc32c) { + __ ldr(len, Address(sp)); // Length + } else { + __ ldr(tmp, Address(sp)); + // len = end - offset + __ sub(len, tmp, off); + } + + __ mov(sp, r4); // Restore the caller's SP + + // We are frameless so we can just jump to the stub. + __ b(CAST_FROM_FN_PTR(address, !is_crc32c ? StubRoutines::updateBytesCRC32() : + StubRoutines::updateBytesCRC32C())); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + + if (!is_crc32c) + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + else + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); + + return entry; + } + return NULL; +} + +address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + return generate_CRC32_updateBytes_inner(kind, false); +} +address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + return generate_CRC32_updateBytes_inner(kind, true); +} + +address TemplateInterpreterGenerator::generate_aescrypt_block_entry(AbstractInterpreter::MethodKind kind) { + // TODO enable once class fields offsets are known at this point + if (false && UseAESIntrinsics) { + const int K_offset = com_sun_crypto_provider_AESCrypt::K_offset(); + guarantee(K_offset > 0, "referent offset not initialized"); + + address entry = __ pc(); + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. 
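+    // (the slow path bound at the end of this entry simply dispatches to the regular
+    // zerolocals entry, so no frame needs to be built here)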
+    __ safepoint_poll(slow_path);
+
+    // Load parameters
+    const Register from = c_rarg0; // source java byte array address
+    const Register to = c_rarg1; // dest java byte array address
+    const Register key = c_rarg2; // key java array address
+    const Register off = c_rarg3; // offset
+
+    // Arguments are reversed on java expression stack
+    // Calculate address of start element
+    __ ldr(off, Address(sp)); // to buffer offset
+    __ ldr(to, Address(sp, wordSize)); // to buffer
+    __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
+    __ add(to, to, off);
+    __ ldr(off, Address(sp, 2 * wordSize)); // from buffer offset
+    __ ldr(from, Address(sp, 3 * wordSize)); // from buffer
+    __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
+    __ add(from, from, off);
+    // Load the value of the referent field.
+    __ ldr(key, Address(sp, 4 * wordSize)); // object itself
+    const Address field_address(key, K_offset);
+    __ load_heap_oop(key, field_address);
+    __ add(key, key, arrayOopDesc::base_offset_in_bytes(T_INT)); // + header size
+
+    __ mov(sp, r4); // Restore the caller's SP
+
+    if (kind == Interpreter::com_sun_crypto_provider_AESCrypt_encryptBlock) {
+      // We are frameless so we can just jump to the stub.
+      __ b(CAST_FROM_FN_PTR(address, StubRoutines::aescrypt_encryptBlock()));
+    } else {
+      // We are frameless so we can just jump to the stub.
+      __ b(CAST_FROM_FN_PTR(address, StubRoutines::aescrypt_decryptBlock()));
+    }
+
+    // generate a vanilla native entry as the slow path
+    __ bind(slow_path);
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
+
+    return entry;
+  }
+  return NULL;
+}
+
+address TemplateInterpreterGenerator::generate_cipherBlockChaining_encryptAESCrypt_entry(AbstractInterpreter::MethodKind kind) {
+  // TODO enable once class fields offsets are known at this point
+  if (false && UseAESIntrinsics && UseNeon) {
+    address entry = __ pc();
+
+    Label slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+    __ safepoint_poll(slow_path);
+
+    const int embeddedCipher_offset = com_sun_crypto_provider_FeedbackCipher::embeddedCipher_offset();
+    guarantee(embeddedCipher_offset > 0, "referent offset not initialized");
+    const int K_offset = com_sun_crypto_provider_AESCrypt::K_offset();
+    guarantee(K_offset > 0, "referent offset not initialized");
+    const int r_offset = com_sun_crypto_provider_CipherBlockChaining::r_offset();
+    guarantee(r_offset > 0, "referent offset not initialized");
+
+    // Load parameters
+    const Register from = c_rarg0; // source java byte array address
+    const Register to = c_rarg1; // dest java byte array address
+    const Register key = c_rarg2; // key java array address
+    const Register rvec = c_rarg3; // rvec java byte array address
+    const Register len = r4; // len of the input
+    const Register off = r5; // offset
+    const Register sp_pointer = r6; // sp
+
+    __ mov(sp_pointer, r4);
+    // Arguments are reversed on java expression stack:
+    // outBuffer offset, outBuffer, inBuffer len, inBuffer offset, inBuffer
+    // Calculate address of start element
+    __ ldr(off, Address(sp)); // to buffer offset
+    __ ldr(to, Address(sp, wordSize)); // to buffer
+    __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
+    __ add(to, to, off);
+    __ ldr(len, Address(sp, 2 * wordSize)); // len
+    __ ldr(off, Address(sp, 3 * wordSize)); // from buffer offset
+    __ ldr(from, Address(sp, 4 * wordSize)); // from buffer
+    __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
+    __ add(from, from, off);
+    // Load the value of the referent field.
+    __ ldr(rvec, Address(sp, 5 * wordSize)); // object itself
+    const Address field_address(rvec, r_offset);
+    __ load_heap_oop(rvec, field_address);
+    __ add(rvec, rvec, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size
+
+    __ ldr(key, Address(sp, 5 * wordSize)); // object itself
+    const Address field_address2(key, embeddedCipher_offset);
+    __ load_heap_oop(key, field_address2);
+    const Address field_address3(key, K_offset);
+    __ load_heap_oop(key, field_address3);
+    __ add(key, key, arrayOopDesc::base_offset_in_bytes(T_INT)); // + header size
+
+    __ mov(sp, sp_pointer); // Restore the caller's SP
+
+    if (kind == Interpreter::com_sun_crypto_provider_CipherBlockChaining_encrypt) {
+      // We are frameless so we can just jump to the stub.
+      __ b(CAST_FROM_FN_PTR(address, StubRoutines::cipherBlockChaining_encryptAESCrypt_special()));
+    } else {
+      // We are frameless so we can just jump to the stub.
+      __ b(CAST_FROM_FN_PTR(address, StubRoutines::cipherBlockChaining_decryptAESCrypt_special()));
+    }
+
+    // generate a vanilla native entry as the slow path
+    __ bind(slow_path);
+
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
+
+    return entry;
+  }
+  return NULL;
+}
+
+address TemplateInterpreterGenerator::generate_SHA_implCompress_entry(AbstractInterpreter::MethodKind kind) {
+  // TODO enable once class fields offsets are known at this point
+  if (false && ((UseSHA1Intrinsics && kind == Interpreter::sun_security_provider_SHA_implCompress) ||
+      (UseSHA256Intrinsics && kind == Interpreter::sun_security_provider_SHA2_implCompress) ||
+      (UseSHA512Intrinsics && kind == Interpreter::sun_security_provider_SHA5_implCompress))) {
+    address entry = __ pc();
+
+    Label slow_path;
+    // If we need a safepoint check, generate full interpreter entry.
+ __ safepoint_poll(slow_path); + + int state_offset; + int state_data_offset; + address stub_addr; + switch (kind) { + case Interpreter::sun_security_provider_SHA_implCompress: + state_offset = sun_security_provider_SHA::state_offset(); + state_data_offset = arrayOopDesc::base_offset_in_bytes(T_INT); + stub_addr = StubRoutines::sha1_implCompress(); + break; + case Interpreter::sun_security_provider_SHA2_implCompress: + state_offset = sun_security_provider_SHA2::state_offset(); + state_data_offset = arrayOopDesc::base_offset_in_bytes(T_INT); + stub_addr = StubRoutines::sha256_implCompress(); + break; + case Interpreter::sun_security_provider_SHA5_implCompress: + state_offset = sun_security_provider_SHA5::state_offset(); + state_data_offset = arrayOopDesc::base_offset_in_bytes(T_LONG); + stub_addr = StubRoutines::sha512_implCompress(); + break; + default: + ShouldNotReachHere(); return NULL; // cannot be, stupid gcc + } + guarantee(state_offset > 0, "referent offset not initialized"); + + // Load parameters + const Register from = c_rarg0; // source java byte array address + const Register state = c_rarg1; // state java byte array address + const Register off = r3; // offset + + // Arguments are reversed on java expression stack: + // fromBufferOffset , fromBuffer + // Calculate address of start element + __ ldr(off, Address(sp)); // from buffer offset + __ ldr(from, Address(sp, wordSize)); // from buffer + __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ add(from, from, off); + // Load the value of the referent field. + __ ldr(state, Address(sp, 2 * wordSize)); // object itself + const Address field_address(state, state_offset); + __ load_heap_oop(state, field_address); + __ add(state, state, state_data_offset); // + header size + + __ mov(sp, r4); // Restore the caller's SP + + // We are frameless so we can just jump to the stub. + __ b(CAST_FROM_FN_PTR(address, stub_addr)); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); + + return entry; + } + return NULL; +} + +// +// Generic interpreted method entry to (asm) interpreter +// +address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // r4: sender sp + address entry_point = __ pc(); + + const Address constMethod(rmethod, Method::const_offset()); + const Address access_flags(rmethod, Method::access_flags_offset()); + const Address size_of_parameters(r3, + ConstMethod::size_of_parameters_offset()); + const Address size_of_locals(r3, ConstMethod::size_of_locals_offset()); + + // get parameter size (always needed) + // need to load the const method first + __ ldr(r3, constMethod); + __ load_unsigned_short(r2, size_of_parameters); + + // r2: size of parameters + + __ load_unsigned_short(r3, size_of_locals); // get size of locals in words + __ sub(r3, r3, r2); // r3 = no. of additional locals + + // see if we've got enough room on the stack for locals plus overhead. 
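+  // (the check runs before the locals area below is carved out of the stack)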
+ generate_stack_overflow_check(); + + // compute beginning of parameters (rlocals) + __ add(rlocals, sp, r2, lsl(2)); + __ sub(rlocals, rlocals, wordSize); + + // Make room for locals + __ sub(rscratch1, sp, r3, lsl(2)); + // Align the sp value + __ bic(sp, rscratch1, StackAlignmentInBytes-1); + + // r3 - # of additional locals + // allocate space for locals + // explicitly initialize locals + { + Label exit, loop; + __ mov(rscratch2, 0); + __ cmp(r3, 0); + __ b(exit, Assembler::LE); // do nothing if r3 <= 0 + __ bind(loop); + __ str(rscratch2, Address(__ post(rscratch1, wordSize))); + __ subs(r3, r3, 1); // until everything initialized + __ b(loop, Assembler::NE); + __ bind(exit); + } + __ reg_printf("Done locals space\n", r2); + + // initialize fixed part of activation frame + __ reg_printf("About to do fixed frame\n", r2); + generate_fixed_frame(false); + // And the base dispatch table + __ get_dispatch(); + // make sure method is not native & not abstract + __ reg_printf("Just done generate_fixed_frame; rmethod = %p\n", rmethod); +#ifdef ASSERT + __ ldr(r0, access_flags); + { + Label L; + __ tst(r0, JVM_ACC_NATIVE); + __ b(L, Assembler::EQ); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ tst(r0, JVM_ACC_ABSTRACT); + __ b(L, Assembler::EQ); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception + // handler would try to exit the monitor of synchronized methods + // which hasn't been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation + // will check this flag. + + const Address do_not_unlock_if_synchronized(rthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + __ mov(rscratch2, true); + __ strb(rscratch2, do_not_unlock_if_synchronized); + + Label no_mdp; + Register mdp = r3; + __ ldr(mdp, Address(rmethod, Method::method_data_offset())); + __ cbz(mdp, no_mdp); + __ add(mdp, mdp, in_bytes(MethodData::data_offset())); + __ profile_parameters_type(mdp, r1, r2); + __ bind(no_mdp); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + Label profile_method; + Label profile_method_continue; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, + &profile_method, + &profile_method_continue); + if (ProfileInterpreter) { + __ bind(profile_method_continue); + } + } + + Label continue_after_compile; + __ bind(continue_after_compile); + + bang_stack_shadow_pages(false); + // Note rscratch1 will contain zero here + // reset the _do_not_unlock_if_synchronized flag + __ strb(rscratch1, do_not_unlock_if_synchronized); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. 
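+  // (lock_method() below is expected to allocate the initial monitor slot and lock
+  // the receiver, or the class mirror for static methods)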
+ if (synchronized) { + // Allocate monitor and lock method + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ reg_printf("Checking synchronization, rmethod = %p\n", rmethod); + __ ldr(r0, access_flags); + __ tst(r0, JVM_ACC_SYNCHRONIZED); + __ b(L, Assembler::EQ); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // start execution +#ifdef ASSERT + { + Label L; + const Address monitor_block_top (rfp, + frame::get_interpreter_frame_monitor_block_top_offset() * wordSize); + __ ldr(rscratch1, monitor_block_top); + __ cmp(sp, rscratch1); + __ b(L, Assembler::EQ); + __ stop("broken stack frame setup in interpreter"); + __ bind(L); + } +#endif + + // jvmti support + __ notify_method_entry(); + __ reg_printf("About to dispatch, rmethod = %p, rlocals = %p\n", rmethod, rlocals); + __ dispatch_next(vtos); + __ reg_printf("Finshed dispatch? rmethod = %p\n", rmethod); + // invocation counter overflow + if (inc_counter) { + if (ProfileInterpreter) { + // We have decided to profile this method in the interpreter + __ bind(profile_method); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + // don't think we need this + __ get_method(r1); + __ b(profile_method_continue); + } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(continue_after_compile); + } + + __ reg_printf("Just completed normal entry, rmethod = %p\n", rmethod); + return entry_point; +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateInterpreterGenerator::generate_throw_exception() { + // Entry point in previous activation (i.e., if the caller was + // interpreted) + Interpreter::_rethrow_exception_entry = __ pc(); + __ reg_printf("rethrow_exception_entry\n"); + + // Restore sp to interpreter_frame_last_sp even though we are going + // to empty the expression stack for the exception processing. + __ mov(rscratch1, 0); + __ str(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); + // r0: exception + // r3: return address/pc that threw exception + __ restore_bcp(); // rbcp points to call/send + __ restore_locals(); + __ restore_constant_pool_cache(); + __ get_dispatch(); + + // Entry point for exceptions thrown within interpreter code + Interpreter::_throw_exception_entry = __ pc(); + __ reg_printf("throw_exception_entry\n"); + // If we came here via a NullPointerException on the receiver of a + // method, rmethod may be corrupt. 
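+  // Reload it from the interpreter frame before it is used below.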
+ __ get_method(rmethod); + // expression stack is undefined here + // r0: exception + // rbcp: exception bcp + __ verify_oop(r0); + __ mov(c_rarg1, r0); + + // expression stack must be empty before entering the VM in case of + // an exception + __ empty_expression_stack(); + // find exception handler address and preserve exception oop + __ call_VM(r3, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::exception_handler_for_exception), + c_rarg1); + + // Calculate stack limit + /*__ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 4); + __ ldr(rscratch2, + Address(rfp, frame::get_interpreter_frame_initial_sp_offset() * wordSize)); + __ sub(rscratch1, rscratch2, rscratch1, lsl(2)); + __ bic(sp, rscratch1, 0xf);*/ + // Don't do this as we don't have a stack pointer + + // r0: exception handler entry point + // r3: preserved exception oop + // rbcp: bcp for exception handler + __ push_ptr(r3); // push exception which is now the only value on the stack + __ b(r0); // jump to exception handler (may be _remove_activation_entry!) + + // If the exception is not handled in the current frame the frame is + // removed and the exception is rethrown (i.e. exception + // continuation is _rethrow_exception). + // + // Note: At this point the bci is still the bxi for the instruction + // which caused the exception and the expression stack is + // empty. Thus, for any VM calls at this point, GC will find a legal + // oop map (with empty expression stack). + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + __ print_method_exit(false); + __ reg_printf("remove_activation_preserving_args_entry\n"); + __ empty_expression_stack(); + // Set the popframe_processing bit in pending_popframe_condition + // indicating that we are currently handling popframe, so that + // call_VMs that may happen later do not trigger new popframe + // handling cycles. + __ ldr(r3, Address(rthread, JavaThread::popframe_condition_offset())); + __ orr(r3, r3, JavaThread::popframe_processing_bit); + __ str(r3, Address(rthread, JavaThread::popframe_condition_offset())); + + { + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. + Label caller_not_deoptimized; + __ ldr(c_rarg1, Address(rfp, frame::get_return_addr_offset() * wordSize)); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + InterpreterRuntime::interpreter_contains), c_rarg1); + __ cbnz(r0, caller_not_deoptimized); + + // Compute size of arguments for saving when returning to + // deoptimized caller + __ get_method(r0); + __ ldr(r0, Address(r0, Method::const_offset())); + __ load_unsigned_short(r0, Address(r0, in_bytes(ConstMethod:: + size_of_parameters_offset()))); + __ lsl(r0, r0, Interpreter::logStackElementSize); + __ restore_locals(); // XXX do we need this? 
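+    // r0 holds the argument area size in bytes; compute its start address so the
+    // incoming arguments can be handed to Deoptimization::popframe_preserve_args below.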
+ __ sub(rlocals, rlocals, r0); + __ add(rlocals, rlocals, wordSize); + // Save these arguments + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + Deoptimization:: + popframe_preserve_args), + rthread, r0, rlocals); + + __ remove_activation(vtos, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Inform deoptimization that it is responsible for restoring + // these arguments + __ mov(rscratch1, JavaThread::popframe_force_deopt_reexecution_bit); + __ str(rscratch1, Address(rthread, JavaThread::popframe_condition_offset())); + + // Continue in deoptimization handler + __ b(lr); + + __ bind(caller_not_deoptimized); + } + + __ remove_activation(vtos, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Restore the last_sp and null it out + __ ldr(sp, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); + __ mov(rscratch1, 0); + __ str(rscratch1, Address(rfp, frame::get_interpreter_frame_last_sp_offset() * wordSize)); + // remove_activation restores sp? + + __ restore_bcp(); + __ restore_locals(); + __ restore_constant_pool_cache(); + __ get_method(rmethod); + __ get_dispatch(); + + // The method data pointer was incremented already during + // call profiling. We have to restore the mdp for the current bcp. + if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } + + // Clear the popframe condition flag + __ mov(rscratch1, JavaThread::popframe_inactive); + __ str(rscratch1, Address(rthread, JavaThread::popframe_condition_offset())); + assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive"); + +#if INCLUDE_JVMTI + { + Label L_done; + __ ldrb(rscratch1, Address(rbcp, 0)); + __ cmp(rscratch1, Bytecodes::_invokestatic); + __ b(L_done, Assembler::EQ); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. 
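+    // The candidate argument is in local 0; a non-NULL result is written back to the
+    // top of the expression stack below.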
+ + __ ldr(c_rarg0, Address(rlocals, 0)); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), c_rarg0, rmethod, rbcp); + + __ cbz(r0, L_done); + + __ str(r0, Address(sp, 0)); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + + // Restore machine SP + /*__ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 4); + __ ldr(rscratch2, + Address(rfp, frame::get_interpreter_frame_initial_sp_offset() * wordSize)); + __ sub(rscratch1, rscratch2, rscratch1, lsl(2)); + __ bic(sp, rscratch1, 0xf);*/ + + __ dispatch_next(vtos); + // end of PopFrame support + + Interpreter::_remove_activation_entry = __ pc(); + __ print_method_exit(false); + __ reg_printf("remove_activation_entry\n"); + + // preserve exception over this code sequence + __ pop_ptr(r0); + __ str(r0, Address(rthread, JavaThread::vm_result_offset())); + // remove the activation (without doing throws on illegalMonitorExceptions) + __ remove_activation(vtos, false, true, false); + // restore exception + // restore exception + __ get_vm_result(r0, rthread); + + // In between activations - previous activation type unknown yet + // compute continuation point - the continuation point expects the + // following registers set up: + // + // r0: exception + // lr: return address/pc that threw exception + // rsp: expression stack of caller + // rfp: fp of caller + // FIXME: There's no point saving LR here because VM calls don't trash it + __ strd(r0, lr, Address(__ pre(sp, -2 * wordSize))); // save exception & return address + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + SharedRuntime::exception_handler_for_return_address), + rthread, lr); + __ mov(r1, r0); // save exception handler + __ ldrd(r0, lr, Address(__ post(sp, 2 * wordSize))); // restore exception & return address + // We might be returning to a deopt handler that expects r3 to + // contain the exception pc + __ mov(r3, lr); + // Note that an "issuing PC" is actually the next PC after the call + __ b(r1); // jump to exception + // handler of caller +} + + +// +// JVMTI ForceEarlyReturn support +// +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { + address entry = __ pc(); + __ restore_bcp(); + __ restore_locals(); + __ empty_expression_stack(); + __ load_earlyret_value(state); + + __ ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset())); + Address cond_addr(rscratch1, JvmtiThreadState::earlyret_state_offset()); + + // Clear the earlyret state + assert(JvmtiThreadState::earlyret_inactive == 0, "should be"); + __ mov(rscratch2, 0); + __ str(rscratch2, cond_addr); + + __ remove_activation(state, + false, /* throw_monitor_exception */ + false, /* install_monitor_exception */ + true); /* notify_jvmdi */ + __ b(lr); + + return entry; +} // end of ForceEarlyReturn support + + + +//----------------------------------------------------------------------------- +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, + address& bep, + address& cep, + address& sep, + address& aep, + address& iep, + address& lep, + address& fep, + address& dep, + address& vep) { + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + aep = __ pc(); __ push_ptr(); __ b(L); + dep = __ pc(); + if(hasFPU()){ + __ push_d(); __ b(L); + } + lep = __ pc(); __ push_l(); __ b(L); + fep = __ pc(); + 
if(hasFPU()){ + __ push_f(); __ b(L); + } + bep = cep = sep = + iep = __ pc(); __ push_i(); + vep = __ pc(); + __ bind(L); + generate_and_dispatch(t); +} + +//----------------------------------------------------------------------------- + +// Non-product code +#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) { + address entry = __ pc(); + + __ push(state); + // Save all registers on stack, so omit SP and PC + const RegSet push_set = RegSet::range(r0, r12) + lr; + const int push_set_cnt = __builtin_popcount(push_set.bits()); + __ push(push_set, sp); + __ ldr(c_rarg2, Address(sp, push_set_cnt*wordSize)); // Pass top of stack + __ ldr(c_rarg3, Address(sp, (push_set_cnt+1)*wordSize)); // Pass top of stack high part/2nd stack word + __ call_VM(noreg, + //TODO: XXX: moved from SharedRuntime to InterpreterRuntime + CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), + c_rarg1, c_rarg2, c_rarg3); + __ pop(RegSet::range(r0, r12) + lr, sp); + __ pop(state); + __ b(lr); // return from result handler + + return entry; +} + +void TemplateInterpreterGenerator::count_bytecode() { + __ push(c_rarg0); + __ push(rscratch1); + __ push(rscratch2); + Label L; + __ mov(rscratch2, (address) &BytecodeCounter::_counter_value); + __ bind(L); + __ ldrex(rscratch1, rscratch2); + __ add(rscratch1, rscratch1, 1); + // strex stores 2nd arg to dest adressed by 3rd arg, + // stores status to 1st arg. So, 1st and 2nd shoud be different. + __ strex(c_rarg0, rscratch1, rscratch2); + __ cmp(c_rarg0, 0); + __ b(L, Assembler::NE); + __ pop(rscratch2); + __ pop(rscratch1); + __ pop(c_rarg0); +} + +void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; } + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; } + + +void TemplateInterpreterGenerator::trace_bytecode(Template* t) { + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time runtime saves the right registers, depending on + // the tosca in-state for the given template. + + assert(Interpreter::trace_code(t->tos_in()) != NULL, + "entry must have been generated"); + __ bl(Interpreter::trace_code(t->tos_in())); +} + + +void TemplateInterpreterGenerator::stop_interpreter_at() { + Label L; + __ push(rscratch1); + __ mov(rscratch1, (address) &BytecodeCounter::_counter_value); + __ ldr(rscratch1, Address(rscratch1)); + __ mov(rscratch2, StopInterpreterAt); + __ cmp(rscratch1, rscratch2); + __ b(L, Assembler::NE); + __ bkpt(0); + __ bind(L); + __ pop(rscratch1); +} + +#endif // !PRODUCT --- /dev/null 2018-09-25 19:25:29.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/templateTable_aarch32.cpp 2018-09-25 19:25:29.000000000 +0300 @@ -0,0 +1,4437 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "gc/shared/barrierSetAssembler.hpp" +#include "interp_masm_aarch32.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "vm_version_aarch32.hpp" + +#define __ _masm-> + +// Platform-dependent initialization + +extern void aarch32TestHook(); + +void TemplateTable::pd_initialize() { + aarch32TestHook(); +} + +// Address computation: local variables + +static inline Address iaddress(int n) { + return Address(rlocals, Interpreter::local_offset_in_bytes(n)); +} + +static inline Address laddress(int n) { + return iaddress(n + 1); +} + +static inline Address faddress(int n) { + return iaddress(n); +} + +static inline Address daddress(int n) { + return laddress(n); +} + +static inline Address aaddress(int n) { + return iaddress(n); +} + +static inline Address iaddress(Register r) { + return Address(rlocals, r, lsl(2)); +} + +// Note these two are different as VLDR/VSTR don't +// support base + (offset{ << x }) +static inline Address faddress(Register r, Register scratch, + InterpreterMacroAssembler* _masm) { + __ lea(scratch, Address(rlocals, r, lsl(2))); + return Address(scratch); +} + +static inline Address daddress(Register r, Register scratch, + InterpreterMacroAssembler* _masm) { + __ lea(scratch, Address(rlocals, r, lsl(2))); + return Address(scratch, Interpreter::local_offset_in_bytes(1)); +} + +static inline Address laddress(Register r, Register scratch, + InterpreterMacroAssembler * _masm) { + return daddress(r, scratch, _masm); +} + +static inline Address aaddress(Register r) { + return iaddress(r); +} + +static inline Address at_rsp() { + return Address(sp, 0); +} + +// At top of Java expression stack which may be different than sp(). It +// isn't for category 1 objects. 
+static inline Address at_tos () { + return Address(sp, Interpreter::expr_offset_in_bytes(0)); +} + +static inline Address at_tos_p1() { + return Address(sp, Interpreter::expr_offset_in_bytes(1)); +} + +static inline Address at_tos_p2() { + return Address(sp, Interpreter::expr_offset_in_bytes(2)); +} + +static inline Address at_tos_p3() { + return Address(sp, Interpreter::expr_offset_in_bytes(3)); +} + +static inline Address at_tos_p4() { + return Address(sp, Interpreter::expr_offset_in_bytes(4)); +} + +static inline Address at_tos_p5() { + return Address(sp, Interpreter::expr_offset_in_bytes(5)); +} + +// Condition conversion +static Assembler::Condition j_not(TemplateTable::Condition cc) { + switch (cc) { + case TemplateTable::equal : return Assembler::NE; + case TemplateTable::not_equal : return Assembler::EQ; + case TemplateTable::less : return Assembler::GE; + case TemplateTable::less_equal : return Assembler::GT; + case TemplateTable::greater : return Assembler::LE; + case TemplateTable::greater_equal: return Assembler::LT; + } + ShouldNotReachHere(); + return Assembler::EQ; +} + + +// Miscelaneous helper routines +// Store an oop (or NULL) at the Address described by obj. +// If val == noreg this means store a NULL +static void do_oop_store(InterpreterMacroAssembler* _masm, + Address dst, + Register val, + DecoratorSet decorators) { + assert(val == noreg || val == r0, "parameter is just for looks"); + assert(!dst.uses(r1) && !dst.uses(r14), "destroyed register"); + __ store_heap_oop(dst, val, r14, r1, decorators); +} + +static void do_oop_load(InterpreterMacroAssembler* _masm, + Address src, + Register dst, + DecoratorSet decorators) { + __ load_heap_oop(dst, src, r14, r1, decorators); +} + +Address TemplateTable::at_bcp(int offset) { + assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); + return Address(rbcp, offset); +} + +void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, + Register temp_reg, bool load_bc_into_bc_reg/*=true*/, + int byte_no) +{ + if (!RewriteBytecodes) return; + Label L_patch_done; + + switch (bc) { + case Bytecodes::_fast_aputfield: + case Bytecodes::_fast_bputfield: + case Bytecodes::_fast_zputfield: + case Bytecodes::_fast_cputfield: + case Bytecodes::_fast_dputfield: + case Bytecodes::_fast_fputfield: + case Bytecodes::_fast_iputfield: + case Bytecodes::_fast_lputfield: + case Bytecodes::_fast_sputfield: + { + // We skip bytecode quickening for putfield instructions when + // the put_code written to the constant pool cache is zero. + // This is required so that every execution of this instruction + // calls out to InterpreterRuntime::resolve_get_put to do + // additional, required work. + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + assert(load_bc_into_bc_reg, "we use bc_reg as temp"); + __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1); + __ mov(bc_reg, bc); + __ cmp(temp_reg, (unsigned) 0); + __ b(L_patch_done, Assembler::EQ); // don't patch + } + break; + default: + assert(byte_no == -1, "sanity"); + // the pair bytecodes have already done the load. 
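+    // Only materialize the bytecode constant here when the caller asked for it.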
+ if (load_bc_into_bc_reg) { + __ mov(bc_reg, bc); + } + } + + if (JvmtiExport::can_post_breakpoint()) { + Label L_fast_patch; + // if a breakpoint is present we can't rewrite the stream directly + __ load_unsigned_byte(temp_reg, at_bcp(0)); + __ cmp(temp_reg, Bytecodes::_breakpoint); + __ b(L_fast_patch, Assembler::NE); + // Let breakpoint table handling rewrite to quicker bytecode + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), rmethod, rbcp, bc_reg); + __ b(L_patch_done); + __ bind(L_fast_patch); + } + +#ifdef ASSERT + Label L_okay; + __ load_unsigned_byte(temp_reg, at_bcp(0)); + __ cmp(temp_reg, (int) Bytecodes::java_code(bc)); + __ b(L_okay, Assembler::EQ); + __ cmp(temp_reg, bc_reg); + __ b(L_okay, Assembler::EQ); + __ stop("patching the wrong bytecode"); + __ bind(L_okay); +#endif + + // patch bytecode + __ strb(bc_reg, at_bcp(0)); + __ bind(L_patch_done); +} + + +// Individual instructions + +void TemplateTable::nop() { + transition(vtos, vtos); + // nothing to do +} + +void TemplateTable::shouldnotreachhere() { + transition(vtos, vtos); + __ stop("shouldnotreachhere bytecode"); +} + +void TemplateTable::aconst_null() +{ + transition(vtos, atos); + __ mov(r0, 0); +} + +void TemplateTable::iconst(int value) +{ + transition(vtos, itos); + __ mov(r0, value); +} + +void TemplateTable::lconst(int value) +{ + // int is 32 bit and only ever used for loading small values + __ mov(r0, value & 0xffffffff); + __ mov(r1, 0); +} + +void TemplateTable::fconst(int value) +{ + transition(vtos, ftos); + float fval = value; + assert(value == 0 || value == 1 || value == 2, "invalid float const"); + if (hasFPU()) { + if(__ operand_valid_for_float_immediate(fval)) { + __ vmov_f32(d0, fval); + } else { + __ mov(r0, *((uint32_t*)&fval)); + __ vmov_f32(d0, r0); + } + } else { + __ mov(r0, *((uint32_t*)&fval)); + } +} + +void TemplateTable::dconst(int value) +{ + transition(vtos, dtos); + double dval = value; + assert(value == 0 || value == 1 || value == 2, "invalid double const"); + if (hasFPU()) { + if(__ operand_valid_for_double_immediate(dval)) { + __ vmov_f64(d0, dval); + } else { + uint32_t* ptr = (uint32_t*)&dval; + __ mov(r0, *ptr); + __ mov(r1, *(ptr + 1)); + __ vmov_f64(d0, r0, r1); + } + } else { + uint32_t* ptr = (uint32_t*)&dval; + __ mov(r0, *ptr); + __ mov(r1, *(ptr + 1)); + } +} + +void TemplateTable::bipush() +{ + transition(vtos, itos); + __ load_signed_byte(r0, at_bcp(1)); +} + +void TemplateTable::sipush() +{ + transition(vtos, itos); + __ load_unsigned_short(r0, at_bcp(1)); + __ rev(r0, r0); + __ asr(r0, r0, 16); +} + +void TemplateTable::ldc(bool wide) +{ + transition(vtos, vtos); + Label call_ldc, notFloat, notClass, notInt, Done; + + if (wide) { + __ get_unsigned_2_byte_index_at_bcp(r1, 1); + } else { + __ load_unsigned_byte(r1, at_bcp(1)); + } + __ get_cpool_and_tags(r2, r0); + + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type + __ add(r3, r1, tags_offset); + __ ldrb(r3, Address(r0, r3)); + + // unresolved class - get the resolved class + __ cmp(r3, JVM_CONSTANT_UnresolvedClass); + __ b(call_ldc, Assembler::EQ); + + // unresolved class in error state - call into runtime to throw the error + // from the first resolution attempt + __ cmp(r3, JVM_CONSTANT_UnresolvedClassInError); + __ b(call_ldc, Assembler::EQ); + + // resolved class - need to call vm to get java mirror of the class + __ cmp(r3, JVM_CONSTANT_Class); + __ b(notClass, Assembler::NE); + + 
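+  // All three class-like tag cases above resolve through the same runtime call.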
__ bind(call_ldc); + __ mov(c_rarg1, wide); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), c_rarg1); + __ push_ptr(r0); + __ verify_oop(r0); + __ b(Done); + + __ bind(notClass); + if (hasFPU()) { + __ cmp(r3, JVM_CONSTANT_Float); + __ b(notFloat, Assembler::NE); + // ftos + __ adds(r1, r2, r1, lsl(2)); + __ vldr_f32(d0, Address(r1, base_offset)); + + __ push_f(); + + __ b(Done); + + __ bind(notFloat); + } else { + // Soft FP pass through T_INT case. +#ifdef ASSERT + __ cmp(r3, JVM_CONSTANT_Float); + __ mov(r3, JVM_CONSTANT_Integer, Assembler::EQ); +#endif // ASSER + } + + __ cmp(r3, JVM_CONSTANT_Integer); + __ b(notInt, Assembler::NE); + + // itos + __ adds(r1, r2, r1, lsl(2)); + __ ldr(r0, Address(r1, base_offset)); + __ push_i(r0); + __ b(Done); + + __ bind(notInt); + condy_helper(Done); + + __ bind(Done); +} + +// Fast path for caching oop constants. +void TemplateTable::fast_aldc(bool wide) +{ + transition(vtos, atos); + + Register result = r0; + Register tmp = r1; + Register rarg = r2; + + int index_size = wide ? sizeof(u2) : sizeof(u1); + + Label resolved; + + // We are resolved if the resolved reference cache entry contains a + // non-null object (String, MethodType, etc.) + assert_different_registers(result, tmp); + __ get_cache_index_at_bcp(tmp, 1, index_size); + __ load_resolved_reference_at_index(result, tmp); + __ cbnz(result, resolved); + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + + // first time invocation - must resolve first + __ mov(rarg, (int)bytecode()); + __ call_VM(result, entry, rarg); + + __ bind(resolved); + + { // Check for the null sentinel. + // If we just called the VM, it already did the mapping for us, + // but it's harmless to retry. + + // Stash null_sentinel address to get its value later + __ movptr(rarg, (uintptr_t)Universe::the_null_sentinel_addr()); + __ ldr(tmp, Address(rarg)); + __ cmp(result, tmp); + __ mov(result, 0, Assembler::EQ); // NULL object reference + } + + if (VerifyOops) { + // Safe to call with 0 result + __ verify_oop(result); + } +} + +void TemplateTable::ldc2_w() +{ + transition(vtos, vtos); + Label notLong, Done; + __ get_unsigned_2_byte_index_at_bcp(r0, 1); + + __ get_cpool_and_tags(r1, r2); + const int base_offset = ConstantPool::header_size() * wordSize; + const int tags_offset = Array::base_offset_in_bytes(); + + // get type + __ lea(r2, Address(r2, r0, lsl(0))); + __ load_unsigned_byte(r2, Address(r2, tags_offset)); + if (hasFPU()) { + Label notDouble; + __ cmp(r2, (int)JVM_CONSTANT_Double); + __ b(notDouble, Assembler::NE); + // dtos + __ lea (r2, Address(r1, r0, lsl(2))); + __ vldr_f64(d0, Address(r2, base_offset)); + __ push_d(); + __ b(Done); + + __ bind(notDouble); + } + __ cmp(r2, (int)JVM_CONSTANT_Long); + __ b(notLong, Assembler::NE); + // ltos + __ lea(r1, Address(r1, r0, lsl(2))); + __ ldr(r0, Address(r1, base_offset)); + __ ldr(r1, Address(r1, base_offset + wordSize)); + __ push_l(); + __ b(Done); + + __ bind(notLong); + condy_helper(Done); + __ bind(Done); +} + +void TemplateTable::condy_helper(Label& Done) +{ + Register obj = r0; + Register rarg = r1; + Register flags = r2; + Register off = r3; + + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); + + __ mov(rarg, (int) bytecode()); + __ call_VM(obj, entry, rarg); + + __ get_vm_result_2(flags, rthread); + + // VMr = obj = base address to find primitive value to push + // VMr2 = flags = (tos, off) using format of CPCE::_flags + assert(ConstantPoolCacheEntry::field_index_mask == 
right_n_bits(ConstantPoolCacheEntry::field_index_bits), + "fix the next line"); + __ ubfx(off, flags, 0, ConstantPoolCacheEntry::field_index_bits); + + const Address field(obj, off); + + // What sort of thing are we loading? + __ ubfx(flags, flags, ConstantPoolCacheEntry::tos_state_shift, + ConstantPoolCacheEntry::tos_state_bits); + + switch (bytecode()) { + case Bytecodes::_ldc: + case Bytecodes::_ldc_w: + { + // tos in (itos, ftos, stos, btos, ctos, ztos) + Label notInt, notFloat, notShort, notByte, notChar, notBool; + __ cmp(flags, itos); + __ b(notInt, Assembler::NE); + // itos + __ ldr(r0, field); + __ push(itos); + __ b(Done); + + __ bind(notInt); + __ cmp(flags, ftos); + __ b(notFloat, Assembler::NE); + // ftos + __ lea(rarg, field); // vldr does not accept [r+r] address format + __ load_float(Address(rarg)); + __ push(ftos); + __ b(Done); + + __ bind(notFloat); + __ cmp(flags, stos); + __ b(notShort, Assembler::NE); + // stos + __ load_signed_short(r0, field); + __ push(stos); + __ b(Done); + + __ bind(notShort); + __ cmp(flags, btos); + __ b(notByte, Assembler::NE); + // btos + __ load_signed_byte(r0, field); + __ push(btos); + __ b(Done); + + __ bind(notByte); + __ cmp(flags, ctos); + __ b(notChar, Assembler::NE); + // ctos + __ load_unsigned_short(r0, field); + __ push(ctos); + __ b(Done); + + __ bind(notChar); + __ cmp(flags, ztos); + __ b(notBool, Assembler::NE); + // ztos + __ load_signed_byte(r0, field); + __ push(ztos); + __ b(Done); + + __ bind(notBool); + break; + } + + case Bytecodes::_ldc2_w: + { + Label notLong, notDouble; + __ cmp(flags, ltos); + __ b(notLong, Assembler::NE); + // ltos + __ ldrd(r0, r1, field); + __ push(ltos); + __ b(Done); + + __ bind(notLong); + __ cmp(flags, dtos); + __ b(notDouble, Assembler::NE); + // dtos + __ lea(rarg, field); // vdlr does not accept [r+r] address format + __ load_double(Address(rarg)); + __ push(dtos); + __ b(Done); + + __ bind(notDouble); + break; + } + + default: + ShouldNotReachHere(); + } + + __ stop("bad ldc/condy"); +} + +void TemplateTable::locals_index(Register reg, int offset) +{ + __ ldrb(reg, at_bcp(offset)); + __ neg(reg, reg); +} + +void TemplateTable::iload() { + iload_internal(); +} + +void TemplateTable::nofast_iload() { + iload_internal(may_not_rewrite); +} + +void TemplateTable::iload_internal(RewriteControl rc) { + transition(vtos, itos); + if (RewriteFrequentPairs && rc == may_rewrite) { + Label rewrite, done; + Register bc = r2; + + // get next bytecode + __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); + + // if _iload, wait to rewrite to iload2. We only want to rewrite the + // last two iloads in a pair. Comparing against fast_iload means that + // the next bytecode is neither an iload or a caload, and therefore + // an iload pair. 
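+    // (the rewrites below fuse the pair into a single _fast_iload2 / _fast_icaload template)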
+ __ cmp(r1, Bytecodes::_iload); + __ b(done, Assembler::EQ); + + // if _fast_iload rewrite to _fast_iload2 + __ cmp(r1, Bytecodes::_fast_iload); + __ mov(bc, Bytecodes::_fast_iload2); + __ b(rewrite, Assembler::EQ); + + // if _caload rewrite to _fast_icaload + __ cmp(r1, Bytecodes::_caload); + __ mov(bc, Bytecodes::_fast_icaload); + __ b(rewrite, Assembler::EQ); + + // else rewrite to _fast_iload + __ mov(bc, Bytecodes::_fast_iload); + + // rewrite + // bc: new bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_iload, bc, r1, false); + __ bind(done); + + } + + // do iload, get the local value into tos + locals_index(r1); + __ ldr(r0, iaddress(r1)); + __ reg_printf("iloaded value %d\n", r0); +} + +void TemplateTable::fast_iload2() +{ + transition(vtos, itos); + locals_index(r1); + __ ldr(r0, iaddress(r1)); + __ push(itos); + locals_index(r1, 3); + __ ldr(r0, iaddress(r1)); +} + +void TemplateTable::fast_iload() +{ + transition(vtos, itos); + locals_index(r1); + __ ldr(r0, iaddress(r1)); +} + +void TemplateTable::lload() +{ + transition(vtos, ltos); + locals_index(r2); + __ ldrd(r0, r1, laddress(r2, r3, _masm)); +} + +void TemplateTable::fload() +{ + transition(vtos, ftos); + locals_index(r1); + __ load_float(faddress(r1, r2, _masm)); +} + +void TemplateTable::dload() +{ + transition(vtos, dtos); + if (hasFPU()) { + __ ldrb(r1, at_bcp(1)); + __ sub(r1, rlocals, r1, lsl(LogBytesPerWord)); + __ load_double(Address(r1, Interpreter::local_offset_in_bytes(1))); + } else { + locals_index(r2); + __ load_double(daddress(r2, r3, _masm)); + } +} + +void TemplateTable::aload() +{ + transition(vtos, atos); + locals_index(r1); + __ ldr(r0, iaddress(r1)); +} + +void TemplateTable::locals_index_wide(Register reg) { + __ ldrh(reg, at_bcp(2)); + __ rev16(reg, reg); + __ neg(reg, reg); +} + +void TemplateTable::wide_iload() { + transition(vtos, itos); + locals_index_wide(r1); + __ ldr(r0, iaddress(r1)); +} + +void TemplateTable::wide_lload() +{ + transition(vtos, ltos); + locals_index_wide(r2); + __ ldrd(r0, r1, laddress(r2, r3, _masm)); +} + +void TemplateTable::wide_fload() +{ + transition(vtos, ftos); + locals_index_wide(r1); + if (hasFPU()) { + __ vldr_f32(d0, faddress(r1, rscratch1, _masm)); + } else { + __ ldr (r0, faddress(r1, rscratch1, _masm)); + } +} + +void TemplateTable::wide_dload() +{ + transition(vtos, dtos); + if (hasFPU()) { + __ ldrh(r1, at_bcp(2)); + __ rev16(r1, r1); + __ sub(r1, rlocals, r1, lsl(LogBytesPerWord)); + __ vldr_f64(d0, Address(r1, Interpreter::local_offset_in_bytes(1))); + } else { + locals_index_wide(r2); + __ ldrd(r0, r1, daddress(r2, r3, _masm)); + } +} + +void TemplateTable::wide_aload() +{ + transition(vtos, atos); + locals_index_wide(r1); + __ ldr(r0, aaddress(r1)); +} + +void TemplateTable::index_check(Register array, Register index) +{ + // destroys rscratch1 + // check array + __ null_check(array, arrayOopDesc::length_offset_in_bytes()); + // sign extend index for use by indexed load + // __ movl2ptr(index, index); + // check index + Register length = rscratch1; + __ ldr(length, Address(array, arrayOopDesc::length_offset_in_bytes())); + __ reg_printf("Checking index in array, array = %p, alen = %d, index = %d\n", array, length, index); + __ cmp(index, length); + if (index != r2) { + // ??? convention: move aberrant index into r2 for exception message + assert(r2 != array, "different registers"); + __ mov(r2, index); + } + Label ok; + __ b(ok, Assembler::LO); + // ??? 
convention: move array into r3 for exception message + __ mov(r3, array); + __ mov(rscratch1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); + __ b(rscratch1); + __ bind(ok); +} + +void TemplateTable::iaload() +{ + transition(itos, itos); + __ mov(r2, r0); + __ pop_ptr(r0); + // r0: array + // r2: index + index_check(r0, r2); // leaves index in r2, kills rscratch1 + __ lea(r2, Address(r0, r2, lsl(2))); + __ access_load_tos_at(T_INT, IN_HEAP | IS_ARRAY, Address(r2, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); +} + +void TemplateTable::laload() +{ + transition(itos, ltos); + __ mov(r2, r0); + __ pop_ptr(r0); + // r0: array + // r2: index + index_check(r0, r2); // leaves index in r2, kills rscratch1 + __ lea(r2, Address(r0, r2, lsl(3))); + __ lea(r2, Address(r2, arrayOopDesc::base_offset_in_bytes(T_LONG))); + __ atomic_ldrd(r0, r1, r2); + __ access_load_tos_at(T_LONG, IN_HEAP | IS_ARRAY, Address(r2), noreg, noreg); +} + +void TemplateTable::faload() +{ + transition(itos, ftos); + __ mov(r2, r0); + __ pop_ptr(r0); + // r0: array + // r2: index + index_check(r0, r2); // leaves index in r2, kills rscratch1 + __ lea(r2, Address(r0, r2, lsl(2))); + __ access_load_tos_at(T_FLOAT, IN_HEAP | IS_ARRAY, + Address(r2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); +} + +void TemplateTable::daload() +{ + transition(itos, dtos); + __ mov(r2, r0); + __ pop_ptr(r0); + // r0: array + // r2: index + index_check(r0, r2); // leaves index in r2, kills rscratch1 + __ lea(r2, Address(r0, r2, lsl(3))); + __ access_load_tos_at(T_DOUBLE, IN_HEAP | IS_ARRAY, + Address(r2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); +} + +void TemplateTable::aaload() +{ + transition(itos, atos); + __ mov(r2, r0); + __ pop_ptr(r0); + // r0: array + // r2: index + index_check(r0, r2); // leaves index in r2, kills rscratch1 + __ lea(r2, Address(r0, r2, lsl(2))); + do_oop_load(_masm, + Address(r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), + r0, + IS_ARRAY); +} + +void TemplateTable::baload() +{ + transition(itos, itos); + __ mov(r2, r0); + __ pop_ptr(r0); + // r0: array + // r2: index + index_check(r0, r2); // leaves index in r2, kills rscratch1 + __ lea(r2, Address(r0, r2, lsl(0))); + __ access_load_tos_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(r2, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); +} + +void TemplateTable::caload() +{ + transition(itos, itos); + __ mov(r2, r0); + __ pop_ptr(r0); + // r0: array + // r2: index + index_check(r0, r2); // leaves index in r2, kills rscratch1 + __ lea(r2, Address(r0, r2, lsl(1))); + __ access_load_tos_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(r2, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); +} + +// iload followed by caload frequent pair +void TemplateTable::fast_icaload() +{ + transition(vtos, itos); + // load index out of locals + locals_index(r2); + __ ldr(r2, iaddress(r2)); + + __ pop_ptr(r0); + + // r0: array + // r2: index + index_check(r0, r2); // leaves index in r1, kills rscratch1 + __ lea(r2, Address(r0, r2, lsl(1))); + __ access_load_tos_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(r2, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); +} + +void TemplateTable::saload() +{ + transition(itos, itos); + __ mov(r2, r0); + __ pop_ptr(r0); + // r0: array + // r2: index + index_check(r0, r2); // leaves index in r2, kills rscratch1 + __ lea(r2, Address(r0, r2, lsl(1))); + __ access_load_tos_at(T_SHORT, IN_HEAP | IS_ARRAY, Address(r2, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); +} + +void 
TemplateTable::iload(int n) +{ + transition(vtos, itos); + __ ldr(r0, iaddress(n)); +} + +void TemplateTable::lload(int n) +{ + transition(vtos, ltos); + __ ldrd(r0, r1, laddress(n)); +} + +void TemplateTable::fload(int n) +{ + transition(vtos, ftos); + if (hasFPU()) { + __ vldr_f32(d0, faddress(n)); + } else { + __ ldr(r0, faddress(n)); + } +} + +void TemplateTable::dload(int n) +{ + transition(vtos, dtos); + if (hasFPU()) { + __ vldr_f64(d0, daddress(n)); + } else { + __ ldrd(r0, r1, daddress(n)); + } +} + +void TemplateTable::aload(int n) +{ + transition(vtos, atos); + __ ldr(r0, iaddress(n)); + __ reg_printf("aload, loaded %p\n", r0); +} + +void TemplateTable::aload_0() { + aload_0_internal(); +} + +void TemplateTable::nofast_aload_0() { + aload_0_internal(may_not_rewrite); +} + +void TemplateTable::aload_0_internal(RewriteControl rc) { + // According to bytecode histograms, the pairs: + // + // _aload_0, _fast_igetfield + // _aload_0, _fast_agetfield + // _aload_0, _fast_fgetfield + // + // occur frequently. If RewriteFrequentPairs is set, the (slow) + // _aload_0 bytecode checks if the next bytecode is either + // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then + // rewrites the current bytecode into a pair bytecode; otherwise it + // rewrites the current bytecode into _fast_aload_0 that doesn't do + // the pair check anymore. + // + // Note: If the next bytecode is _getfield, the rewrite must be + // delayed, otherwise we may miss an opportunity for a pair. + // + // Also rewrite frequent pairs + // aload_0, aload_1 + // aload_0, iload_1 + // These bytecodes with a small amount of code are most profitable + // to rewrite + if (RewriteFrequentPairs && rc == may_rewrite) { + Label rewrite, done; + const Register bc = r14; + + // get next bytecode + __ load_unsigned_byte(r1, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); + + // if _getfield then wait with rewrite + __ cmp(r1, Bytecodes::Bytecodes::_getfield); + __ b(done, Assembler::EQ); + + // if _igetfield then rewrite to _fast_iaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ cmp(r1, Bytecodes::_fast_igetfield); + __ mov(bc, Bytecodes::_fast_iaccess_0); + __ b(rewrite, Assembler::EQ); + + // if _agetfield then rewrite to _fast_aaccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ cmp(r1, Bytecodes::_fast_agetfield); + __ mov(bc, Bytecodes::_fast_aaccess_0); + __ b(rewrite, Assembler::EQ); + + // if _fgetfield then rewrite to _fast_faccess_0 + assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ cmp(r1, Bytecodes::_fast_fgetfield); + __ mov(bc, Bytecodes::_fast_faccess_0); + __ b(rewrite, Assembler::EQ); + + // else rewrite to _fast_aload0 + assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); + __ mov(bc, Bytecodes::Bytecodes::_fast_aload_0); + + // rewrite + // bc: new bytecode + __ bind(rewrite); + patch_bytecode(Bytecodes::_aload_0, bc, r1, false); + + __ bind(done); + } + + // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop). 
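+  // aload(0) just reloads local 0 into r0 with an atos transition.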
+ aload(0); +} + +void TemplateTable::istore() +{ + transition(itos, vtos); + locals_index(r1); + __ lea(rscratch1, iaddress(r1)); + __ str(r0, Address(rscratch1)); +} + +void TemplateTable::lstore() +{ + transition(ltos, vtos); + locals_index(r2); + __ strd(r0, r1, laddress(r2, r3, _masm)); +} + +void TemplateTable::fstore() { + transition(ftos, vtos); + locals_index(r1); + __ lea(rscratch1, iaddress(r1)); + if (hasFPU()) { + __ vstr_f32(d0, Address(rscratch1)); + } else { + __ str(r0, Address(rscratch1)); + } +} + +void TemplateTable::dstore() { + transition(dtos, vtos); + if (hasFPU()) { + locals_index(r1); + __ vstr_f64(d0, daddress(r1, rscratch1, _masm)); + } else { + locals_index(r2); + __ strd(r0, r1, daddress(r2, rscratch1, _masm)); + } +} + +void TemplateTable::astore() +{ + transition(vtos, vtos); + __ pop_ptr(r0); + __ reg_printf("Astore, storing value %p\n", r0); + locals_index(r1); + __ str(r0, aaddress(r1)); +} + +void TemplateTable::wide_istore() { + transition(vtos, vtos); + __ pop_i(); + locals_index_wide(r1); + __ lea(rscratch1, iaddress(r1)); + __ str(r0, Address(rscratch1)); +} + +void TemplateTable::wide_lstore() { + transition(vtos, vtos); + __ pop_l(); + locals_index_wide(r2); + __ strd(r0, r1, laddress(r2, r3, _masm)); +} + +void TemplateTable::wide_fstore() { + transition(vtos, vtos); + locals_index_wide(r1); + __ lea(rscratch1, faddress(r1, rscratch1, _masm)); + if (hasFPU()) { + __ pop_f(); + __ vstr_f32(d0, rscratch1); + } else { + __ pop_i(); + __ str(r0, Address(rscratch1)); + } +} + +void TemplateTable::wide_dstore() { + transition(vtos, vtos); + if (hasFPU()) { + __ pop_d(); + locals_index_wide(r1); + __ vstr_f64(d0, daddress(r1, rscratch1, _masm)); + } else { + __ pop_l(); + locals_index_wide(r2); + __ strd(r0, r1, daddress(r2, rscratch1, _masm)); + } +} + +void TemplateTable::wide_astore() { + transition(vtos, vtos); + __ pop_ptr(r0); + locals_index_wide(r1); + __ str(r0, aaddress(r1)); +} + +void TemplateTable::iastore() { + transition(itos, vtos); + __ pop_i(r2); + __ pop_ptr(r3); + // r0: value + // r2: index + // r3: array + index_check(r3, r2); // prefer index in r2 + __ lea(rscratch1, Address(r3, r2, lsl(2))); + __ access_store_tos_at(T_INT, IN_HEAP | IS_ARRAY, + Address(rscratch1, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); +} + +void TemplateTable::lastore() { + transition(ltos, vtos); + __ pop_i(r2); + __ pop_ptr(r3); + // : value + // r2: index + // r3: array + index_check(r3, r2); // prefer index in r2 + __ lea(rscratch1, Address(r3, r2, lsl(3))); + __ lea(rscratch1, Address(rscratch1, + arrayOopDesc::base_offset_in_bytes(T_LONG))); + __ access_store_tos_at(T_LONG, IN_HEAP | IS_ARRAY, Address(rscratch1), noreg, noreg); +} + +void TemplateTable::fastore() { + transition(ftos, vtos); + __ pop_i(r2); + __ pop_ptr(r3); + // d0/r0: value + // r2: index + // r3: array + index_check(r3, r2); // prefer index in r2 + __ lea(rscratch1, Address(r3, r2, lsl(2))); + __ access_store_tos_at(T_FLOAT, IN_HEAP | IS_ARRAY, + Address(rscratch1, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); +} + +void TemplateTable::dastore() { + transition(dtos, vtos); + __ pop_i(r2); + __ pop_ptr(r3); + // d0/r0:r1: value + // r2: index + // r3: array + index_check(r3, r2); // prefer index in r2 + __ lea(rscratch1, Address(r3, r2, lsl(3))); + __ access_store_tos_at(T_DOUBLE, IN_HEAP | IS_ARRAY, + Address(rscratch1, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); +} + +void TemplateTable::aastore() { + Label is_null, ok_is_subtype, done; 
+ transition(vtos, vtos); + // stack: ..., array, index, value + __ ldr(r0, at_tos()); // value + __ ldr(r2, at_tos_p1()); // index + __ ldr(r3, at_tos_p2()); // array + + Address element_address(r2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); + + index_check(r3, r2); + + // do array store check - check for NULL value first + __ cmp(r0, 0); + __ b(is_null, Assembler::EQ); + + // Move subklass into r1 + __ load_klass(r1, r0); + // Move superklass into r0 + __ load_klass(r0, r3); + __ ldr(r0, Address(r0, + ObjArrayKlass::element_klass_offset())); + // Compress array + index*oopSize + 12 into a single register. Frees r2. + + // Generate subtype check. Blows r2, r14? + // Superklass in r0. Subklass in r1. + __ gen_subtype_check(r1, ok_is_subtype); + + // Come here on failure + // object is at TOS + __ b(Interpreter::_throw_ArrayStoreException_entry); + + // Come here on success + __ bind(ok_is_subtype); + + // Get the value we will store + __ ldr(r0, at_tos()); + // And the clobbered index + __ ldr(r2, at_tos_p1()); // index + __ lea(r2, Address(r3, r2, lsl(2))); + // Now store using the appropriate barrier + + do_oop_store(_masm, element_address, r0, IS_ARRAY); + __ b(done); + + // Have a NULL in r0, r3=array, r2=index. Store NULL at ary[idx] + __ bind(is_null); + __ profile_null_seen(r1); + + __ lea(r2, Address(r3, r2, lsl(2))); + // Store a NULL + do_oop_store(_masm, element_address, noreg, IS_ARRAY); + + // Pop stack arguments + __ bind(done); + __ add(sp, sp, 3 * Interpreter::stackElementSize); +} + +void TemplateTable::bastore() +{ + transition(itos, vtos); + __ pop_i(r2); + __ pop_ptr(r3); + // r0: value + // r2: index + // r3: array + index_check(r3, r2); // prefer index in r2 + + // Need to check whether array is boolean or byte + // since both types share the bastore bytecode. 
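+  // layout_helper_boolean_diffbit() is the single bit in which the boolean-array and
+  // byte-array layout helpers differ; if it is set in this klass's layout helper the
+  // array is T_BOOLEAN and the value is masked down to 0/1 before being stored.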
+ __ load_klass(r1, r3); + __ ldr(r1, Address(r1, Klass::layout_helper_offset())); + int diffbit = Klass::layout_helper_boolean_diffbit(); + __ tst(r1, diffbit); + __ andr(r0, r0, 1, Assembler::NE); // if it is a T_BOOLEAN array, mask the stored value to 0/1 + + __ lea(rscratch1, Address(r3, r2)); + __ access_store_tos_at(T_BYTE, IN_HEAP | IS_ARRAY, + Address(rscratch1, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); +} + +void TemplateTable::castore() +{ + transition(itos, vtos); + __ pop_i(r2); + __ pop_ptr(r3); + // r0: value + // r2: index + // r3: array + index_check(r3, r2); // prefer index in r2 + __ lea(rscratch1, Address(r3, r2, lsl(1))); + __ access_store_tos_at(T_CHAR, IN_HEAP | IS_ARRAY, + Address(rscratch1, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); +} + +void TemplateTable::sastore() +{ + castore(); +} + +void TemplateTable::istore(int n) +{ + transition(itos, vtos); + __ str(r0, iaddress(n)); +} + +void TemplateTable::lstore(int n) +{ + transition(ltos, vtos); + __ strd(r0, r1, laddress(n)); +} + +void TemplateTable::fstore(int n) +{ + transition(ftos, vtos); + if (hasFPU()) { + __ vstr_f32(d0, faddress(n)); + } else { + __ str(r0, faddress(n)); + } +} + +void TemplateTable::dstore(int n) +{ + transition(dtos, vtos); + if (hasFPU()) { + __ vstr_f64(d0, daddress(n)); + } else { + __ strd(r0, r1, daddress(n)); + } +} + +void TemplateTable::astore(int n) +{ + transition(vtos, vtos); + __ pop_ptr(r0); + __ str(r0, iaddress(n)); +} + +void TemplateTable::pop() +{ + transition(vtos, vtos); + __ add(sp, sp, Interpreter::stackElementSize); +} + +void TemplateTable::pop2() +{ + transition(vtos, vtos); + __ add(sp, sp, 2 * Interpreter::stackElementSize); +} + +void TemplateTable::dup() +{ + transition(vtos, vtos); + __ ldr(r0, Address(sp, 0)); + __ reg_printf("Value duplicated is %p\n", r0); + __ push(r0); + // stack: ..., a, a +} + +void TemplateTable::dup_x1() +{ + transition(vtos, vtos); + // stack: ..., a, b + __ ldr(r0, at_tos()); // load b + __ ldr(r2, at_tos_p1()); // load a + __ str(r0, at_tos_p1()); // store b + __ str(r2, at_tos()); // store a + __ push(r0); // push b + // stack: ..., b, a, b +} + +void TemplateTable::dup_x2() +{ + transition(vtos, vtos); + // stack: ..., a, b, c + __ ldr(r0, at_tos()); // load c + __ ldr(r2, at_tos_p2()); // load a + __ str(r0, at_tos_p2()); // store c in a + __ push(r0); // push c + // stack: ..., c, b, c, c + __ ldr(r0, at_tos_p2()); // load b + __ str(r2, at_tos_p2()); // store a in b + // stack: ..., c, a, c, c + __ str(r0, at_tos_p1()); // store b in c + // stack: ..., c, a, b, c +} + +void TemplateTable::dup2() +{ + transition(vtos, vtos); + // stack: ..., a, b + __ ldr(r0, at_tos_p1()); // load a + __ push(r0); // push a + __ ldr(r0, at_tos_p1()); // load b + __ push(r0); // push b + // stack: ..., a, b, a, b +} + +void TemplateTable::dup2_x1() +{ + transition(vtos, vtos); + // stack: ..., a, b, c + __ ldr(r2, at_tos()); // load c + __ ldr(r0, at_tos_p1()); // load b + __ push(r0); // push b + __ push(r2); // push c + // stack: ..., a, b, c, b, c + __ str(r2, at_tos_p3()); // store c in b + // stack: ..., a, c, c, b, c + __ ldr(r2, at_tos_p4()); // load a + __ str(r2, at_tos_p2()); // store a in 2nd c + // stack: ..., a, c, a, b, c + __ str(r0, at_tos_p4()); // store b in a + // stack: ..., b, c, a, b, c +} + +void TemplateTable::dup2_x2() +{ + transition(vtos, vtos); + // stack: ..., a, b, c, d + __ ldr(r2, at_tos()); // load d + __ ldr(r0, at_tos_p1()); // load c + __ push(r0) ; // push c + __ push(r2); 
// push d + // stack: ..., a, b, c, d, c, d + __ ldr(r0, at_tos_p4()); // load b + __ str(r0, at_tos_p2()); // store b in d + __ str(r2, at_tos_p4()); // store d in b + // stack: ..., a, d, c, b, c, d + __ ldr(r2, at_tos_p5()); // load a + __ ldr(r0, at_tos_p3()); // load c + __ str(r2, at_tos_p3()); // store a in c + __ str(r0, at_tos_p5()); // store c in a + // stack: ..., c, d, a, b, c, d +} + +void TemplateTable::swap() +{ + transition(vtos, vtos); + // stack: ..., a, b + __ ldr(r2, at_tos_p1()); // load a + __ ldr(r0, at_tos()); // load b + __ str(r2, at_tos()); // store a in b + __ str(r0, at_tos_p1()); // store b in a + // stack: ..., b, a +} + +void TemplateTable::iop2(Operation op) +{ + transition(itos, itos); + // r0 <== r1 op r0 + __ pop_i(r1); + switch (op) { + case add : __ add(r0, r1, r0); break; + case sub : __ sub(r0, r1, r0); break; + case mul : __ mul(r0, r1, r0); break; + case _and : __ andr(r0, r1, r0); break; + case _or : __ orr(r0, r1, r0); break; + case _xor : __ eor(r0, r1, r0); break; + case shl : + __ andr(r0, r0, 0x1f); + __ lsl(r0, r1, r0); + break; + case shr : + __ andr(r0, r0, 0x1f); + __ asr(r0, r1, r0); + break; + case ushr : + __ andr(r0, r0, 0x1f); + __ lsr(r0, r1, r0); + break; + default : ShouldNotReachHere(); + } +} + +void TemplateTable::lop2(Operation op) +{ + transition(ltos, ltos); + // <== op + __ pop_l(r2, r3); + switch (op) { + case add : __ adds(r0, r2, r0); __ adc(r1, r3, r1); break; + case sub : __ subs(r0, r2, r0); __ sbc(r1, r3, r1); break; + case mul : __ mult_long(r0, r2, r0); break; + case _and : __ andr(r0, r2, r0); __ andr(r1, r3, r1); break; + case _or : __ orr(r0, r2, r0); __ orr(r1, r3, r1); break; + case _xor : __ eor(r0, r2, r0); __ eor(r1, r3, r1); break; + default : ShouldNotReachHere(); + } +} + +void TemplateTable::idiv() +{ + transition(itos, itos); + // explicitly check for div0 + Label no_div0; + __ cmp(r0, 0); + __ b(no_div0, Assembler::NE); + __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry); + __ b(rscratch1); + __ bind(no_div0); + __ pop_i(r1); + // r0 <== r1 idiv r0 + __ divide(r0, r1, r0, 32, false); +} + +void TemplateTable::irem() +{ + transition(itos, itos); + // explicitly check for div0 + Label no_div0; + __ cmp(r0, 0); + __ b(no_div0, Assembler::NE); + __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry); + __ b(rscratch1); + __ bind(no_div0); + __ pop_i(r1); + // r0 <== r1 irem r0 + __ divide(r0, r1, r0, 32, true); +} + +void TemplateTable::lmul() +{ + transition(ltos, ltos); + __ pop_l(r2, r3); + __ mult_long(r0, r0, r2); +} + +void TemplateTable::ldiv() +{ + transition(ltos, ltos); + // explicitly check for div0 + __ cmp(r0, 0); + __ cmp(r1, 0, Assembler::EQ); + __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry, Assembler::EQ); + __ b(rscratch1, Assembler::EQ); + + __ pop_l(r2, r3); + // r0 <== r1 ldiv r0 + __ divide(r0, r2, r0, 64, false); +} + +void TemplateTable::lrem() +{ + transition(ltos, ltos); + // explicitly check for div0 + __ cmp(r0, 0); + __ cmp(r1, 0, Assembler::EQ); + __ mov(rscratch1, Interpreter::_throw_ArithmeticException_entry, Assembler::EQ); + __ b(rscratch1, Assembler::EQ); + + __ pop_l(r2, r3); + // r0 <== r1 lrem r0 + __ divide(r0, r2, r0, 64, true); +} + +void TemplateTable::lshl() { + transition(itos, ltos); + // shift count is in r0 - take shift from bottom six bits only + __ andr(r0, r0, 0x3f); + __ pop_l(r2, r3); + const int word_bits = 8 * wordSize; + + __ sub(r1, r0, word_bits); + __ lsl(r3, r3, r0); + __ orr(r3, r3, r2, lsl(r1)); + __ 
rsb(r1, r0, word_bits); + __ orr(r1, r3, r2, lsr(r1)); + __ lsl(r0, r2, r0); +} + +void TemplateTable::lshr() { + transition(itos, ltos); + // shift count is in r0 - take shift from bottom six bits only + __ andr(rscratch1, r0, 0x3f); + __ pop_l(r2, r3); + const int word_bits = 8 * wordSize; + + __ lsr(r2, r2, rscratch1); + __ rsb(r1, rscratch1, word_bits); + __ orr(r0, r2, r3, lsl(r1)); + __ asr(r1, r3, rscratch1); + __ subs(rscratch1, rscratch1, word_bits); + __ orr(r0, r2, r3, asr(rscratch1), Assembler::GT); +} + +void TemplateTable::lushr() { + transition(itos, ltos); + // shift count is in r0 - take shift from bottom six bits only + __ andr(r0, r0, 0x3f); + __ pop_l(r2, r3); + const int word_bits = 8 * wordSize; + + __ lsr(r2, r2, r0); + __ rsb(r1, r0, word_bits); + __ orr(r2, r2, r3, lsl(r1)); + __ lsr(r1, r3, r0); + __ sub(r0, r0, word_bits); + __ orr(r0, r2, r3, lsr(r0)); +} + +void TemplateTable::fop2(Operation op) +{ + transition(ftos, ftos); + if(hasFPU()) { + switch (op) { + case add: + __ pop_f(d1); + __ vadd_f32(d0, d1, d0); + break; + case sub: + __ pop_f(d1); + __ vsub_f32(d0, d1, d0); + break; + case mul: + __ pop_f(d1); + __ vmul_f32(d0, d1, d0); + break; + case div: + __ pop_f(d1); + __ vdiv_f32(d0, d1, d0); + break; + case rem: + __ vmov_f32(f1, f0); + __ pop_f(f0); + #ifndef HARD_FLOAT_CC + __ vmov_f32(r0, f0); + __ vmov_f32(r1, f1); + #endif + __ mov(rscratch1, (address)fmodf); + __ bl(rscratch1); + #ifndef HARD_FLOAT_CC + __ vmov_f32(f0, r0); + #endif + break; + default: + ShouldNotReachHere(); + break; + } + } else { +#ifdef __SOFTFP__ + __ mov(r1, r0); + __ pop_i(r0); + switch (op) { + case add: + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fadd), 0); + break; + case sub: + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fsub), 0); + break; + case mul: + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fmul), 0); + break; + case div: + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fdiv), 0); + break; + case rem: + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 0); + break; + default: + ShouldNotReachHere(); + break; + } + #else + // expected -mfloat-abi=soft + ShouldNotReachHere(); +#endif +} +} + +void TemplateTable::dop2(Operation op) +{ + transition(dtos, dtos); + if (hasFPU()) { + switch (op) { + case add: + __ pop_d(d1); + __ vadd_f64(d0, d1, d0); + break; + case sub: + __ pop_d(d1); + __ vsub_f64(d0, d1, d0); + break; + case mul: + __ pop_d(d1); + __ vmul_f64(d0, d1, d0); + break; + case div: + __ pop_d(d1); + __ vdiv_f64(d0, d1, d0); + break; + case rem: + __ vmov_f64(d1, d0); + __ pop_d(d0); + #ifndef HARD_FLOAT_CC + __ vmov_f64(r0, r1, d0); + __ vmov_f64(r2, r3, d1); + #endif + __ mov(rscratch1, (address)(double (*)(double, double))fmod); + __ bl(rscratch1); + #ifndef HARD_FLOAT_CC + __ vmov_f64(d0, r0, r1); + #endif + break; + default: + ShouldNotReachHere(); + break; + } + } else { +#ifdef __SOFTFP__ + __ push_l(r0, r1); + __ pop_l(r2,r3); + __ pop_l(r0,r1); + switch (op) { + case add: + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dadd), 0); + break; + case sub: + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsub), 0); + break; + case mul: + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dmul), 0); + break; + case div: + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::ddiv), 0); + break; + case rem: + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 0); + break; + default: + 
ShouldNotReachHere(); + break; + } +#else + // expected -mfloat-abi=soft + ShouldNotReachHere(); +#endif + } +} + +void TemplateTable::ineg() +{ + transition(itos, itos); + __ neg(r0, r0); + +} + +void TemplateTable::lneg() +{ + transition(ltos, ltos); + __ rsbs(r0, r0, 0); + __ rsc(r1, r1, 0); +} + +void TemplateTable::fneg() +{ + transition(ftos, ftos); + if(hasFPU()) { + __ vneg_f32(d0, d0); + } else { +#ifdef __SOFTFP__ + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fneg), 0); +#else + // expected -mfloat-abi=soft + ShouldNotReachHere(); +#endif + } +} + +void TemplateTable::dneg() +{ + transition(dtos, dtos); + if(hasFPU()) { + __ vneg_f64(d0, d0); + } else { +#ifdef __SOFTFP__ + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dneg), 0); +#else + // expected -mfloat-abi=soft + ShouldNotReachHere(); +#endif + } +} + +void TemplateTable::iinc() +{ + transition(vtos, vtos); + __ load_signed_byte(r1, at_bcp(2)); // get constant + locals_index(r2); + __ ldr(r0, iaddress(r2)); + __ add(r0, r0, r1); + __ str(r0, iaddress(r2)); +} + +void TemplateTable::wide_iinc() +{ + transition(vtos, vtos); + __ ldr(r1, at_bcp(2)); // get constant and index + __ rev16(r1, r1); + __ uxth(r2, r1); + __ neg(r2, r2); + __ sxth(r1, r1, ror(16)); + __ ldr(r0, iaddress(r2)); + __ add(r0, r0, r1); + __ str(r0, iaddress(r2)); +} + +void TemplateTable::convert() +{ + // Checking +#ifdef ASSERT + { + TosState tos_in = ilgl; + TosState tos_out = ilgl; + switch (bytecode()) { + case Bytecodes::_i2l: // fall through + case Bytecodes::_i2f: // fall through + case Bytecodes::_i2d: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_in = itos; break; + case Bytecodes::_l2i: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_l2d: tos_in = ltos; break; + case Bytecodes::_f2i: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_f2d: tos_in = ftos; break; + case Bytecodes::_d2i: // fall through + case Bytecodes::_d2l: // fall through + case Bytecodes::_d2f: tos_in = dtos; break; + default : ShouldNotReachHere(); + } + switch (bytecode()) { + case Bytecodes::_l2i: // fall through + case Bytecodes::_f2i: // fall through + case Bytecodes::_d2i: // fall through + case Bytecodes::_i2b: // fall through + case Bytecodes::_i2c: // fall through + case Bytecodes::_i2s: tos_out = itos; break; + case Bytecodes::_i2l: // fall through + case Bytecodes::_f2l: // fall through + case Bytecodes::_d2l: tos_out = ltos; break; + case Bytecodes::_i2f: // fall through + case Bytecodes::_l2f: // fall through + case Bytecodes::_d2f: tos_out = ftos; break; + case Bytecodes::_i2d: // fall through + case Bytecodes::_l2d: // fall through + case Bytecodes::_f2d: tos_out = dtos; break; + default : ShouldNotReachHere(); + } + transition(tos_in, tos_out); + } +#endif // ASSERT + // static const int64_t is_nan = 0x8000000000000000L; + //TODO fix this and remove _ sxtw and _ uxtw as don't exist in arm32 + // need to figure out about handling doubles and longs as they won't + // fit into a single register in arm32 + // Conversion + switch (bytecode()) { + case Bytecodes::_i2l: + // __ sxtw(r0, r0); + __ reg_printf("Convert i2l (before) 0x00000000%08x\n", r0); + __ asr(r1, r0, 31); + __ reg_printf("Convert i2l (after) 0x%08x%08x\n", r1, r0); + break; + case Bytecodes::_i2f: + if(hasFPU()) { + __ vmov_f32(d0, r0); + __ vcvt_f32_s32(d0, d0); + } else { +#ifdef __SOFTFP__ + __ 
call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::i2f), 0); +#else + // expected -mfloat-abi=soft + ShouldNotReachHere(); +#endif + } + break; + case Bytecodes::_i2d: + if(hasFPU()) { + //__ scvtfwd(d0, r0); + __ vmov_f32(d0, r0); + __ vcvt_f64_s32(d0, d0); + } else { +#ifdef __SOFTFP__ + // ro -> + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::i2d), 0); +#else + // expected -mfloat-abi=soft + ShouldNotReachHere(); +#endif + } + break; + case Bytecodes::_i2b: + __ sxtb(r0, r0); + break; + case Bytecodes::_i2c: + __ uxth(r0, r0); + break; + case Bytecodes::_i2s: + __ sxth(r0, r0); + break; + case Bytecodes::_l2i: + //__ uxtw(r0, r0); + break; + case Bytecodes::_l2f: + // -> d0 + // or -> r0 for softfp + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::l2f), 0); +#ifndef HARD_FLOAT_CC + if(hasFPU()) { + __ vmov_f32(d0, r0); + } +#endif + break; + case Bytecodes::_l2d: + // -> d0 + // or -> for softfp + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::l2d), 0); +#ifndef HARD_FLOAT_CC + if(hasFPU()) { + __ vmov_f64(d0, r0, r1); + } +#endif + break; + case Bytecodes::_f2i: + { + if(hasFPU()) { + __ vcvt_s32_f32(d0, d0); + __ vmov_f32(r0, d0); + } else { + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 0); + } + } + break; + case Bytecodes::_f2l: + { +#if !defined(HARD_FLOAT_CC) + //float already in d0 long goes to + if(hasFPU()) { + //Need to move float in d0 to r0 + __ vmov_f32(r0, d0); + } +#endif //!defined(HARD_FLOAT_CC) + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 0); + } + break; + case Bytecodes::_f2d: + if(hasFPU()) { + __ vcvt_f64_f32(d0, d0); + } else { +#ifdef __SOFTFP__ + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::f2d), 0); +#else + // expected -mfloat-abi=soft + ShouldNotReachHere(); +#endif + } + break; + case Bytecodes::_d2i: + { + if(hasFPU()) { + __ vcvt_s32_f64(d0, d0); + __ vmov_f32(r0, d0); + } else { +#ifdef __SOFTFP__ + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 0); +#else + // expected -mfloat-abi=soft + ShouldNotReachHere(); +#endif + } + } + break; + case Bytecodes::_d2l: + { + // d0 -> +#if !defined(HARD_FLOAT_CC) + if(hasFPU()) { + //Need to move float in d0 to r0 + __ vmov_f64(r0, r1, d0); + } +#endif //!defined(HARD_FLOAT_CC) + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 0); + } + break; + case Bytecodes::_d2f: + if(hasFPU()) { + __ vcvt_f32_f64(d0, d0); + } else { +#ifdef __SOFTFP__ + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::d2f), 0); +#else + // expected -mfloat-abi=soft + ShouldNotReachHere(); +#endif + } + break; + default: + ShouldNotReachHere(); + } +} + +void TemplateTable::lcmp() +{ + transition(ltos, itos); + __ pop_l(r2, r3); + // == : 0 + // < : 1 + // > : -1 + __ reg_printf("Long comparing 0x%08x%08x\n", r1, r0); + __ reg_printf(" and 0x%08x%08x\n", r3, r2); + //cmp high + Label lower, end; + __ cmp(r3, r1); + __ b(lower, Assembler::EQ); + __ mov(r0, 1); + __ sub(r0, r0, 2, Assembler::LT); + __ b(end); + + __ bind(lower); + __ subs(r0, r2, r0); + __ mov(r0, 1, Assembler::NE); + __ sub(r0, r0, 2, Assembler::LO); // Place -1 + __ bind(end); + + __ reg_printf("Result of comparison is %d\n", r0); +} + +void TemplateTable::float_cmp(bool is_float, int unordered_result) +{ + if(hasFPU()) { + if (is_float) { + __ pop_f(d1); + __ vcmp_f32(d1, d0); + } else { + __ pop_d(d1); + /*__ vmov_f64(r0, r1, d0); + __ vmov_f64(r2, r3, d1); + __ reg_printf("Doing comparison cmp( 
0x%08x%08x,\n", r3, r2); + __ reg_printf(" 0x%08x%08x)\n", r1, r0);*/ + __ vcmp_f64(d1, d0); + } + __ vmrs(rscratch1); + __ andr(rscratch1, rscratch1, Assembler::FP_MASK); + __ reg_printf("Masked comparison result is %08x\n", rscratch1); + + if (unordered_result < 0) { + // we want -1 for unordered or less than, 0 for equal and 1 for + // greater than. + __ mov(r0, -1); + __ cmp(rscratch1, Assembler::FP_EQ); + __ mov(r0, 0, Assembler::EQ); + __ cmp(rscratch1, Assembler::FP_GT); + __ mov(r0, 1, Assembler::EQ); + __ reg_printf("un_res < 0, comparison result is %d\n", r0); + } else { + // we want -1 for less than, 0 for equal and 1 for unordered or + // greater than. + __ mov(r0, 1); + __ cmp(rscratch1, Assembler::FP_LT); + __ sub(r0, r0, 2, Assembler::EQ); //Load -1 - but one less instruction + __ cmp(rscratch1, Assembler::FP_EQ); + __ mov(r0, 0, Assembler::EQ); + __ reg_printf("un_res >= 0, comparison result is %d\n", r0); + } + } else { // hasFPU +#ifdef __SOFTFP__ + if (is_float) { + __ mov(r1, r0); + __ pop_i(r0); + if (unordered_result < 0) { + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fcmpl), 0); + } else { + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::fcmpg), 0); + } + } else { + __ mov(r2, r0); + __ mov(r3, r1); + __ pop_l(r0); + if (unordered_result < 0) { + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcmpl), 0); + } else { + __ call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcmpg), 0); + } + } +#else + // expected -mfloat-abi=soft + ShouldNotReachHere(); +#endif + } +} + +void TemplateTable::branch(bool is_jsr, bool is_wide) +{ + // We might be moving to a safepoint. The thread which calls + // Interpreter::notice_safepoints() will effectively flush its cache + // when it makes a system call, but we need to do something to + // ensure that we see the changed dispatch table. + __ membar(MacroAssembler::LoadLoad); + + __ profile_taken_branch(r0, r1); + const ByteSize be_offset = MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset(); + const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset(); + + // load branch displacement + if (!is_wide) { + __ ldrh(r2, at_bcp(1)); + __ rev16(r2, r2); + // sign extend the 16 bit value in r2 + __ sxth(r2, r2); + } else { + __ ldr(r2, at_bcp(1)); + __ rev(r2, r2); + } + + // Handle all the JSR stuff here, then exit. + // It's much shorter and cleaner than intermingling with the non-JSR + // normal-branch stuff occurring below. + + if (is_jsr) { + // Pre-load the next target bytecode into rscratch1 + __ load_unsigned_byte(rscratch1, Address(rbcp, r2)); + // compute return address as bci + __ ldr(rscratch2, Address(rmethod, Method::const_offset())); + __ add(rscratch2, rscratch2, + in_bytes(ConstMethod::codes_offset()) - (is_wide ? 
5 : 3)); + __ sub(r1, rbcp, rscratch2); + __ push_i(r1); + // Adjust the bcp by the 16-bit displacement in r2 + __ add(rbcp, rbcp, r2); + __ dispatch_only(vtos, /*generate_poll*/true); + return; + } + + // Normal (non-jsr) branch handling + + // Adjust the bcp by the displacement in r2 + __ add(rbcp, rbcp, r2); + + assert(UseLoopCounter || !UseOnStackReplacement, + "on-stack-replacement requires loop counters"); + Label backedge_counter_overflow; + Label profile_method; + Label dispatch; + if (UseLoopCounter) { + // increment backedge counter for backward branches + // r0: MDO + // w1: MDO bumped taken-count + // r2: target offset + __ cmp(r2, 0); + __ b(dispatch, Assembler::GT); // count only if backward branch + + // ECN: FIXME: This code smells + // check if MethodCounters exists + Label has_counters; + __ ldr(rscratch1, Address(rmethod, Method::method_counters_offset())); + __ cbnz(rscratch1, has_counters); + __ push(r0); + __ push(r1); + __ push(r2); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), rmethod); + __ pop(r2); + __ pop(r1); + __ pop(r0); + __ ldr(rscratch1, Address(rmethod, Method::method_counters_offset())); + __ cbz(rscratch1, dispatch); // No MethodCounters allocated, OutOfMemory + __ bind(has_counters); + + if (TieredCompilation) { + Label no_mdo; + int increment = InvocationCounter::count_increment; + if (ProfileInterpreter) { + // Are we profiling? + __ ldr(r1, Address(rmethod, in_bytes(Method::method_data_offset()))); + __ cbz(r1, no_mdo); + // Increment the MDO backedge counter + const Address mdo_backedge_counter(r1, in_bytes(MethodData::backedge_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + const Address mask(r1, in_bytes(MethodData::backedge_mask_offset())); + __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, + r0, rscratch2, false, Assembler::EQ, + UseOnStackReplacement ? &backedge_counter_overflow : NULL); + __ b(dispatch); + } + __ bind(no_mdo); + // Increment backedge counter in MethodCounters* + __ ldr(rscratch1, Address(rmethod, Method::method_counters_offset())); + const Address mask(rscratch1, in_bytes(MethodCounters::backedge_mask_offset())); + __ increment_mask_and_jump(Address(rscratch1, be_offset), increment, mask, + r0, rscratch2, false, Assembler::EQ, + UseOnStackReplacement ? 
&backedge_counter_overflow : NULL); + } else { // not TieredCompilation + // increment counter + __ ldr(rscratch2, Address(rmethod, Method::method_counters_offset())); + __ ldr(r0, Address(rscratch2, be_offset)); // load backedge counter + __ add(rscratch1, r0, InvocationCounter::count_increment); // increment counter + __ str(rscratch1, Address(rscratch2, be_offset)); // store counter + + __ ldr(r0, Address(rscratch2, inv_offset)); // load invocation counter + __ mov(rscratch1, (unsigned)InvocationCounter::count_mask_value); + __ andr(r0, r0, rscratch1); // and the status bits + __ ldr(rscratch1, Address(rscratch2, be_offset)); // load backedge counter + __ add(r0, r0, rscratch1); // add both counters + + if (ProfileInterpreter) { + // Test to see if we should create a method data oop + __ ldr(rscratch1, Address(rscratch2, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); + __ cmp(r0, rscratch1); + __ b(dispatch, Assembler::LT); + + // if no method data exists, go to profile method + __ test_method_data_pointer(r0, profile_method); + + if (UseOnStackReplacement) { + // check for overflow against w1 which is the MDO taken count + __ ldr(rscratch1, Address(rscratch2, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); + __ cmp(r1, rscratch1); + __ b(dispatch, Assembler::LO); // Intel == Assembler::below + + // When ProfileInterpreter is on, the backedge_count comes + // from the MethodData*, which value does not get reset on + // the call to frequency_counter_overflow(). To avoid + // excessive calls to the overflow routine while the method is + // being compiled, add a second test to make sure the overflow + // function is called only once every overflow_frequency. + const int overflow_frequency = 1024; + const int of_mask_lsb = exact_log2(overflow_frequency); + __ bfc(r1, of_mask_lsb, 32 - of_mask_lsb); + __ cmp(r1, 0); + __ b(backedge_counter_overflow, Assembler::EQ); + + } + } else { + if (UseOnStackReplacement) { + // check for overflow against w0, which is the sum of the + // counters + __ ldr(rscratch1, Address(rscratch2, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); + __ cmp(r0, rscratch1); + __ b(backedge_counter_overflow, Assembler::HS); // Intel == Assembler::aboveEqual + } + } + } + } + __ bind(dispatch); + + // Pre-load the next target bytecode into rscratch1 + __ load_unsigned_byte(rscratch1, Address(rbcp, 0)); + + // continue with the bytecode @ target + // rscratch1: target bytecode + // rbcp: target bcp + __ dispatch_only(vtos, /*generate_poll*/true); + + if (UseLoopCounter) { + if (ProfileInterpreter) { + // Out-of-line code to allocate method data oop. 
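+      // (profile_method calls into the runtime to allocate the MethodData*, then the
+      // method data pointer is re-derived for the current bcp before re-entering the
+      // dispatch loop above.)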
+ __ bind(profile_method); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ load_unsigned_byte(r1, Address(rbcp, 0)); // restore target bytecode + __ set_method_data_pointer_for_bcp(); + __ b(dispatch); + } + + if (UseOnStackReplacement) { + // invocation counter overflow + __ bind(backedge_counter_overflow); + __ neg(r2, r2); + __ add(r2, r2, rbcp); // branch bcp + // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), + r2); + + __ load_unsigned_byte(r1, Address(rbcp, 0)); // restore target bytecode + + // r0: osr nmethod (osr ok) or NULL (osr not possible) + // r1: target bytecode + // r2: scratch + __ cbz(r0, dispatch); // test result -- no osr if null + // nmethod may have been invalidated (VM may block upon call_VM return) + __ ldr(r2, Address(r0, nmethod::state_offset())); + __ subs(r2, r2, nmethod::in_use); + __ b(dispatch, Assembler::NE); + + // We have the address of an on stack replacement routine in r0 + // We need to prepare to execute the OSR method. First we must + // migrate the locals and monitors off of the stack. + + __ mov(r4, r0); // save the nmethod + + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); + + // r0 is OSR buffer, ensure it's in the expected parameter location + assert(j_rarg0 == r0, "assumed"); + + // remove activation + // get sender sp + __ ldr(rscratch1, + Address(rfp, frame::get_interpreter_frame_sender_sp_offset() * wordSize)); + // remove frame anchor + __ leave(); + __ mov(sp, rscratch1); + // Ensure compiled code always sees stack at proper alignment + __ align_stack(); + + // and begin the OSR nmethod + __ ldr(rscratch1, Address(r4, nmethod::osr_entry_point_offset())); + __ b(rscratch1); + } + } +} + + +void TemplateTable::if_0cmp(Condition cc) +{ + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + /*if (cc == equal) { + __ cmp(r0, 0); + __ b(not_taken, Assembler::NE); + } else if (cc == not_equal) { + __ cmp(r0, 0); + __ b(not_taken, Assembler::EQ); + } else { + __ ands(rscratch1, r0, r0); + __ b(not_taken, j_not(cc)); + }*/ + __ cmp(r0, 0); + __ b(not_taken, j_not(cc)); + + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(r0); +} + +void TemplateTable::if_icmp(Condition cc) +{ + transition(itos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + __ pop_i(r1); + __ reg_printf("Comparing TOS = %p, and SOS = %p\n", r0, r1); + __ cmp(r1, r0); + __ b(not_taken, j_not(cc)); + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(r0); +} + +void TemplateTable::if_nullcmp(Condition cc) +{ + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + if (cc == equal) + __ cbnz(r0, not_taken); + else + __ cbz(r0, not_taken); + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(r0); +} + +void TemplateTable::if_acmp(Condition cc) +{ + transition(atos, vtos); + // assume branch is more often taken than not (loops use backward branches) + Label not_taken; + __ pop_ptr(r1); + __ cmpoop(r1, r0); + __ b(not_taken, j_not(cc)); + branch(false, false); + __ bind(not_taken); + __ profile_not_taken_branch(r0); +} + +void TemplateTable::ret() { + transition(vtos, vtos); + // We might be moving to a safepoint. 
The thread which calls + // Interpreter::notice_safepoints() will effectively flush its cache + // when it makes a system call, but we need to do something to + // ensure that we see the changed dispatch table. + __ membar(MacroAssembler::LoadLoad); + + locals_index(r1); + __ ldr(r1, aaddress(r1)); // get return bci, compute return bcp + __ profile_ret(r1, r2); + __ ldr(rbcp, Address(rmethod, Method::const_offset())); + __ lea(rbcp, Address(rbcp, r1)); + __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset())); + __ dispatch_next(vtos, 0, /*generate_poll*/true); +} + +void TemplateTable::wide_ret() { + transition(vtos, vtos); + locals_index_wide(r1); + __ ldr(r1, aaddress(r1)); // get return bci, compute return bcp + __ profile_ret(r1, r2); + __ ldr(rbcp, Address(rmethod, Method::const_offset())); + __ lea(rbcp, Address(rbcp, r1)); + __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset())); + __ dispatch_next(vtos, 0, /*generate_poll*/true); +} + + +void TemplateTable::tableswitch() { + Label default_case, continue_execution; + transition(itos, vtos); + // align rbcp + __ lea(r1, at_bcp(BytesPerInt)); + __ bic(r1, r1, BytesPerInt - 1); + // load lo & hi + __ ldr(r2, Address(r1, BytesPerInt)); + __ ldr(r3, Address(r1, 2 * BytesPerInt)); + __ rev(r2, r2); + __ rev(r3, r3); + // check against lo & hi + __ cmp(r0, r2); + __ b(default_case, Assembler::LT); + __ cmp(r0, r3); + __ b(default_case, Assembler::GT); + // lookup dispatch offset + __ sub(r0, r0, r2); + __ lea(r3, Address(r1, r0, lsl(2))); + __ ldr(r3, Address(r3, 3 * BytesPerInt)); + __ profile_switch_case(r0, r1, r2); + // continue execution + __ bind(continue_execution); + __ rev(r3, r3); + __ load_unsigned_byte(rscratch1, Address(rbcp, r3)); + __ add(rbcp, rbcp, r3); + __ dispatch_only(vtos, /*generate_poll*/true); + // handle default + __ bind(default_case); + __ profile_switch_default(r0); + __ ldr(r3, Address(r1, 0)); + __ b(continue_execution); +} + +void TemplateTable::lookupswitch() { + transition(itos, itos); + __ stop("lookupswitch bytecode should have been rewritten"); +} + +void TemplateTable::fast_linearswitch() { + transition(itos, vtos); + Label loop_entry, loop, found, continue_execution; + + __ reg_printf("Linearswitching to value %d\n", r0); + + // bswap r0 so we can avoid bswapping the table entries + __ rev(r0, r0); + // align rbcp + __ lea(r14, at_bcp(BytesPerInt)); // btw: should be able to get rid of + // this instruction (change offsets + // below) + __ bic(r14, r14, BytesPerInt - 1); + // set counter + __ ldr(r1, Address(r14, BytesPerInt)); + __ rev(r1, r1); + __ b(loop_entry); + // table search + __ bind(loop); + __ lea(rscratch1, Address(r14, r1, lsl(3))); + __ ldr(rscratch1, Address(rscratch1, 2 * BytesPerInt)); + __ cmp(r0, rscratch1); + __ b(found, Assembler::EQ); + __ bind(loop_entry); + __ subs(r1, r1, 1); + __ b(loop, Assembler::PL); + // default case + __ profile_switch_default(r0); + __ ldr(r3, Address(r14, 0)); + __ b(continue_execution); + // entry found -> get offset + __ bind(found); + __ lea(rscratch1, Address(r14, r1, lsl(3))); + __ ldr(r3, Address(rscratch1, 3 * BytesPerInt)); + __ profile_switch_case(r1, r0, r14); + // continue execution + __ bind(continue_execution); + __ rev(r3, r3); + __ add(rbcp, rbcp, r3); + __ ldrb(rscratch1, Address(rbcp, 0)); + __ dispatch_only(vtos, /*generate_poll*/true); +} + +void TemplateTable::fast_binaryswitch() { + transition(itos, vtos); + // Implementation using the following core algorithm: + // + // int binary_search(int key, LookupswitchPair* array, int 
n) { + // // Binary search according to "Methodik des Programmierens" by + // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. + // int i = 0; + // int j = n; + // while (i+1 < j) { + // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) + // // with Q: for all i: 0 <= i < n: key < a[i] + // // where a stands for the array and assuming that the (inexisting) + // // element a[n] is infinitely big. + // int h = (i + j) >> 1; + // // i < h < j + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // } + // // R: a[i] <= key < a[i+1] or Q + // // (i.e., if key is within array, i is the correct index) + // return i; + // } + + // Register allocation + const Register key = r0; // already set (tosca) + const Register array = r1; + const Register i = r2; + const Register j = r3; + const Register h = rscratch1; + const Register temp = rscratch2; + + // Find array start + __ lea(array, at_bcp(3 * BytesPerInt)); // btw: should be able to + // get rid of this + // instruction (change + // offsets below) + __ bic(array, array, BytesPerInt - 1); + + // Initialize i & j + __ mov(i, 0); // i = 0; + __ ldr(j, Address(array, -BytesPerInt)); // j = length(array); + + // Convert j into native byteordering + __ rev(j, j); + + // And start + Label entry; + __ b(entry); + + // binary search loop + { + Label loop; + __ bind(loop); + // int h = (i + j) >> 1; + __ add(h, i, j); // h = i + j; + __ lsr(h, h, 1); // h = (i + j) >> 1; + // if (key < array[h].fast_match()) { + // j = h; + // } else { + // i = h; + // } + // Convert array[h].match to native byte-ordering before compare + __ ldr(temp, Address(array, h, lsl(3))); + __ rev(temp, temp); + __ cmp(key, temp); + // j = h if (key < array[h].fast_match()) + __ mov(j, h, Assembler::LT); + // i = h if (key >= array[h].fast_match()) + __ mov(i, h, Assembler::GE); + // while (i+1 < j) + __ bind(entry); + __ add(h, i, 1); // i+1 + __ cmp(h, j); // i+1 < j + __ b(loop, Assembler::LT); + } + + // end of binary search, result index is i (must check again!) 
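+  // One more compare of key against array[i].match (byte-swapped first) decides between
+  // the matched pair's offset and the default offset, which is read from just below the
+  // pair array (array - 2*BytesPerInt).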
+ Label default_case; + // Convert array[i].match to native byte-ordering before compare + __ ldr(temp, Address(array, i, lsl(3))); + __ rev(temp, temp); + __ cmp(key, temp); + __ b(default_case, Assembler::NE); + + // entry found -> j = offset + __ add(j, array, i, lsl(3)); + __ ldr(j, Address(j, BytesPerInt)); + __ profile_switch_case(i, key, array); + __ rev(j, j); + __ load_unsigned_byte(rscratch1, Address(rbcp, j)); + __ lea(rbcp, Address(rbcp, j)); + __ dispatch_only(vtos, /*generate_poll*/true); + + // default case -> j = default offset + __ bind(default_case); + __ profile_switch_default(i); + __ ldr(j, Address(array, -2 * BytesPerInt)); + __ rev(j, j); + __ load_unsigned_byte(rscratch1, Address(rbcp, j)); + __ lea(rbcp, Address(rbcp, j)); + __ dispatch_only(vtos, /*genrate_poll*/true); +} + +void TemplateTable::_return(TosState state) +{ + __ reg_printf("STARTING RETURN\n"); + //__ stop("_return"); + transition(state, state); + if(ltos == state) { + __ reg_printf("Doing long return, tos value is 0x%08x%08x\n", r1, r0); + } else if ( itos == state || atos == state) { + __ reg_printf("Doing int/ref return, tos value is 0x%08x\n", r0); + } + + assert(_desc->calls_vm(), + "inconsistent calls_vm information"); // call in remove_activation + + if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { + assert(state == vtos, "only valid state"); + + __ reg_printf("A\n"); + __ ldr(c_rarg1, aaddress(0)); + __ reg_printf("object is = %p\nB\n", c_rarg1); + __ load_klass(r3, c_rarg1); + __ reg_printf("C\n"); + __ ldr(r3, Address(r3, Klass::access_flags_offset())); + __ reg_printf("D\n"); + __ tst(r3, JVM_ACC_HAS_FINALIZER); + __ reg_printf("E\n"); + Label skip_register_finalizer; + __ b(skip_register_finalizer, Assembler::EQ); + __ reg_printf("About to call into the VM\n"); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), c_rarg1); + __ reg_printf("F\n"); + __ bind(skip_register_finalizer); + } + + // Issue a StoreStore barrier after all stores but before return + // from any constructor for any class with a final field. We don't + // know if this is a finalizer, so we always do so. + if (_desc->bytecode() == Bytecodes::_return) + __ membar(MacroAssembler::StoreStore); + + // Narrow result if state is itos but result type is smaller. + // Need to narrow in the return bytecode rather than in generate_return_entry + // since compiled code callers expect the result to already be narrowed. + if (state == itos) { + __ narrow(r0); + } + + __ reg_printf("About to attmpt to remove activation with rfp = %p\n", rfp); + __ remove_activation(state); + __ reg_printf("Finshed _return, about to jump to lr = %p\n", lr); + __ b(lr); +} + +// ---------------------------------------------------------------------------- +// Volatile variables demand their effects be made known to all CPU's +// in order. Store buffers on most chips allow reads & writes to +// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode +// without some kind of memory barrier (i.e., it's not sufficient that +// the interpreter does not reorder volatile references, the hardware +// also must not reorder them). +// +// According to the new Java Memory Model (JMM): +// (1) All volatiles are serialized wrt to each other. ALSO reads & +// writes act as aquire & release, so: +// (2) A read cannot let unrelated NON-volatile memory refs that +// happen after the read float up to before the read. 
It's OK for +// non-volatile memory refs that happen before the volatile read to +// float down below it. +// (3) Similar a volatile write cannot let unrelated NON-volatile +// memory refs that happen BEFORE the write float down to after the +// write. It's OK for non-volatile memory refs that happen after the +// volatile write to float up before it. +// +// We only put in barriers around volatile refs (they are expensive), +// not _between_ memory refs (that would require us to track the +// flavor of the previous memory refs). Requirements (2) and (3) +// require some barriers before volatile stores and after volatile +// loads. These nearly cover requirement (1) but miss the +// volatile-store-volatile-load case. This final case is placed after +// volatile-stores although it could just as well go before +// volatile-loads. + +//Note none of these calls use rscratch1, well some do but are set again before return +// so index can be rscratch1 ( I think ) +void TemplateTable::resolve_cache_and_index(int byte_no, + Register Rcache, + Register index, + size_t index_size) { + // Note none of the functions called here use any rscratch + // call_VM may do but will save the argument first! + const Register temp = rscratch2; + assert_different_registers(Rcache, index, temp); + + Label resolved; + + Bytecodes::Code code = bytecode(); + switch (code) { + case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; + case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; + } + + assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); + __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); + __ cmp(temp, (int) code); // have we resolved this bytecode? + __ b(resolved, Assembler::EQ); + + __ reg_printf("Not resolved, resolving, with rthread = %p, rfp = %p\n", rthread, rfp); + // resolve first time through + address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); + __ mov(temp, (int) code); + __ call_VM(noreg, entry, temp); + __ reg_printf("Resolve complete\n"); + + // Update registers with resolved info + __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); + // n.b. 
unlike x86 Rcache is now rcpool plus the indexed offset + // so all clients ofthis method must be modified accordingly + __ bind(resolved); +} + +// The Rcache and index registers must be set before call +// n.b unlike x86 cache already includes the index offset +void TemplateTable::load_field_cp_cache_entry(Register obj, + Register cache, + Register index, + Register off, + Register flags, + bool is_static = false) { + assert_different_registers(cache, index, flags, off); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + // Field offset + __ ldr(off, Address(cache, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::f2_offset()))); + // Flags + __ ldr(flags, Address(cache, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset()))); + + // klass overwrite register + if (is_static) { + __ ldr(obj, Address(cache, in_bytes(cp_base_offset + + ConstantPoolCacheEntry::f1_offset()))); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ldr(obj, Address(obj, mirror_offset)); + __ resolve_oop_handle(obj, r3); + } +} + +void TemplateTable::load_invoke_cp_cache_entry(int byte_no, + Register method, + Register itable_index, + Register flags, + bool is_invokevirtual, + bool is_invokevfinal, /*unused*/ + bool is_invokedynamic) { + // setup registers + const Register cache = rscratch1; + const Register index = r14; + assert_different_registers(method, flags); + assert_different_registers(method, cache, index); + assert_different_registers(itable_index, flags); + assert_different_registers(itable_index, cache, index); + // determine constant pool cache field offsets + assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant"); + const int method_offset = in_bytes( + ConstantPoolCache::base_offset() + + (is_invokevirtual + ? ConstantPoolCacheEntry::f2_offset() + : ConstantPoolCacheEntry::f1_offset())); + const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()); + // access constant pool cache fields + const int index_offset = in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()); + + size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2)); + resolve_cache_and_index(byte_no, cache, index, index_size); + __ ldr(method, Address(cache, method_offset)); + + if (itable_index != noreg) { + __ ldr(itable_index, Address(cache, index_offset)); + } + __ ldr(flags, Address(cache, flags_offset)); + + __ reg_printf("Invocation, index = %d\n", index); +} + + +// The registers cache and index expected to be set before call. +// Correct values of the cache and index registers are preserved. +void TemplateTable::jvmti_post_field_access(Register cache, Register index, + bool is_static, bool has_tos) { + // do the JVMTI work here to avoid disturbing the register state below + // We use c_rarg registers here because we want to use the register used in + // the call to the VM + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM. 
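+    // (The counter at JvmtiExport::get_field_access_count_addr() should be non-zero only
+    // while at least one field access watch is set, so the common path branches straight
+    // to L1 without calling the VM.)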
+ Label L1; + assert_different_registers(cache, index, r0); + __ lea(rscratch1, ExternalAddress((address) JvmtiExport::get_field_access_count_addr())); + __ ldr(r0, Address(rscratch1)); + __ cmp(r0, 0); + __ b(L1, Assembler::EQ); + + __ get_cache_and_index_at_bcp(c_rarg2, c_rarg3, 1); + __ lea(c_rarg2, Address(c_rarg2, in_bytes(ConstantPoolCache::base_offset()))); + + if (is_static) { + __ mov(c_rarg1, 0); // NULL object reference + } else { + __ ldr(c_rarg1, at_tos()); // get object pointer without popping it + __ verify_oop(c_rarg1); + } + // c_rarg1: object pointer or NULL + // c_rarg2: cache entry pointer + // c_rarg3: jvalue object on the stack + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), + c_rarg1, c_rarg2, c_rarg3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::pop_and_check_object(Register r) +{ + __ pop_ptr(r); + __ null_check(r); // for field access must check obj. + __ verify_oop(r); +} + +void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { + //__ stop("getfield or static"); + //FIXME Find a better way than this! + const Register cache = r2; + const Register index = r3; + const Register obj = r14; + const Register off = rscratch2; //pop_and_check_object + const Register flags = r0; + const Register bc = r14; // uses same reg as obj, so don't mix them + const Register bytecode = r1; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + jvmti_post_field_access(cache, index, is_static, false); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + if (!is_static) { + // obj is on the stack + // trashes rscratch1 + pop_and_check_object(obj); + } + + const Address field(obj, off); + + Label Done, notByte, notBool, notInt, notShort, notChar, + notLong, notFloat, notObj, notDouble, + notVolatileLong, notVolatileDouble, DoneBarrier; + + // Don't rewrite getstatic, only getfield + if (is_static) rc = may_not_rewrite; + + __ extract_bits(bytecode, flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits); + + assert(btos == 0, "change code, btos != 0"); + __ cbnz(bytecode, notByte); + + // btos + __ access_load_tos_at(T_BYTE, IN_HEAP, field, noreg, noreg); + __ push(btos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bgetfield, bc, bytecode); + } + __ b(Done); + + __ bind(notByte); + __ cmp(bytecode, ztos); + __ b(notBool, Assembler::NE); + + // ztos (same code as btos) + __ access_load_tos_at(T_BOOLEAN, IN_HEAP, field, noreg, noreg); + __ push(ztos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + // use btos rewriting, no truncating to t/f bit is needed for getfield. 
+ patch_bytecode(Bytecodes::_fast_bgetfield, bc, bytecode); + } + __ b(Done); + + __ bind(notBool); + __ cmp(bytecode, atos); + __ b(notObj, Assembler::NE); + // atos + do_oop_load(_masm, field, r0, IN_HEAP); + __ push(atos); + __ reg_printf("Getfield or static, atos = 0x%08x\n", r0); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_agetfield, bc, bytecode); + } + __ b(Done); + + __ bind(notObj); + __ cmp(bytecode, itos); + __ b(notInt, Assembler::NE); + // itos + __ access_load_tos_at(T_INT, IN_HEAP, field, noreg, noreg); + __ push(itos); + __ reg_printf("Getfield or static, itos = 0x%08x\n", r0); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_igetfield, bc, bytecode); + } + __ b(Done); + + __ bind(notInt); + __ cmp(bytecode, ctos); + __ b(notChar, Assembler::NE); + // ctos + __ access_load_tos_at(T_CHAR, IN_HEAP, field, noreg, noreg); + __ push(ctos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cgetfield, bc, bytecode); + } + __ b(Done); + + __ bind(notChar); + __ cmp(bytecode, stos); + __ b(notShort, Assembler::NE); + // stos + __ access_load_tos_at(T_SHORT, IN_HEAP, field, noreg, noreg); + __ push(stos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sgetfield, bc, bytecode); + } + __ b(Done); + + __ bind(notShort); + __ cmp(bytecode, ltos); + __ b(notLong, Assembler::NE); + // ltos + __ tbz(flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatileLong); + __ access_load_tos_at(T_LONG, IN_HEAP | MO_SEQ_CST, field, bytecode, noreg); // don't need bytecode anymore + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ push(ltos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lgetfield, bc, bytecode); + } + __ b(DoneBarrier); + + __ bind(notVolatileLong); + __ access_load_tos_at(T_LONG, IN_HEAP, field, noreg, noreg); + __ push(ltos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lgetfield, bc, bytecode); + } + __ b(Done); + + __ bind(notLong); + __ cmp(bytecode, ftos); + __ b(notFloat, Assembler::NE); + // ftos + __ access_load_tos_at(T_FLOAT, IN_HEAP, field, bytecode, noreg); // don't need bytecode anymore + __ push(ftos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fgetfield, bc, bytecode); + } + __ b(Done); + + __ bind(notFloat); +#ifdef ASSERT + __ cmp(bytecode, dtos); + __ b(notDouble, Assembler::NE); +#endif + // dtos + __ tbz(flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatileDouble); + __ access_load_tos_at(T_DOUBLE, IN_HEAP | MO_SEQ_CST, field, bytecode, noreg); // don't need bytecode anymore + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ push(dtos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dgetfield, bc, bytecode); + } + __ b(DoneBarrier); + + __ bind(notVolatileDouble); + __ access_load_tos_at(T_DOUBLE, IN_HEAP, field, bytecode, noreg); // don't need bytecode anymore + __ push(dtos); + // Rewrite bytecode to be faster + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dgetfield, bc, bytecode); + } + __ b(DoneBarrier); +#ifdef ASSERT + + __ bind(notDouble); + __ stop("Bad state"); +#endif + + __ bind(Done); + __ tbz(flags, ConstantPoolCacheEntry::is_volatile_shift, DoneBarrier); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ 
bind(DoneBarrier); +} + +void TemplateTable::getfield(int byte_no) { + getfield_or_static(byte_no, false); +} + +void TemplateTable::nofast_getfield(int byte_no) { + getfield_or_static(byte_no, false, may_not_rewrite); +} + +void TemplateTable::getstatic(int byte_no) { + getfield_or_static(byte_no, true); +} + +// The registers cache and index expected to be set before call. +// The function may destroy various registers, just not the cache and index registers. +void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { + transition(vtos, vtos); + + ByteSize cp_base_offset = ConstantPoolCache::base_offset(); + + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label L1; + assert_different_registers(cache, index, r0); + __ lea(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr())); + __ ldr(r0, Address(rscratch1)); + __ cbz(r0, L1); + + __ get_cache_and_index_at_bcp(c_rarg2, rscratch1, 1); + + if (is_static) { + // Life is simple. Null out the object pointer. + __ mov(c_rarg1, 0); + } else { + // Life is harder. The stack holds the value on top, followed by + // the object. We don't know the size of the value, though; it + // could be one or two words depending on its type. As a result, + // we must find the type to determine where the object is. + __ ldr(c_rarg3, Address(c_rarg2, + in_bytes(cp_base_offset + + ConstantPoolCacheEntry::flags_offset()))); + __ lsr(c_rarg3, c_rarg3, + ConstantPoolCacheEntry::tos_state_shift); + ConstantPoolCacheEntry::verify_tos_state_shift(); + Label nope2, done, ok; + __ ldr(c_rarg1, at_tos_p1()); // initially assume a one word jvalue + __ cmp(c_rarg3, ltos); + __ b(ok, Assembler::EQ); + __ cmp(c_rarg3, dtos); + __ b(nope2, Assembler::NE); + __ bind(ok); + __ ldr(c_rarg1, at_tos_p2()); // ltos (two word jvalue) + __ bind(nope2); + } + // cache entry pointer + __ add(c_rarg2, c_rarg2, in_bytes(cp_base_offset)); + // object (tos) + __ mov(c_rarg3, sp); + // c_rarg1: object pointer set up above (NULL if static) + // c_rarg2: cache entry pointer + // c_rarg3: jvalue object on the stack + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + c_rarg1, c_rarg2, c_rarg3); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } +} + +void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { + transition(vtos, vtos); + const Register cache = r2; + const Register index = rscratch1; + const Register obj = r2; + const Register off = r3; + const Register flags = r14; + const Register bc = rscratch2; + + resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); + __ reg_printf("Putfield or static, index = %d\n", index); + jvmti_post_field_mod(cache, index, is_static); + load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); + + Label Done; + { + Label notVolatile; + __ tbz(flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::StoreStore); + __ bind(notVolatile); + } + __ reg_printf("Putfield or static B\n"); + + // field address + const Address field(obj, off); + + Label notByte, notBool, notInt, notShort, notChar, + notLong, notFloat, notObj, notDouble, DoneBarrier; + + __ extract_bits(rscratch1, flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits); + + __ cmp(rscratch1, btos); + __ b(notByte, Assembler::NE); + + // Don't 
rewrite putstatic, only putfield + if (is_static) rc = may_not_rewrite; + // btos + { + __ pop(btos); + if (!is_static) { + pop_and_check_object(obj); + } + __ access_store_tos_at(T_BYTE, IN_HEAP, field, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_bputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notByte); + __ cmp(rscratch1, ztos); + __ b(notBool, Assembler::NE); + + // ztos + { + __ pop(ztos); + if (!is_static) pop_and_check_object(obj); + __ access_store_tos_at(T_BOOLEAN, IN_HEAP, field, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_zputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notBool); + __ cmp(rscratch1, atos); + __ b(notObj, Assembler::NE); + + // atos + { + __ pop(atos); + if (!is_static) { + pop_and_check_object(obj); + } + // Store into the field + do_oop_store(_masm, field, r0, IN_HEAP); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notObj); + __ cmp(rscratch1, itos); + __ b(notInt, Assembler::NE); + + // itos + { + __ pop(itos); + if (!is_static) pop_and_check_object(obj); + __ access_store_tos_at(T_INT, IN_HEAP, field, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_iputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notInt); + __ cmp(rscratch1, ctos); + __ b(notChar, Assembler::NE); + + // ctos + { + __ pop(ctos); + if (!is_static) { + pop_and_check_object(obj); + } + __ access_store_tos_at(T_CHAR, IN_HEAP, field, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_cputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notChar); + __ cmp(rscratch1, stos); + __ b(notShort, Assembler::NE); + + // stos + { + __ pop(stos); + if (!is_static) { + pop_and_check_object(obj); + } + __ access_store_tos_at(T_SHORT, IN_HEAP, field, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_sputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notShort); + __ cmp(rscratch1, ltos); + __ b(notLong, Assembler::NE); + + // ltos + { + Label nonVolatileLong; + __ pop(ltos); + if (!is_static) pop_and_check_object(obj); + __ tbz(flags, ConstantPoolCacheEntry::is_volatile_shift, nonVolatileLong); + __ lea(flags, field); + __ access_store_tos_at(T_LONG, IN_HEAP | MO_SEQ_CST, Address(flags), r2, r3); // trashes index===rscratch1 + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, r1, true, byte_no); + } + __ membar(MacroAssembler::StoreLoad); + __ b(DoneBarrier); + __ bind(nonVolatileLong); + __ access_store_tos_at(T_LONG, IN_HEAP, field, noreg, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lputfield, bc, r1, true, byte_no); + } + __ b(DoneBarrier); + } + + __ bind(notLong); + __ cmp(rscratch1, ftos); + __ b(notFloat, Assembler::NE); + + // ftos + { + __ pop(ftos); + if (!is_static) pop_and_check_object(obj); + __ access_store_tos_at(T_FLOAT, IN_HEAP, field, index, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_fputfield, bc, r1, true, byte_no); + } + __ b(Done); + } + + __ bind(notFloat); +#ifdef ASSERT + __ cmp(rscratch1, dtos); + __ b(notDouble, Assembler::NE); +#endif // ASSERT + + // dtos + { + Label nonVolatileDouble; + __ pop(dtos); + if (!is_static) pop_and_check_object(obj); + __ tbz(flags, ConstantPoolCacheEntry::is_volatile_shift, nonVolatileDouble); + __ access_store_tos_at(T_DOUBLE, IN_HEAP | MO_SEQ_CST, field, r2, r3); 
// trashes index===rscratch1 + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, r1, true, byte_no); + } + __ membar(MacroAssembler::StoreLoad); + __ b(DoneBarrier); + __ bind(nonVolatileDouble); + __ access_store_tos_at(T_DOUBLE, IN_HEAP, field, index, noreg); + if (rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_dputfield, bc, r1, true, byte_no); + } + __ b(DoneBarrier); + } + +#ifdef ASSERT + __ b(Done); + + __ bind(notDouble); + __ stop("Bad state"); +#endif // ASSERT + + __ bind(Done); + + { + __ tbz(flags, ConstantPoolCacheEntry::is_volatile_shift, DoneBarrier); + __ membar(MacroAssembler::StoreLoad); + } + __ bind(DoneBarrier); + //FIXME find a more elegant way! + __ get_dispatch(); +} + +void TemplateTable::putfield(int byte_no) { + putfield_or_static(byte_no, false); +} + +void TemplateTable::nofast_putfield(int byte_no) { + putfield_or_static(byte_no, false, may_not_rewrite); +} + +void TemplateTable::putstatic(int byte_no) { + putfield_or_static(byte_no, true); +} + +void TemplateTable::jvmti_post_fast_field_mod() +{ + if (JvmtiExport::can_post_field_modification()) { + // Check to see if a field modification watch has been set before + // we take the time to call into the VM. + Label L2; + __ lea(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr())); + __ ldr(c_rarg3, Address(rscratch1)); + __ cmp(c_rarg3, 0); + __ b(L2, Assembler::EQ); + __ pop_ptr(r14); // copy the object pointer from tos + __ verify_oop(r14); + __ push_ptr(r14); // put the object pointer back on tos + // Save tos values before call_VM() clobbers them. Since we have + // to do it for every data type, we use the saved values as the + // jvalue object. + switch (bytecode()) { // load values into the jvalue object + case Bytecodes::_fast_aputfield: __ push_ptr(r0); break; + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ push_i(r0); break; + case Bytecodes::_fast_dputfield: + if(hasFPU()) { + __ push_d(); + } else { + __ push_l(); + } + break; + case Bytecodes::_fast_fputfield: + if(hasFPU()) { + __ push_f(); + } else { + __ push_i(); + } + break; + case Bytecodes::_fast_lputfield: __ push_l(r0); break; + + default: + ShouldNotReachHere(); + } + __ mov(c_rarg3, sp); // points to jvalue on the stack + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(c_rarg2, r0, 1); + __ verify_oop(r14); + // r14: object pointer copied above + // c_rarg2: cache entry pointer + // c_rarg3: jvalue object on the stack + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_modification), + r14, c_rarg2, c_rarg3); + + switch (bytecode()) { // restore tos values + case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break; + case Bytecodes::_fast_fputfield: + if(hasFPU()) { + __ pop_f(); break; + } + case Bytecodes::_fast_bputfield: // fall through + case Bytecodes::_fast_zputfield: // fall through + case Bytecodes::_fast_sputfield: // fall through + case Bytecodes::_fast_cputfield: // fall through + case Bytecodes::_fast_iputfield: __ pop_i(r0); break; + case Bytecodes::_fast_dputfield: + if(hasFPU()) { + __ pop_d(); break; + } + case Bytecodes::_fast_lputfield: __ pop_l(r0); break; + } + __ bind(L2); + } +} + +void TemplateTable::fast_storefield(TosState state) +{ + transition(state, vtos); + + ByteSize base = 
ConstantPoolCache::base_offset(); + + jvmti_post_fast_field_mod(); + + // access constant pool cache + __ get_cache_and_index_at_bcp(r2, rscratch1, 1); // index not used + + Register flags = r14; + // test for volatile with r14 + __ ldr(flags, Address(r2, in_bytes(base + + ConstantPoolCacheEntry::flags_offset()))); + + // replace index with field offset from cache entry + __ ldr(r3, Address(r2, in_bytes(base + ConstantPoolCacheEntry::f2_offset()))); + + // Get object from stack + pop_and_check_object(r2); + + // field address + const Address field(r2, r3); + + // long and double need special processing, see below + // the rest only need barrier before if field is volatile + if (bytecode() != Bytecodes::_fast_dputfield && bytecode() != Bytecodes::_fast_lputfield) { + Label notVolatile; + __ tbz(r14, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::StoreStore); + __ bind(notVolatile); + } + + // access field + switch (bytecode()) { + case Bytecodes::_fast_aputfield: + do_oop_store(_masm, field, r0, IN_HEAP); + break; + case Bytecodes::_fast_dputfield: + { + Label notVolatile, cont; + __ tbz(flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::StoreStore); + __ access_store_tos_at(T_DOUBLE, IN_HEAP | MO_SEQ_CST, field, r2, r3); // trashes rscratch1, ok to reuse r2, r3 + __ b(cont); + __ bind(notVolatile); + __ access_store_tos_at(T_DOUBLE, IN_HEAP, field, rscratch1, noreg); + __ bind(cont); + } + break; + case Bytecodes::_fast_lputfield: + { + Label notVolatile, cont; + __ tbz(flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::StoreStore); + __ access_store_tos_at(T_LONG, IN_HEAP | MO_SEQ_CST, field, r2, r3); // trashes rscratch1, ok to reuse r2, r3 + __ b(cont); + __ bind(notVolatile); + __ access_store_tos_at(T_LONG, IN_HEAP, field, rscratch1, noreg); + __ bind(cont); + } + break; + case Bytecodes::_fast_fputfield: + __ access_store_tos_at(T_FLOAT, IN_HEAP, field, rscratch1, noreg); + break; + case Bytecodes::_fast_iputfield: + __ access_store_tos_at(T_INT, IN_HEAP, field, noreg, noreg); + break; + case Bytecodes::_fast_zputfield: + __ access_store_tos_at(T_BOOLEAN, IN_HEAP, field, noreg, noreg); + break; + case Bytecodes::_fast_bputfield: + __ access_store_tos_at(T_BYTE, IN_HEAP, field, noreg, noreg); + break; + case Bytecodes::_fast_sputfield: + __ access_store_tos_at(T_SHORT, IN_HEAP, field, noreg, noreg); + break; + case Bytecodes::_fast_cputfield: + __ access_store_tos_at(T_CHAR, IN_HEAP, field, noreg, noreg); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ tbz(flags, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::StoreLoad); + __ bind(notVolatile); + } +} + + +void TemplateTable::fast_accessfield(TosState state) +{ + transition(atos, state); + // Do the JVMTI work here to avoid disturbing the register state below + if (JvmtiExport::can_post_field_access()) { + // Check to see if a field access watch has been set before we + // take the time to call into the VM. 
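+    // Cheap guard: the generated code loads the global field-access watch
+    // count and skips the runtime notification entirely when it is zero.
+    // In effect (sketch, not emitted literally):
+    //   if (*JvmtiExport::get_field_access_count_addr() != 0)
+    //     InterpreterRuntime::post_field_access(obj, cache_entry);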
+ Label L1; + __ lea(rscratch1, ExternalAddress((address) JvmtiExport::get_field_access_count_addr())); + __ ldr(r2, Address(rscratch1)); + __ cmp(r2, 0); + __ b(L1, Assembler::EQ); + // access constant pool cache entry + __ get_cache_entry_pointer_at_bcp(c_rarg2, rscratch2, 1); + __ verify_oop(r0); + __ push_ptr(r0); // save object pointer before call_VM() clobbers it + __ mov(c_rarg1, r0); + // c_rarg1: object pointer copied above + // c_rarg2: cache entry pointer + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), + c_rarg1, c_rarg2); + __ pop_ptr(r0); // restore object pointer + __ bind(L1); + } + + // access constant pool cache + __ get_cache_and_index_at_bcp(r2, r1, 1); + __ ldr(r1, Address(r2, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()))); + __ ldr(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()))); + + // r0: object + __ verify_oop(r0); + __ null_check(r0); + const Address field(r0, r1); + + // access field + switch (bytecode()) { + case Bytecodes::_fast_agetfield: + do_oop_load(_masm, field, r0, IN_HEAP); + __ verify_oop(r0); + break; + case Bytecodes::_fast_dgetfield: + { + Label notVolatile, cont; + __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ access_load_tos_at(T_DOUBLE, IN_HEAP | MO_SEQ_CST, field, r2, r3); // trashes rscratch1 + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ b(cont); + __ bind(notVolatile); + __ access_load_tos_at(T_DOUBLE, IN_HEAP, field, rscratch1, noreg); + __ bind(cont); + } + break; + case Bytecodes::_fast_lgetfield: + { + Label notVolatile, cont; + __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ access_load_tos_at(T_LONG, IN_HEAP | MO_SEQ_CST, field, r2, r3); // trashes rscratch1 + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ b(cont); + __ bind(notVolatile); + __ access_load_tos_at(T_LONG, IN_HEAP, field, noreg, noreg); + __ bind(cont); + } + break; + case Bytecodes::_fast_fgetfield: + __ access_load_tos_at(T_FLOAT, IN_HEAP, field, rscratch1, noreg); + break; + case Bytecodes::_fast_igetfield: + __ access_load_tos_at(T_INT, IN_HEAP, field, noreg, noreg); + break; + case Bytecodes::_fast_bgetfield: + __ access_load_tos_at(T_BYTE, IN_HEAP, field, noreg, noreg); + break; + case Bytecodes::_fast_sgetfield: + __ access_load_tos_at(T_SHORT, IN_HEAP, field, noreg, noreg); + break; + case Bytecodes::_fast_cgetfield: + __ access_load_tos_at(T_CHAR, IN_HEAP, field, noreg, noreg); + break; + default: + ShouldNotReachHere(); + } + // long and double have barrier already placed + if (bytecode() != Bytecodes::_fast_dgetfield && bytecode() != Bytecodes::_fast_lgetfield) { + Label notVolatile; + __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ bind(notVolatile); + } +} + +void TemplateTable::fast_xaccess(TosState state) +{ + transition(vtos, state); + + // get receiver + __ ldr(r0, aaddress(0)); + // access constant pool cache + __ get_cache_and_index_at_bcp(r2, r3, 2); + __ ldr(r1, Address(r2, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::f2_offset()))); + // make sure exception is reported in correct bcp range (getfield is + // next instruction) + __ add(rbcp, rbcp, 1); + __ null_check(r0); + + Address field(r0, r1); + switch (state) { + case ftos: + __ access_load_tos_at(T_FLOAT, IN_HEAP, field, r0, noreg); + break; + case 
itos: + __ access_load_tos_at(T_INT, IN_HEAP, field, noreg, noreg); + break; + case atos: + do_oop_load(_masm, field, r0, IN_HEAP); + __ verify_oop(r0); + break; + default: + ShouldNotReachHere(); + } + + { + Label notVolatile; + __ ldr(r3, Address(r2, in_bytes(ConstantPoolCache::base_offset() + + ConstantPoolCacheEntry::flags_offset()))); + __ tbz(r3, ConstantPoolCacheEntry::is_volatile_shift, notVolatile); + __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); + __ bind(notVolatile); + } + + __ sub(rbcp, rbcp, 1); +} + + + +//----------------------------------------------------------------------------- +// Calls + +void TemplateTable::count_calls(Register method, Register temp) { + // implemented elsewhere + ShouldNotReachHere(); +} + +void TemplateTable::prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index, // itable index, MethodType, etc. + Register recv, // if caller wants to see it + Register flags // if caller wants to test it + ) { + // determine flags + Bytecodes::Code code = bytecode(); + const bool is_invokeinterface = code == Bytecodes::_invokeinterface; + const bool is_invokedynamic = code == Bytecodes::_invokedynamic; + const bool is_invokehandle = code == Bytecodes::_invokehandle; + const bool is_invokevirtual = code == Bytecodes::_invokevirtual; + const bool is_invokespecial = code == Bytecodes::_invokespecial; + const bool load_receiver = (recv != noreg); + const bool save_flags = (flags != noreg); + assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); + assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); + assert(flags == noreg || flags == r3, ""); + assert(recv == noreg || recv == r2, ""); + + // setup registers & access constant pool cache + if (recv == noreg) recv = r2; + if (flags == noreg) flags = r3; + assert_different_registers(method, index, recv, flags); + + // save 'interpreter return address' + __ save_bcp(); + + load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); + + // maybe push appendix to arguments (just before return address) + if (is_invokedynamic || is_invokehandle) { + Label L_no_push; + __ tbz(flags, ConstantPoolCacheEntry::has_appendix_shift, L_no_push); + // Push the appendix as a trailing parameter. + // This must be done before we get the receiver, + // since the parameter_size includes it. + __ push(r14); //NOT NEEDED?! + __ mov(r14, index); + assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); + __ load_resolved_reference_at_index(index, r14); + __ pop(r14); + __ push(index); // push appendix (MethodType, CallSite, etc.) + __ bind(L_no_push); + } + + // load receiver if needed (note: no return address pushed yet) + if (load_receiver) { + __ andr(recv, flags, ConstantPoolCacheEntry::parameter_size_mask); + // const int no_return_pc_pushed_yet = -1; // argument slot correction before we push return address + // const int receiver_is_at_end = -1; // back off one slot to get receiver + // Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); + // __ movptr(recv, recv_addr); + + __ add(rscratch1, sp, recv, lsl(2)); + __ ldr(recv, Address(rscratch1, -Interpreter::expr_offset_in_bytes(1))); + __ verify_oop(recv); + } + + // compute return type + // x86 uses a shift and mask or wings it with a shift plus assert + // the mask is not needed. 
aarch32 just uses bitfield extract + __ extract_bits(rscratch2, flags, ConstantPoolCacheEntry::tos_state_shift, ConstantPoolCacheEntry::tos_state_bits); + // load return address + { + const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); + __ mov(rscratch1, table_addr); + __ ldr(lr, Address(rscratch1, rscratch2, lsl(2))); + } +} + + +void TemplateTable::invokevirtual_helper(Register index, + Register recv, + Register flags) +{ + // Uses temporary registers r0, r3 + assert_different_registers(index, recv, r0, r3); + // Test for an invoke of a final method + Label notFinal; + __ tbz(flags, ConstantPoolCacheEntry::is_vfinal_shift, notFinal); + + __ reg_printf("It's a virtual final call\n"); + const Register method = index; // method must be rmethod + assert(method == rmethod, + "methodOop must be rmethod for interpreter calling convention"); + + // do the call - the index is actually the method to call + // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* + + // It's final, need a null check here! + __ null_check(recv); + + // profile this call + __ profile_final_call(r0); + __ profile_arguments_type(r0, method, rscratch2, true); + + __ jump_from_interpreted(method, r0); + + __ bind(notFinal); + __ reg_printf("It's not a virtual final call\n"); + // get receiver klass + __ null_check(recv, oopDesc::klass_offset_in_bytes()); + __ load_klass(r0, recv); + + // profile this call + __ profile_virtual_call(r0, rlocals, r3); + + // get target methodOop & entry point + __ lookup_virtual_method(r0, index, method); + __ profile_arguments_type(r3, method, rscratch2, true); + + __ jump_from_interpreted(method, r3); +} + +void TemplateTable::invokevirtual(int byte_no) +{ + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + + __ reg_printf("Invokevirtual, the sp is %p\n", sp); + prepare_invoke(byte_no, rmethod, noreg, r2, r3); + + // rmethod: index (actually a Method*) + // r2: receiver + // r3: flags + + invokevirtual_helper(rmethod, r2, r3); +} + +void TemplateTable::invokespecial(int byte_no) +{ + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + __ ldr(rscratch1, Address(sp)); + __ reg_printf("Stack pointer is %p, tos word = %p\n", sp, rscratch1); + + prepare_invoke(byte_no, rmethod, noreg, // get f1 Method* + r2); // get receiver also for null check + + __ verify_oop(r2); + __ null_check(r2); + + // do the call + __ profile_call(r0); + __ profile_arguments_type(r0, rmethod, rbcp, false); + __ jump_from_interpreted(rmethod, r0); +} + +void TemplateTable::invokestatic(int byte_no) +{ + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + prepare_invoke(byte_no, rmethod); // get f1 Method* + // do the call + __ profile_call(r0); + __ profile_arguments_type(r0, rmethod, rscratch2, false); + __ jump_from_interpreted(rmethod, r0); +} + +void TemplateTable::fast_invokevfinal(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); + __ stop("fast_invokevfinal not used on aarch32");} + +void TemplateTable::invokeinterface(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + Register temp = rdispatch; //free at this point and reloaded later + prepare_invoke(byte_no, r0, rmethod, // get f1 Klass*, f2 Method* + r2, r3); // recv, flags + + + __ create_breakpoint(); + // r0: interface klass (from f1) + // rmethod: method (from f2) + // r2: receiver + // r3: flags + + // First check for Object case, then private 
interface method, + // then regular interface method. + + // Special case of invokeinterface called for virtual method of + // java.lang.Object. See cpCache.cpp for details. + Label notObjectMethod; + __ tbz(r3, ConstantPoolCacheEntry::is_forced_virtual_shift, notObjectMethod); + + __ reg_printf("ABC: Invoking invokevirtual_helper\n"); + invokevirtual_helper(rmethod, r2, r3); //loads lr too + __ bind(notObjectMethod); + + Label no_such_interface; + + // Check for private method invocation - indicated by vfinal + Label notVFinal; + __ tbz(r3, ConstantPoolCacheEntry::is_vfinal_shift, notVFinal); + + // Get receiver klass into r3 - also a null check + __ null_check(r2, oopDesc::klass_offset_in_bytes()); + __ load_klass(r3, r2); + + Label subtype; + __ check_klass_subtype(r3, r0, temp, subtype); + // If we get here the typecheck failed + __ b(no_such_interface); + __ bind(subtype); + + __ profile_final_call(r0); + __ profile_arguments_type(r0, rmethod, temp, true); + __ jump_from_interpreted(rmethod, r0); + + __ bind(notVFinal); + + __ reg_printf("ABC: invokeinterface says 'It's not a method'\n"); + // Get receiver klass into r3 - also a null check + __ restore_locals(); + __ null_check(r2, oopDesc::klass_offset_in_bytes()); + __ load_klass(r3, r2); + + Label no_such_method; + + // Preserve method in r1 for throw_AbstractMethodErrorVerbose. + __ mov(r1, rmethod); + // Receiver subtype check against REFC. + // Superklass in r0. Subklass in r3. + __ lookup_interface_method(// inputs: rec. class, interface, itable index + r3, r0, noreg, + // outputs: scan temp. reg, scan temp. reg + rbcp, temp, + no_such_interface, + /*return_method=*/false); + + + // profile this call + __ restore_bcp(); // rbcp was destroyed by receiver type check + __ profile_virtual_call(r3, temp, r0); + + // Get declaring interface class from method, and itable index + __ ldr(r0, Address(rmethod, Method::const_offset())); + __ ldr(r0, Address(r0, ConstMethod::constants_offset())); + __ ldr(r0, Address(r0, ConstantPool::pool_holder_offset_in_bytes())); + __ ldr(rmethod, Address(rmethod, Method::itable_index_offset())); + assert(Method::itable_index_max <= 0, "incorrect below"); + __ add(temp, rmethod, -Method::itable_index_max); + __ neg(rmethod, temp); + + // Preserve recvKlass for throw_AbstractMethodErrorVerbose. + __ mov(rlocals, r3); + __ lookup_interface_method(// inputs: rec. class, interface, itable index + rlocals, r0, rmethod, + // outputs: method, scan temp. reg + rmethod, temp, + no_such_interface); + + // rmethod,: methodOop to call + // r2: receiver + // Check for abstract method error + // Note: This should be done more efficiently via a throw_abstract_method_error + // interpreter entry point and a conditional jump to it in case of a null + // method. + __ cbz(rmethod, no_such_method); + + __ profile_arguments_type(r3, rmethod, temp, true); + + // do the call + // r2: receiver + // rmethod,: methodOop + __ jump_from_interpreted(rmethod, r3); + __ should_not_reach_here(); + + // exception handling code follows... + // note: must restore interpreter registers to canonical + // state for exception handling to work correctly! + + __ bind(no_such_method); + __ reg_printf("ABC: invokeinterface says 'There's no such method'\n"); + // throw exception + __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + // Pass arguments for generating a verbose error message. 
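+  // We only reach no_such_method when the resolved itable slot is empty,
+  // i.e. the receiver class provides no implementation of the selected
+  // interface method; the runtime call below raises AbstractMethodError.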
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), r3, r1); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + + __ bind(no_such_interface); + __ reg_printf("ABC: invokeinterface says 'There's no such interface'\n"); + // throw exception + __ restore_bcp(); // bcp must be correct for exception handler (was destroyed) + __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) + // Pass arguments for generating a verbose error message. + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), r3, r0); + // the call_VM checks for exception, so we should never return here. + __ should_not_reach_here(); + return; +} + +void TemplateTable::invokehandle(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + prepare_invoke(byte_no, rmethod, r0, r2); + __ verify_method_ptr(r2); + __ verify_oop(r2); + __ null_check(r2); + + // FIXME: profile the LambdaForm also + + __ profile_final_call(r3); + __ profile_arguments_type(r3, rmethod, rscratch2, true); + + __ jump_from_interpreted(rmethod, r0); +} + +void TemplateTable::invokedynamic(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + + prepare_invoke(byte_no, rmethod, r0); + + // r0: CallSite object (from cpool->resolved_references[]) + // rmethod: MH.linkToCallSite method (from f2) + + // Note: r0_callsite is already pushed by prepare_invoke + + // %%% should make a type profile for any invokedynamic that takes a ref argument + // profile this call + __ profile_call(rbcp); + __ profile_arguments_type(r3, rmethod, rscratch2, false); + + __ verify_oop(r0); + + __ jump_from_interpreted(rmethod, r0); +} + + +//----------------------------------------------------------------------------- +// Allocation + +void TemplateTable::_new() { + transition(vtos, atos); + + __ get_unsigned_2_byte_index_at_bcp(r3, 1); + Label slow_case; + Label done; + Label initialize_header; + Label initialize_object; // including clearing the fields + + __ get_cpool_and_tags(r2, r0); + // Make sure the class we're about to instantiate has been resolved. + // This is done before loading InstanceKlass to be consistent with the order + // how Constant Pool is updated (see ConstantPool::klass_at_put) + const int tags_offset = Array::base_offset_in_bytes(); + __ lea(rscratch1, Address(r0, r3, lsl(0))); + __ ldrb(rscratch1, Address(rscratch1, tags_offset)); + __ cmp(rscratch1, JVM_CONSTANT_Class); + __ b(slow_case, Assembler::NE); + + // get InstanceKlass + __ load_resolved_klass_at_offset(r2, r3, r2, rscratch1); + + // make sure klass is initialized & doesn't have finalizer + // make sure klass is fully initialized + __ ldrb(rscratch1, Address(r2, InstanceKlass::init_state_offset())); + __ cmp(rscratch1, InstanceKlass::fully_initialized); + __ b(slow_case, Assembler::NE); + + // get instance_size in InstanceKlass (scaled to a count of bytes) + __ ldr(r3, Address(r2, Klass::layout_helper_offset())); + // test to see if it has a finalizer or is malformed in some way + __ tbnz(r3, exact_log2(Klass::_lh_instance_slow_path_bit), slow_case); + + // Allocate the instance: + // If TLAB is enabled: + // Try to allocate in the TLAB. + // If fails, go to the slow path. + // Else If inline contiguous allocations are enabled: + // Try to allocate in eden. + // If fails due to heap end, go to slow path. 
+ // + // If TLAB is enabled OR inline contiguous is enabled: + // Initialize the allocation. + // Exit. + // + // Go to slow path. + const bool allow_shared_alloc = + Universe::heap()->supports_inline_contig_alloc(); + + if (UseTLAB) { + __ tlab_allocate(r0, r3, 0, noreg, r1, slow_case); + + if (ZeroTLAB) { + // the fields have been already cleared + __ b(initialize_header); + } else { + // initialize both the header and fields + __ b(initialize_object); + } + } else { + // Allocation in the shared Eden, if allowed. + // + // r3: instance size in bytes + if (allow_shared_alloc) { + __ eden_allocate(r0, r3, 0, r10, slow_case); + } + } + + // If UseTLAB or allow_shared_alloc are true, the object is created above and + // there is an initialize need. Otherwise, skip and go to the slow path. + if (UseTLAB || allow_shared_alloc) { + // The object is initialized before the header. If the object size is + // zero, go directly to the header initialization. + __ bind(initialize_object); + __ sub(r3, r3, sizeof(oopDesc)); + __ cbz(r3, initialize_header); + + // Initialize object fields + { + __ add(rscratch1, r0, sizeof(oopDesc)); + __ mov(rscratch2, 0); + Label loop; + __ bind(loop); + __ str(rscratch2, Address(__ post(rscratch1, BytesPerInt))); + __ sub(r3, r3, BytesPerInt); + __ cbnz(r3, loop); + } + + // initialize object header only. + __ bind(initialize_header); + if (UseBiasedLocking) { + __ ldr(rscratch1, Address(r2, Klass::prototype_header_offset())); + } else { + __ mov(rscratch1, (intptr_t)markOopDesc::prototype()); + } + __ str(rscratch1, Address(r0, oopDesc::mark_offset_in_bytes())); + __ mov(rscratch2, 0); + __ store_klass_gap(r0, rscratch2); // zero klass gap for compressed oops - not using + // not using compressed oops + __ store_klass(r0, r2); // store klass last + +#ifdef DTRACE_ENABLED + { + SkipIfEqual skip(_masm, &DTraceAllocProbes, false); + // Trigger dtrace event for fastpath + __ push(atos); // save the return value + __ call_VM_leaf( + CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), r0); + __ pop(atos); // restore the return value + + } +#endif + __ b(done); + } + + // slow case + __ bind(slow_case); + __ get_constant_pool(c_rarg1); + __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2); + __ verify_oop(r0); + + // continue + __ bind(done); + + __ reg_printf("New object reference is %p\n", r0); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ membar(Assembler::StoreStore); +} + +void TemplateTable::newarray() { + transition(itos, atos); + __ load_unsigned_byte(c_rarg1, at_bcp(1)); + __ mov(c_rarg2, r0); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), + c_rarg1, c_rarg2); + // Must prevent reordering of stores for object initialization with stores that publish the new object. + __ membar(Assembler::StoreStore); +} + +void TemplateTable::anewarray() { + transition(itos, atos); + __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); + __ reg_printf("Index = %d\n", c_rarg2); + __ get_constant_pool(c_rarg1); + __ mov(c_rarg3, r0); + __ reg_printf("About to call InterpreterRuntime::anewarray\n"); + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), + c_rarg1, c_rarg2, c_rarg3); + __ reg_printf("Finshed call to InterpreterRuntime::anewarray\n"); + // Must prevent reordering of stores for object initialization with stores that publish the new object. 
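+  // Without the barrier a racing thread could observe the published array
+  // reference before the stores that initialize its header and length.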
+ __ membar(Assembler::StoreStore); + __ reg_printf("Finshed anewarray\n"); +} + +void TemplateTable::arraylength() { + transition(atos, itos); + __ null_check(r0, arrayOopDesc::length_offset_in_bytes()); + __ ldr(r0, Address(r0, arrayOopDesc::length_offset_in_bytes())); +} + +void TemplateTable::checkcast() +{ + transition(atos, atos); + Label done, is_null, ok_is_subtype, quicked, resolved; + __ cbz(r0, is_null); + + // Get cpool & tags index + __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array + __ get_unsigned_2_byte_index_at_bcp(r14, 1); // r14=index + // See if bytecode has already been quicked + __ add(rscratch1, r3, Array::base_offset_in_bytes()); + __ ldrb(r1, Address(rscratch1, r14)); + __ cmp(r1, JVM_CONSTANT_Class); + __ b(quicked, Assembler::EQ); + + __ push(atos); // save receiver for result, and for GC + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + // vm_result_2 has metadata result + __ get_vm_result_2(r0, rthread); + __ pop(r3); // restore receiver + __ b(resolved); + + // Get superklass in r0 and subklass in r3 + __ bind(quicked); + __ mov(r3, r0); // Save object in r3; r0 needed for subtype check + __ load_resolved_klass_at_offset(r2, r14, r0, rscratch1); // r0 = klass + + __ bind(resolved); + __ load_klass(r1, r3); + + // Generate subtype check. Blows r2. Object in r3. + // Superklass in r0. Subklass in r1. + __ gen_subtype_check(r1, ok_is_subtype); + + // Come here on failure + __ push(r3); + // object is at TOS + __ b(Interpreter::_throw_ClassCastException_entry); + + // Come here on success + __ bind(ok_is_subtype); + __ mov(r0, r3); // Restore object in r3 + + // Collect counts on whether this test sees NULLs a lot or not. + if (ProfileInterpreter) { + __ b(done); + __ bind(is_null); + __ profile_null_seen(r2); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); +} + +void TemplateTable::instanceof() { + transition(atos, itos); + Label done, is_null, ok_is_subtype, quicked, resolved; + __ cbz(r0, is_null); + + // Get cpool & tags index + __ get_cpool_and_tags(r2, r3); // r2=cpool, r3=tags array + __ get_unsigned_2_byte_index_at_bcp(r14, 1); // r14=index + + // See if bytecode has already been quicked + __ add(rscratch1, r3, Array::base_offset_in_bytes()); + __ ldrb(r1, Address(rscratch1, r14)); + __ cmp(r1, JVM_CONSTANT_Class); + __ b(quicked, Assembler::EQ); + + __ push(atos); // save receiver for result, and for GC + __ push_i(r14); // save index (used if profiling) + call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); + // vm_result_2 has metadata result + __ get_vm_result_2(r0, rthread); + __ pop_i(r14); // restore index + __ pop(r3); // restore receiver + __ verify_oop(r3); + __ load_klass(r3, r3); + __ b(resolved); + + // Get superklass in r0 and subklass in r3 + __ bind(quicked); + __ load_klass(r3, r0); + __ load_resolved_klass_at_offset(r2, r14, r0, rscratch1); + + __ bind(resolved); + + // Generate subtype check. Blows r2. + // Superklass in r0. Subklass in r3. + __ gen_subtype_check(r3, ok_is_subtype); + + // Come here on failure + __ mov(r0, 0); + __ b(done); + // Come here on success + __ bind(ok_is_subtype); + __ mov(r0, 1); + + // Collect counts on whether this test sees NULLs a lot or not. 
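+  // With profiling enabled, a null receiver takes the separate is_null path
+  // so profile_null_seen() can record it in the MDO; either way it reaches
+  // done with r0 == 0, since null is never an instance of the klass.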
+ if (ProfileInterpreter) { + __ b(done); + __ bind(is_null); + __ profile_null_seen(r2); + } else { + __ bind(is_null); // same as 'done' + } + __ bind(done); + // r0 = 0: obj == NULL or obj is not an instanceof the specified klass + // r0 = 1: obj != NULL and obj is an instanceof the specified klass +} + +//----------------------------------------------------------------------------- +// Breakpoints +void TemplateTable::_breakpoint() { + // Note: We get here even if we are single stepping.. + // jbug inists on setting breakpoints at every bytecode + // even if we are in single step mode. + + transition(vtos, vtos); + + // get the unpatched byte code + __ get_method(c_rarg1); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::get_original_bytecode_at), + c_rarg1, rbcp); + __ push(r0); + + // post the breakpoint event + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), + rmethod, rbcp); + + // complete the execution of original bytecode + __ pop(rscratch1); + __ dispatch_only_normal(vtos); +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateTable::athrow() { + transition(atos, vtos); + __ null_check(r0); + __ b(Interpreter::throw_exception_entry()); +} + +//----------------------------------------------------------------------------- +// Synchronization +// +// Note: monitorenter & exit are symmetric routines; which is reflected +// in the assembly code structure as well +// +// Stack layout: +// +// [expressions ] <--- sp = expression stack top +// .. +// [expressions ] +// [monitor entry] <--- monitor block top = expression stack bot +// .. +// [monitor entry] +// [frame data ] <--- monitor block bot +// ... +// [saved rbp ] <--- rbp +void TemplateTable::monitorenter() +{ + transition(atos, vtos); + + // check for NULL object + __ null_check(r0); + + const Address monitor_block_top( + rfp, frame::get_interpreter_frame_monitor_block_top_offset() * wordSize); + const Address monitor_block_bot( + rfp, frame::get_interpreter_frame_initial_sp_offset() * wordSize); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + Label allocated; + + // initialize entry pointer + __ mov(c_rarg1, 0); // points to free slot or NULL + + // find a free slot in the monitor block (result in c_rarg1) + { + Label entry, loop, exit; + __ ldr(c_rarg3, monitor_block_top); // points to current entry, + // starting with top-most entry + __ lea(c_rarg2, monitor_block_bot); // points to word before bottom + + __ b(entry); + + __ bind(loop); + // check if current entry is used + // if not used then remember entry in c_rarg1 + __ ldr(rscratch1, Address(c_rarg3, BasicObjectLock::obj_offset_in_bytes())); + __ cmp(rscratch1, 0); + __ mov(c_rarg1, c_rarg3, Assembler::EQ); + // check if current entry is for same object + __ cmp(r0, rscratch1); + // if same object then stop searching + __ b(exit, Assembler::EQ); + // otherwise advance to next entry + __ add(c_rarg3, c_rarg3, entry_size); + __ bind(entry); + // check if bottom reached + __ cmp(c_rarg3, c_rarg2); + // if not at bottom then check this entry + __ b(loop, Assembler::NE); + __ bind(exit); + } + + __ cbnz(c_rarg1, allocated); // check if a slot has been found and + // if found, continue with that on + + // allocate one if there's no free slot + { + Label entry, loop; //, no_adjust; + // 1. 
compute new pointers // rsp: old expression stack top + __ ldr(c_rarg1, monitor_block_bot); // c_rarg1: old expression stack bottom + __ sub(sp, sp, entry_size); // move expression stack top + __ sub(c_rarg1, c_rarg1, entry_size); // move expression stack bottom + __ mov(c_rarg3, sp); // set start value for copy loop + __ str(c_rarg1, monitor_block_bot); // set new monitor block bottom + + //__ cmp(sp, c_rarg3); // Check if we need to move sp + //__ b(no_adjust, Assembler::LO); // to allow more stack space + // for our new sp + //__ sub(sp, sp, 2 * wordSize); + //__ bind(no_adjust); + + __ b(entry); + // 2. move expression stack contents + __ bind(loop); + __ ldr(c_rarg2, Address(c_rarg3, entry_size)); // load expression stack + // word from old location + __ str(c_rarg2, Address(c_rarg3, 0)); // and store it at new location + __ add(c_rarg3, c_rarg3, wordSize); // advance to next word + __ bind(entry); + __ cmp(c_rarg3, c_rarg1); // check if bottom reached + __ b(loop, Assembler::NE); // if not at bottom then + // copy next word + } + + // call run-time routine + // c_rarg1: points to monitor entry + __ bind(allocated); + + // Increment bcp to point to the next bytecode, so exception + // handling for async. exceptions work correctly. + // The object has already been poped from the stack, so the + // expression stack looks correct. + __ add(rbcp, rbcp, 1); //inc + + // store object + __ str(r0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + __ lock_object(c_rarg1); + + // check to make sure this monitor doesn't cause stack overflow after locking + __ save_bcp(); // in case of exception + __ generate_stack_overflow_check(0); + + // The bcp has already been incremented. Just need to dispatch to + // next instruction. + __ dispatch_next(vtos); +} + + +void TemplateTable::monitorexit() +{ + transition(atos, vtos); + + // check for NULL object + __ null_check(r0); + + const Address monitor_block_top( + rfp, frame::get_interpreter_frame_monitor_block_top_offset() * wordSize); + const Address monitor_block_bot( + rfp, frame::get_interpreter_frame_initial_sp_offset() * wordSize); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + Label found; + + // find matching slot + { + Label entry, loop; + __ ldr(c_rarg1, monitor_block_top); // points to current entry, + // starting with top-most entry + __ lea(c_rarg2, monitor_block_bot); // points to word before bottom + // of monitor block + __ b(entry); + + __ bind(loop); + // check if current entry is for same object + __ ldr(rscratch1, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + __ cmp(r0, rscratch1); + // if same object then stop searching + __ b(found, Assembler::EQ); + // otherwise advance to next entry + __ add(c_rarg1, c_rarg1, entry_size); + __ bind(entry); + // check if bottom reached + __ cmp(c_rarg1, c_rarg2); + // if not at bottom then check this entry + __ b(loop, Assembler::NE); + } + + // error handling. 
Unlocking was not block-structured + __ call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + // call run-time routine + __ bind(found); + __ push_ptr(r0); // make sure object is on stack (contract with oopMaps) + __ unlock_object(c_rarg1); + __ pop_ptr(r0); // discard object +} + + +// Wide instructions +//J_UPDATE +void TemplateTable::wide() +{ + __ load_unsigned_byte(r14, at_bcp(1)); + __ mov(rscratch1, (address)Interpreter::_wentry_point); + __ ldr(rscratch1, Address(rscratch1, r14, lsl(2))); + __ b(rscratch1); +} + + +// Multi arrays +//J_UPDATE +void TemplateTable::multianewarray() { + transition(vtos, atos); + __ load_unsigned_byte(r0, at_bcp(3)); // get number of dimensions + // last dim is on top of stack; we want address of first one: + // first_addr = last_addr + (ndims - 1) * wordSize + __ lea(c_rarg1, Address(sp, r0, lsl(2))); + __ sub(c_rarg1, c_rarg1, wordSize); + call_VM(r0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), + c_rarg1); + __ load_unsigned_byte(r1, at_bcp(3)); + __ lea(sp, Address(sp, r1, lsl(2))); +} --- /dev/null 2018-09-25 19:25:30.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/templateTable_aarch32.hpp 2018-09-25 19:25:30.000000000 +0300 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_TEMPLATETABLE_AARCH32_64_HPP +#define CPU_AARCH32_VM_TEMPLATETABLE_AARCH32_64_HPP + +static void prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) + Register index = noreg, // itable index, MethodType, etc. + Register recv = noreg, // if caller wants to see it + Register flags = noreg // if caller wants to test it + ); + static void invokevirtual_helper(Register index, Register recv, + Register flags); + + // Helpers + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); + +#endif // CPU_AARCH32_VM_TEMPLATETABLE_AARCH32_64_HPP --- /dev/null 2018-09-25 19:25:31.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/vmStructs_aarch32.hpp 2018-09-25 19:25:31.000000000 +0300 @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_VMSTRUCTS_AARCH32_HPP +#define CPU_AARCH32_VM_VMSTRUCTS_AARCH32_HPP + +// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. + +#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* JavaCallWrapper */ \ + /******************************/ \ + /******************************/ \ + /* JavaFrameAnchor */ \ + /******************************/ \ + volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) + + +#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) + +#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // CPU_AARCH32_VM_VMSTRUCTS_AARCH32_HPP --- /dev/null 2018-09-25 19:25:32.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/vm_version_aarch32.cpp 2018-09-25 19:25:32.000000000 +0300 @@ -0,0 +1,352 @@ +/* + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "memory/resourceArea.hpp" +#include "runtime/java.hpp" +#include "runtime/stubCodeGenerator.hpp" +#include "utilities/macros.hpp" +#include "vm_version_aarch32.hpp" +#include "compiler/disassembler.hpp" + +#include OS_HEADER_INLINE(os) + +// Next function in another compilation unit to prevent inlining and +// breaking frame size check +extern int aarch32_get_fp_sp_distance(); + +enum ProcessorFeatures VM_Version::_features = FT_NONE; + +static BufferBlob* stub_blob; +static const int stub_size = 550; +volatile bool VM_Version::_is_determine_features_test_running = false; + +extern "C" { + typedef void (*getPsrInfo_stub_t)(void*); +} +static getPsrInfo_stub_t getPsrInfo_stub = NULL; + +typedef unsigned long (*pgetauxval)(unsigned long type); + +bool VM_Version::identify_procline(const char *tag, char **line) { + char *i = *line; + const char EOT = '\t', EOT2 = ':'; // the longest has no tabs + for (; '\0' != *i && EOT != *i && EOT2 != *i; i++); + if (EOT == *i || EOT2 == *i) { + if (!memcmp(*line, tag, i - *line)) { + for (i++; (EOT == *i || EOT2 == *i || ' ' == *i) && '\0' != *i; i++); + if ('\0' != *i) { + *line = i; + return true; + } + } + } + return false; +} + +void VM_Version::get_processor_features() { + _supports_cx8 = true; + _supports_atomic_getset4 = true; + _supports_atomic_getadd4 = true; + _supports_atomic_getset8 = true; + _supports_atomic_getadd8 = true; + + if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); + if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) + FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); + FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 256); + FLAG_SET_DEFAULT(PrefetchFieldsAhead, 256); + FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 256); + + enum ProcessorFeatures f = FT_NONE; + + // Allocate space for the code. + const int code_size = 11 * Assembler::instruction_size; + ResourceMark rm; + CodeBuffer cb("detect_cpu_features", code_size, 0); + MacroAssembler* a = new MacroAssembler(&cb); + jlong test_area; + + // Must be set to true so we can generate the test code. + _features = FT_ALL; + // Emit code. + uint32_t *const code = (uint32_t *)a->pc(); + void (*test)(address addr, uintptr_t offset)=(void(*)(address addr, uintptr_t nonzero))(void *)code; + + a->udiv(r3, r2, r1); // FT_HW_DIVIDE + a->bfc(r1, 1, 1); // FT_ARMV6T2 + a->vneg_f64(d0, d0); // FT_VFPV2 + a->vmov_f64(d0, 1.); // FT_VFPV3 + a->dmb(Assembler::ISH); // FT_ARMV7 + a->ldrexd(r2, r0); // FT_ARMV6K + a->vmov_f64(d0, 0.0); // FT_AdvSIMD + a->crc32b(r3, r2, r1); // FT_CRC32 + a->vmov_f64(d16, 1.); // FT_VFPV3D32 + a->pldw(Address(r0)); // FT_MP_EXT + a->aese(q0, q0); // FT_AES + a->b(lr); + + uint32_t *const code_end = (uint32_t *)a->pc(); + a->flush(); + _features = FT_NONE; + + // Print the detection code. + if (PrintAssembly) { + ttyLocker ttyl; + tty->print_cr("Decoding cpu-feature detection stub at " INTPTR_FORMAT " before execution:", p2i(code)); + Disassembler::decode((u_char*)code, (u_char*)code_end, tty); + } + // Execute code. Illegal instructions will be replaced by 0 in the signal handler. 
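+  // Protocol sketch: while the flag below is set, the signal handler is
+  // expected to patch out any probe instruction that traps and resume; after
+  // the run, every slot that still holds something other than a nop (see the
+  // checks below) marks its feature as supported.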
+ VM_Version::_is_determine_features_test_running = true; + (*test)((address)&test_area, 1); + VM_Version::_is_determine_features_test_running = false; + + uint32_t *insn = code; + if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_HW_DIVIDE); + if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_ARMV6T2); + if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_VFPV2); + if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_VFPV3); + if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_ARMV7); + if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_ARMV6K); + if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_AdvSIMD); + if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_CRC32); + if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_VFPV3D32); + if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_MP_EXT); + if (*insn++ != Assembler::nop_insn) f = (ProcessorFeatures) (f | FT_AES); + + int ncores = 0, cpu = 0, variant = 0, model = 0, revision = 0; + char buf[2048], *i; + if (FILE * fp = fopen("/proc/cpuinfo", "r")) { + while ((i = fgets(buf, 2048, fp))) { + if (identify_procline("processor", &i)) { + ncores++; + } else if (identify_procline("CPU implementer", &i)) { + cpu = strtol(i, NULL, 0); + } else if (identify_procline("CPU variant", &i)) { + variant = strtol(i, NULL, 0); + } else if (identify_procline("CPU part", &i)) { + model = strtol(i, NULL, 0); + } else if (identify_procline("CPU revision", &i)) { + revision = strtol(i, NULL, 0); + } + } + fclose(fp); + } + if (1 == ncores) { + f = (ProcessorFeatures) (f | FT_SINGLE_CORE); + } + + sprintf(buf, "0x%02x:0x%x:0x%03x:%d", cpu, variant, model, revision); + if (f & FT_VFPV2) strcat(buf, ", vfp"); + if (f & FT_VFPV3) strcat(buf, ", vfpv3"); + if (f & FT_VFPV3D32) strcat(buf, ", vfpd32"); + if (f & FT_AdvSIMD) strcat(buf, ", simd, neon"); + if (f & FT_CRC32) strcat(buf, ", crc"); + if (f & FT_AES) strcat(buf, ", aes"); + + _features_string = os::strdup(buf); + + if (FLAG_IS_DEFAULT(UseCRC32)) { + UseCRC32 = (f & FT_CRC32) != 0; + } + if (UseCRC32 && (f & FT_CRC32) == 0) { + warning("UseCRC32 specified, but not supported on this CPU"); + FLAG_SET_DEFAULT(UseCRC32, false); + } + if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { + FLAG_SET_DEFAULT(UseCRC32Intrinsics, true); + } + if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true); + } + if ((f & FT_AdvSIMD) && FLAG_IS_DEFAULT(UseNeon) && (model & ~0x0f0) >= 0xc08) { + UseNeon = true; + } + _features = f; + +#ifdef COMPILER2 + if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { + FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true); + } +#endif // COMPILER2 + + if (f & FT_AdvSIMD) { // don't use UseNeon since Montgomery intrinsics are benefitial even on Cortex-A7 + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, true); + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, true); + } + } else { + if (UseMontgomeryMultiplyIntrinsic || UseMontgomerySquareIntrinsic) { + warning("Montgomery intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false); + FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false); + } + } + + if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps) && (f & (FT_VFPV2 | FT_AdvSIMD))) { + FLAG_SET_DEFAULT(UseSIMDForMemoryOps, true); + } + +/* if 
(FLAG_IS_DEFAULT(UseBarriersForVolatile)) { + UseBarriersForVolatile = (_cpuFeatures & CPU_DMB_ATOMICS) != 0; + }*/ + + /*if(!(f & FT_ARMV7) && FLAG_IS_DEFAULT(UseMembar)) { + UseMembar = false; + } else if(UseMembar) { + fprintf(stderr, "Unable to use memory barriers as not on ARMv7, disabling.\n"); + UseMembar = false; + }*/ + if (UseAES) { + if ((f & FT_AES) == 0) + warning("UseAES specified, but not supported on this CPU"); + else + warning("UseAES specified, but not supported"); + FLAG_SET_DEFAULT(UseAES, false); + } + if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { + UseAESIntrinsics = true; + } + + if (UseSHA) { + warning("SHA instructions are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA, false); + } + if (f & FT_AdvSIMD) { + if(FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); + } + if(FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); + } + if(FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { + FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); + } + } else if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { + warning("SHA intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); + } + +} + +static bool get_is_thumb() { + intptr_t x, y; + asm ("mov %0, pc\n" + "mov %1, pc": "=r"(x), "=r"(y)); + return y - x == 2; +} + +void VM_Version::initialize() { + ResourceMark rm; + + stub_blob = BufferBlob::create("getPsrInfo_stub", stub_size); + if (stub_blob == NULL) { + vm_exit_during_initialization("Unable to allocate getPsrInfo_stub"); + } + + get_processor_features(); + + const bool thumb = get_is_thumb(); + + if (FLAG_IS_DEFAULT(VMFrameAPCS)) { + if (thumb) { + FLAG_SET_DEFAULT(VMFrameAPCS, false); + } else { + const int fp_sp_dist = aarch32_get_fp_sp_distance(); + // mov r12, sp + // push {r11, r12, lr, pc} + // sub r11, r12, #4 + const int apcs_dist = 12; + + assert((0 <= fp_sp_dist) && (fp_sp_dist % 4 == 0), "fp/sp sanity check"); + assert(fp_sp_dist <= 16, "Assume leaf function should not save many registers in prolog"); + + FLAG_SET_DEFAULT(VMFrameAPCS, fp_sp_dist == apcs_dist); + } + } + + if (FLAG_IS_DEFAULT(JNIFrameAPCS)) { + FLAG_SET_DEFAULT(JNIFrameAPCS, VMFrameAPCS); + } + + // This machine does not allow a lot of forms of unaligned memory accesses + if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { + FLAG_SET_DEFAULT(UseUnalignedAccesses, false); + } + + if (FrameAPCS && !FLAG_IS_DEFAULT(PreserveFramePointer) && !PreserveFramePointer) { + warning("FrameAPCS enabled, so fp will always hold frame pointer, ignoring disabled PreserveFramePointer!"); + } + + if (thumb && (VMFrameAPCS || JNIFrameAPCS)) { + warning("VM and JNI APCS support is not available when VM is built in Thumb mode"); + } + + FLAG_SET_DEFAULT(CriticalJNINatives, false); +#ifndef HARD_FLOAT_CC + if( !(VM_Version::features() & (FT_VFPV2 | FT_VFPV3)) ) { +#ifdef COMPILER2 + // C2 is only supported on v7+ VFP at this time + vm_exit_during_initialization("Server VM is only supported on ARMv7+ VFP"); +#else + if(FLAG_IS_CMDLINE(UseFPU)) { + warning("FPU is not present on this core"); + } + FLAG_SET_DEFAULT(UseFPU, false); +#endif + } +#endif + +#ifdef COMPILER2 + if ( !(VM_Version::features() & FT_ARMV7) ) { + // C2 is only supported on v7+ VFP at this time + vm_exit_during_initialization("Server VM is only supported on ARMv7+"); + } + + FLAG_SET_DEFAULT(UseFPUForSpilling, true); + + if (FLAG_IS_DEFAULT(MaxVectorSize)) 
{ + // FLAG_SET_DEFAULT(MaxVectorSize, has_simd() ? 16 : 8); + // SIMD/NEON can use 16, but default is 8 because currently + // larger than 8 will disable instruction scheduling + FLAG_SET_DEFAULT(MaxVectorSize, 8); + } + + if (MaxVectorSize > 16) { + FLAG_SET_DEFAULT(MaxVectorSize, 8); + } +#endif + + UNSUPPORTED_OPTION(CriticalJNINatives); +} --- /dev/null 2018-09-25 19:25:33.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/vm_version_aarch32.hpp 2018-09-25 19:25:33.000000000 +0300 @@ -0,0 +1,98 @@ +/* + * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_VM_VERSION_AARCH32_HPP +#define CPU_AARCH32_VM_VM_VERSION_AARCH32_HPP + +#include "runtime/globals_extension.hpp" +#include "runtime/vm_version.hpp" +#include "utilities/sizes.hpp" + +enum ProcessorFeatures { + FT_NONE = 0, + FT_HW_DIVIDE = 1, + FT_VFPV2 = 2, + FT_VFPV3 = 4, + FT_VFPV3D32 = 8, + FT_ARMV7 = 16, + FT_ARMV6T2 = 32, + FT_ARMV6K = 64, + FT_SINGLE_CORE = 128, + FT_AdvSIMD = 256, + FT_CRC32 = 512, + FT_MP_EXT = 1024, + FT_AES = 2048, + FT_ALL = 0xffff +}; + +class VM_Version : public Abstract_VM_Version { + public: + // Processor feature lookup. + + enum { + CPU_ARM = 'A', + CPU_BROADCOM = 'B', + CPU_CAVIUM = 'C', + CPU_DEC = 'D', + CPU_INFINEON = 'I', + CPU_MOTOROLA = 'M', + CPU_NVIDIA = 'N', + CPU_AMCC = 'P', + CPU_QUALCOM = 'Q', + CPU_MARVELL = 'V', + CPU_INTEL = 'i', + } cpuFamily; + + // Initialization + static void initialize(); + + private: + static enum ProcessorFeatures _features; + static const char* _cpu_features; + static volatile bool _is_determine_features_test_running; + + static void get_processor_features(); + static bool identify_procline(const char *tag, char **line); + + public: + static enum ProcessorFeatures features() { + return _features; + } + static void features(ProcessorFeatures f) { + _features = f; + } + static bool is_determine_features_test_running() { return _is_determine_features_test_running; } +}; + +#ifdef HARD_FLOAT_CC +inline const bool hasFPU(void) { return true; } +#else +inline bool hasFPU(void) { return (UseFPU); } +#endif + + +#endif // CPU_AARCH32_VM_VM_VERSION_AARCH32_HPP --- /dev/null 2018-09-25 19:25:34.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/vm_version_aarch32_2.cpp 2018-09-25 19:25:34.000000000 +0300 @@ -0,0 +1,28 @@ +// Copyright 2013-2018 Azul Systems, Inc. 
All Rights Reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it under +// the terms of the GNU General Public License version 2 only, as published by +// the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT ANY +// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +// A PARTICULAR PURPOSE. See the GNU General Public License version 2 for more +// details (a copy is included in the LICENSE file that accompanied this code). +// +// You should have received a copy of the GNU General Public License version 2 +// along with this work; if not, write to the Free Software Foundation, Inc., +// 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. + +// This file is logical part of vm_version_aarch32.cpp, but contains parts that +// _should_ be in another compilation unit + +int aarch32_get_fp_sp_distance() { + register int fp __asm__ ("r11"); + register int sp __asm__ ("r13"); + return fp - sp; +} --- /dev/null 2018-09-25 19:25:35.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/vm_version_ext_aarch32.cpp 2018-09-25 19:25:35.000000000 +0300 @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "memory/allocation.hpp" +#include "memory/allocation.inline.hpp" +#include "runtime/os.inline.hpp" +#include "vm_version_ext_aarch32.hpp" + +// VM_Version_Ext statics +int VM_Version_Ext::_no_of_threads = 0; +int VM_Version_Ext::_no_of_cores = 0; +int VM_Version_Ext::_no_of_sockets = 0; +bool VM_Version_Ext::_initialized = false; +char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; +char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; + +void VM_Version_Ext::initialize_cpu_information(void) { + // do nothing if cpu info has been initialized + if (_initialized) { + return; + } + + int core_id = -1; + int chip_id = -1; + int len = 0; + char* src_string = NULL; + + _no_of_cores = os::processor_count(); + _no_of_threads = _no_of_cores; + _no_of_sockets = _no_of_cores; + snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "AArch32"); + snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "%s", _features_string); + _initialized = true; +} + +int VM_Version_Ext::number_of_threads(void) { + initialize_cpu_information(); + return _no_of_threads; +} + +int VM_Version_Ext::number_of_cores(void) { + initialize_cpu_information(); + return _no_of_cores; +} + +int VM_Version_Ext::number_of_sockets(void) { + initialize_cpu_information(); + return _no_of_sockets; +} + +const char* VM_Version_Ext::cpu_name(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); + return tmp; +} + +const char* VM_Version_Ext::cpu_description(void) { + initialize_cpu_information(); + char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); + if (NULL == tmp) { + return NULL; + } + strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); + return tmp; +} --- /dev/null 2018-09-25 19:25:36.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/vm_version_ext_aarch32.hpp 2018-09-25 19:25:36.000000000 +0300 @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef CPU_AARCH32_VM_VM_VERSION_EXT_AARCH32_HPP +#define CPU_AARCH32_VM_VM_VERSION_EXT_AARCH32_HPP + +#include "utilities/macros.hpp" +#include "vm_version_aarch32.hpp" + +class VM_Version_Ext : public VM_Version { + private: + static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; + static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; + + static int _no_of_threads; + static int _no_of_cores; + static int _no_of_sockets; + static bool _initialized; + static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; + static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; + + public: + static int number_of_threads(void); + static int number_of_cores(void); + static int number_of_sockets(void); + + static const char* cpu_name(void); + static const char* cpu_description(void); + static void initialize_cpu_information(void); + +}; + +#endif // CPU_AARCH32_VM_VM_VERSION_EXT_AARCH32_HPP --- /dev/null 2018-09-25 19:25:37.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/vmreg_aarch32.cpp 2018-09-25 19:25:37.000000000 +0300 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_VMREG_AARCH32_HPP +#define CPU_AARCH32_VM_VMREG_AARCH32_HPP + + bool is_Register() { + // BAD_REG should not pass this test. + return (unsigned int) value() < + (unsigned int) ConcreteRegisterImpl::max_gpr; + } + + bool is_FloatRegister() { + return value() >= ConcreteRegisterImpl::max_gpr && + value() < ConcreteRegisterImpl::max_fpr; + } + + Register as_Register() { + assert(is_Register(), "sanity check"); + return ::as_Register(value()); + } + + FloatRegister as_FloatRegister() { + assert(is_FloatRegister(), "sanity check"); + return ::as_FloatRegister(value() - ConcreteRegisterImpl::max_gpr); + } + + inline bool is_concrete() { + assert(is_reg(), "sanity check"); + return true; + } + +#endif // CPU_AARCH32_VM_VMREG_AARCH32_HPP --- /dev/null 2018-09-25 19:25:39.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/vmreg_aarch32.inline.hpp 2018-09-25 19:25:39.000000000 +0300 @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_AARCH32_VM_VMREG_AARCH32_INLINE_HPP +#define CPU_AARCH32_VM_VMREG_AARCH32_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if (this == noreg) { + return VMRegImpl::Bad(); + } + return VMRegImpl::as_VMReg(encoding()); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { + if (this == fnoreg) { + return VMRegImpl::Bad(); + } + return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_gpr); +} + +#endif // CPU_AARCH32_VM_VMREG_AARCH32_INLINE_HPP --- /dev/null 2018-09-25 19:25:40.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/vtableStubs_aarch32.cpp 2018-09-25 19:25:40.000000000 +0300 @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "assembler_aarch32.inline.hpp" +#include "code/vtableStubs.hpp" +#include "interp_masm_aarch32.hpp" +#include "memory/resourceArea.hpp" +#include "oops/compiledICHolder.hpp" +#include "oops/instanceKlass.hpp" +#include "oops/klassVtable.hpp" +#include "runtime/sharedRuntime.hpp" +#include "vmreg_aarch32.inline.hpp" +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif + +// machine-dependent part of VtableStubs: create VtableStub of correct size and +// initialize its code + +#define __ masm-> + +#ifndef PRODUCT +extern "C" void bad_compiled_vtable_index(JavaThread* thread, + oop receiver, + int index); +#endif + +VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { + const int aarch32_code_length = VtableStub::pd_code_size_limit(true); + VtableStub* s = new(aarch32_code_length) VtableStub(true, vtable_index); + // Can be NULL if there is no free space in the code cache. 
+ if (s == NULL) { + return NULL; + } + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), aarch32_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + +#ifndef PRODUCT + if (CountCompiledCalls) { + // FIXME SharedRuntime::nof_megamorphic_calls_addr() returns un-encodable address + __ increment(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()), 1); + } +#endif + + // get receiver (need to skip return address on top of stack) + assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + + // get receiver klass + address npe_addr = __ pc(); + __ load_klass(rscratch2, j_rarg0); + +#ifndef PRODUCT + if (DebugVtables) { + Label L; + // check offset vs vtable length + __ ldr(rscratch1, Address(rscratch2, Klass::vtable_length_offset())); + __ cmp(rscratch1, vtable_index * vtableEntry::size()); + __ b(L, Assembler::GT); + __ enter(); + __ mov(r2, vtable_index); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2); + __ leave(); + __ bind(L); + } +#endif // PRODUCT + + __ lookup_virtual_method(rscratch2, vtable_index, rmethod); + + if (DebugVtables) { + Label L; + __ cbz(rmethod, L); + __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); + __ cbnz(rscratch1, L); + __ stop("Vtable entry is NULL"); + __ bind(L); + } + // r0: receiver klass + // rmethod: Method* + // r2: receiver + address ame_addr = __ pc(); + __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); + __ b(rscratch1); + + __ flush(); + + if (PrintMiscellaneous && (WizardMode || Verbose)) { + tty->print_cr("vtable #%d at "PTR_FORMAT"[%d] left over: %d", + vtable_index, p2i(s->entry_point()), + (int)(s->code_end() - s->entry_point()), + (int)(s->code_end() - __ pc())); + } + guarantee(__ pc() <= s->code_end(), "overflowed buffer"); + + s->set_exception_points(npe_addr, ame_addr); + return s; +} + + +VtableStub* VtableStubs::create_itable_stub(int itable_index) { + // Note well: pd_code_size_limit is the absolute minimum we can get + // away with. If you add code here, bump the code stub size + // returned by pd_code_size_limit! + const int code_length = VtableStub::pd_code_size_limit(false); + VtableStub* s = new(code_length) VtableStub(false, itable_index); + // Can be NULL if there is no free space in the code cache. 
+ if (s == NULL) { + return NULL; + } + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), code_length); + MacroAssembler* masm = new MacroAssembler(&cb); + +#ifndef PRODUCT + if (CountCompiledCalls) { + // FIXME SharedRuntime::nof_megamorphic_calls_addr() returns un-encodable address + __ increment(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()), 1); + } +#endif + + // Entry arguments: + // rscratch2: CompiledICHolder + // j_rarg0: Receiver + + // Most registers are in use; we'll use rmethod, rscratch1, r4 + // IMPORTANT: r4 is used as a temp register, if it's changed callee-save + // the code should be fixed + // TODO: put an assert here to ensure r4 is caller-save + const Register recv_klass_reg = rscratch1; + const Register holder_klass_reg = rscratch2; // declaring interface klass (DECC) + const Register resolved_klass_reg = rmethod; // resolved interface klass (REFC) + const Register temp_reg = r4; + const Register icholder_reg = rscratch2; + + __ ldr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); + // Destroys icholder value + __ ldr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); + + Label L_no_such_interface; + + // get receiver klass (also an implicit null-check) + address npe_addr = __ pc(); + assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + __ load_klass(recv_klass_reg, j_rarg0); + + // Receiver subtype check against REFC. + // Destroys recv_klass_reg value. + __ lookup_interface_method(// inputs: rec. class, interface + recv_klass_reg, resolved_klass_reg, noreg, + // outputs: scan temp. reg1, scan temp. reg2 + recv_klass_reg, temp_reg, + L_no_such_interface, + /*return_method=*/false); + + // Get selected method from declaring class and itable index + __ load_klass(recv_klass_reg, j_rarg0); // restore recv_klass_reg + __ lookup_interface_method(// inputs: rec. class, interface, itable index + recv_klass_reg, holder_klass_reg, itable_index, + // outputs: method, scan temp. reg + rmethod, temp_reg, + L_no_such_interface); + // rmethod: Method* + // j_rarg0: receiver + +#ifdef ASSERT + if (DebugVtables) { + Label L2; + __ cbz(rmethod, L2); + __ ldr(recv_klass_reg, Address(rmethod, Method::from_compiled_offset())); + __ cbnz(recv_klass_reg, L2); + __ stop("compiler entrypoint is null"); + __ bind(L2); + } +#endif // ASSERT + + address ame_addr = __ pc(); + __ ldr(recv_klass_reg, Address(rmethod, Method::from_compiled_offset())); + __ b(recv_klass_reg); + + __ bind(L_no_such_interface); + // Handle IncompatibleClassChangeError in itable stubs. + // More detailed error message. + // We force resolving of the call site by jumping to the "handle + // wrong method" stub, and so let the interpreter runtime do all the + // dirty work. + __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); + + __ flush(); + + if (PrintMiscellaneous && (WizardMode || Verbose)) { + tty->print_cr("itable #%d at "PTR_FORMAT"[%d] left over: %d", + itable_index, p2i(s->entry_point()), + (int)(s->code_end() - s->entry_point()), + (int)(s->code_end() - __ pc())); + } + guarantee(__ pc() <= s->code_end(), "overflowed buffer"); + + s->set_exception_points(npe_addr, ame_addr); + return s; +} + + +int VtableStub::pd_code_size_limit(bool is_vtable_stub) { + int size = DebugVtables ? 216 : 0; // FIXME + if (CountCompiledCalls) + size += 6 * 4; // FIXME. 
cannot measure, CountCalls does not work + if (is_vtable_stub) { + size += 26; + } else { + size += 160; + if (!(VM_Version::features() & (FT_ARMV7 | FT_ARMV6T2))) { + size += (NativeMovConstReg::mov_n_three_orr_sz - NativeMovConstReg::movw_movt_pair_sz); + } + } + return size; + + // In order to tune these parameters, run the JVM with VM options + // +PrintMiscellaneous and +WizardMode to see information about + // actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops. + // + // If Universe::narrow_klass_base is nonzero, decoding a compressed + // class can take several instructions. Run it with -Xmx31G + // -XX:+UseCompressedOops. + // + // The JVM98 app. _202_jess has a megamorphic interface call. +} + +int VtableStub::pd_code_alignment() { return 4; } --- /dev/null 2018-09-25 19:25:41.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/asm_os_linux_aarch32.s 2018-09-25 19:25:41.000000000 +0300 @@ -0,0 +1,31 @@ +# Copyright 2013-2017 Azul Systems, Inc. All Rights Reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License version 2 only, as published by +# the Free Software Foundation. +# +# This code is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License version 2 for more +# details (a copy is included in the LICENSE file that accompanied this code). +# +# You should have received a copy of the GNU General Public License version 2 +# along with this work; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +# +# Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +# CA 94089 USA or visit www.azul.com if you need additional information or +# have any questions. + +.global linux_aarch32_current_frame_pointer +.type linux_aarch32_current_frame_pointer,%function
linux_aarch32_current_frame_pointer: + mov r0, sp + bx lr + +.global linux_aarch32_previous_frame_pointer + .type linux_aarch32_previous_frame_pointer,%function
linux_aarch32_previous_frame_pointer: + mov r0, fp + bx lr --- /dev/null 2018-09-25 19:25:42.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/assembler_linux_aarch32.cpp 2018-09-25 19:25:42.000000000 +0300 @@ -0,0 +1,27 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// nothing required here --- /dev/null 2018-09-25 19:25:43.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/atomic_linux_aarch32.hpp 2018-09-25 19:25:43.000000000 +0300 @@ -0,0 +1,209 @@ +/* + * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH32_VM_ATOMIC_LINUX_AARCH32_INLINE_HPP +#define OS_CPU_LINUX_AARCH32_VM_ATOMIC_LINUX_AARCH32_INLINE_HPP + +#include "runtime/os.hpp" +#include "vm_version_aarch32.hpp" + +// Implementation of class atomic + +// various toolchains set different symbols to indicate that ARMv7 architecture is set as a target +// starting from v7 use more lightweight barrier instructions +#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) +#define FULL_MEM_BARRIER __asm__ __volatile__ ("dmb ish" : : : "memory") +#define READ_MEM_BARRIER __asm__ __volatile__ ("dmb ish" : : : "memory") +#define WRITE_MEM_BARRIER __asm__ __volatile__ ("dmb ishst" : : : "memory") +#else +#define FULL_MEM_BARRIER __sync_synchronize() +#define READ_MEM_BARRIER __asm__ __volatile__ ("mcr p15,0,r0,c7,c10,5" : : : "memory") +#define WRITE_MEM_BARRIER __asm__ __volatile__ ("mcr p15,0,r0,c7,c10,5" : : : "memory") +#endif + +template<> +template +inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const { + STATIC_ASSERT(8 == sizeof(T)); +// have seen a few toolchains which only set a subset of appropriate defines +// and as well do not provide atomic API, hence so complicated condition +#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8)) + register long long res; + __asm__ __volatile__ ( + "ldrexd %Q[res], %R[res], [%[addr]]" + : [res] "=r" (res) + : [addr] "r" (reinterpret_cast(src)) + : "memory"); + return PrimitiveConversions::cast(res); +#else + return PrimitiveConversions::cast(__atomic_load_n(reinterpret_cast(src), + __ATOMIC_RELAXED)); +#endif +} + +template<> +template +inline void Atomic::PlatformStore<8>::operator()(T store_value, + T volatile* dest) const { + 
STATIC_ASSERT(8 == sizeof(T)); +// have seen a few toolchains which only set a subset of appropriate defines +// and as well do not provide atomic API, hence so complicated condition +#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8)) + // the below is only supported since ARMv6K, adapt otherwise + register long long t1; + register int t3; + __asm__ __volatile__ ( + "repeat_%=:\n\t" + "ldrexd %Q[t1],%R[t1],[%[addr]]\n\t" + "strexd %[t3],%Q[val],%R[val],[%[addr]]\n\t" + "cmp %[t3],#0\n\t" + "bne repeat_%=" + : [t1] "=&r" (t1), + [t3] "=&r" (t3) + : [val] "r" (PrimitiveConversions::cast(store_value)), + [addr] "r" (reinterpret_cast(dest)) + : "memory"); +#else + __atomic_store_n(reinterpret_cast(dest), + PrimitiveConversions::cast(store_value), __ATOMIC_RELAXED); +#endif +} + +template +struct Atomic::PlatformAdd + : Atomic::AddAndFetch > +{ + template + D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const; +}; + +template<> +template +inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { + STATIC_ASSERT(4 == sizeof(I)); + STATIC_ASSERT(4 == sizeof(D)); + return __sync_add_and_fetch(dest, add_value); +} + +template +template +inline T Atomic::PlatformXchg::operator()(T exchange_value, + T volatile* dest, + atomic_memory_order order) const { + STATIC_ASSERT(byte_size == sizeof(T)); + T res = __sync_lock_test_and_set(dest, exchange_value); + FULL_MEM_BARRIER; + return res; +} + +// No direct support for cmpxchg of bytes; emulate using int. +template<> +struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; + +template<> +template +inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, + T volatile* dest, + T compare_value, + atomic_memory_order order) const { + STATIC_ASSERT(4 == sizeof(T)); + if (order == memory_order_relaxed) { + T value = compare_value; + __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); + return value; + } else { + return __sync_val_compare_and_swap(dest, compare_value, exchange_value); + } +} + +template<> +template +inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, + T volatile* dest, + T compare_value, + atomic_memory_order order) const { + STATIC_ASSERT(8 == sizeof(T)); + if (order == memory_order_relaxed) { +// have seen a few toolchains which only set a subset of appropriate defines +// and as well do not provide dword CAS, hence so complicated condition +#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8)) + register long long old_value; + register int store_result; + __asm__ __volatile__ ( + "mov %[res],#1\n\t" + "repeat_%=:\n\t" + "ldrexd %Q[old],%R[old],[%[addr]]\n\t" + "cmp %Q[old], %Q[cmpr]\n\t" + "ittt eq\n\t" + "cmpeq %R[old], %R[cmpr]\n\t" + "strexdeq %[res],%Q[exch],%R[exch],[%[addr]]\n\t" + "cmpeq %[res],#1\n\t" + "beq repeat_%=" + : [old] "=&r" (old_value), + [res] "=&r" (store_result) + : [exch] "r" (exchange_value), + [cmpr] "r" (compare_value), + [addr] "r" (dest) + : "memory"); + return old_value; +#else + T value = compare_value; + __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false, + __ATOMIC_RELAXED, 
__ATOMIC_RELAXED); + return value; +#endif + } else { +#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8)) + register long long old_value; + register int store_result; + __asm__ __volatile__ ( + "dmb ish\n\t" + "mov %[res],#1\n\t" + "repeat_%=:\n\t" + "ldrexd %Q[old],%R[old],[%[addr]]\n\t" + "cmp %Q[old], %Q[cmpr]\n\t" + "ittt eq\n\t" + "cmpeq %R[old], %R[cmpr]\n\t" + "strexdeq %[res],%Q[exch],%R[exch],[%[addr]]\n\t" + "cmpeq %[res],#1\n\t" + "beq repeat_%=\n\t" + "dmb ish" + : [old] "=&r" (old_value), + [res] "=&r" (store_result) + : [exch] "r" (exchange_value), + [cmpr] "r" (compare_value), + [addr] "r" (dest) + : "memory"); + return old_value; +#else + return __sync_val_compare_and_swap(dest, compare_value, exchange_value); +#endif + } +} + +#endif // OS_CPU_LINUX_AARCH32_VM_ATOMIC_LINUX_AARCH32_INLINE_HPP --- /dev/null 2018-09-25 19:25:44.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/bytes_linux_aarch32.inline.hpp 2018-09-25 19:25:44.000000000 +0300 @@ -0,0 +1,46 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH32_VM_BYTES_LINUX_AARCH32_INLINE_HPP +#define OS_CPU_LINUX_AARCH32_VM_BYTES_LINUX_AARCH32_INLINE_HPP + +#include + +// Efficient swapping of data bytes from Java byte +// ordering to native byte ordering and vice versa. +inline u2 Bytes::swap_u2(u2 x) { + return bswap_16(x); +} + +inline u4 Bytes::swap_u4(u4 x) { + return bswap_32(x); +} + +inline u8 Bytes::swap_u8(u8 x) { + return bswap_64(x); +} + +#endif // OS_CPU_LINUX_AARCH32_VM_BYTES_LINUX_AARCH32_INLINE_HPP --- /dev/null 2018-09-25 19:25:45.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/copy_linux_aarch32.inline.hpp 2018-09-25 19:25:45.000000000 +0300 @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH32_VM_COPY_LINUX_AARCH32_INLINE_HPP +#define OS_CPU_LINUX_AARCH32_VM_COPY_LINUX_AARCH32_INLINE_HPP + +static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + (void)memcpy(to, from, count * HeapWordSize); + break; + } +} + +static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: + while (count-- > 0) { + *to++ = *from++; + } + break; + } +} + +static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_words(from, to, count); +} + +static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + pd_disjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(const void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { + pd_conjoint_bytes(from, to, count); +} + +static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { + _Copy_conjoint_jshorts_atomic(from, to, count); +} + +static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { + _Copy_conjoint_jints_atomic(from, to, count); +} + +static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { + _Copy_conjoint_jlongs_atomic(from, to, count); +} + +static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { + assert(BytesPerInt == BytesPerOop, "jints and oops must be the same size"); + _Copy_conjoint_jints_atomic((jint*)from, (jint*)to, count); +} + +static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { + _Copy_arrayof_conjoint_bytes(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { + _Copy_arrayof_conjoint_jshorts(from, to, count); +} + +static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { + _Copy_arrayof_conjoint_jints(from, to, count); +} + +static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { + _Copy_arrayof_conjoint_jlongs(from, to, 
count); +} + +static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { + assert(BytesPerInt == BytesPerOop, "jints and oops must be the same size"); + _Copy_arrayof_conjoint_jints(from, to, count); +} + +#endif // OS_CPU_LINUX_AARCH32_VM_COPY_LINUX_AARCH32_INLINE_HPP --- /dev/null 2018-09-25 19:25:46.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/globals_linux_aarch32.hpp 2018-09-25 19:25:46.000000000 +0300 @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH32_VM_GLOBALS_LINUX_AARCH32_HPP +#define OS_CPU_LINUX_AARCH32_VM_GLOBALS_LINUX_AARCH32_HPP + +// Set the default values for platform dependent flags used by the runtime +// system (see globals.hpp) + +// DontYieldALot should always be set to false on Linux. +define_pd_global(bool, DontYieldALot, false); + +// Thread stack sizes are given in Kbytes. +define_pd_global(intx, ThreadStackSize, 320); +define_pd_global(intx, VMThreadStackSize, 512); +define_pd_global(intx, CompilerThreadStackSize, 512); + +define_pd_global(uintx, JVMInvokeMethodSlack, 8192); + +// HeapBaseMinAddress is used on 64 bit platforms only. +define_pd_global(uintx, HeapBaseMinAddress, 2*G); + +#endif // OS_CPU_LINUX_AARCH32_VM_GLOBALS_LINUX_AARCH32_HPP --- /dev/null 2018-09-25 19:25:48.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/linux_aarch32.S 2018-09-25 19:25:47.000000000 +0300 @@ -0,0 +1,32 @@ +// +// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2014, Red Hat Inc. All rights reserved. +// Copyright (c) 2015, Linaro Ltd. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). 
+// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. + +// This file is a derivative work resulting from (and including) modifications +// made by Azul Systems, Inc. The dates of such changes are 2013-2018. +// Copyright 2013-2018 Azul Systems, Inc. All Rights Reserved. +// +// Please contact Azul Systems, 385 Moffett Park Drive, Suite 115, Sunnyvale, +// CA 94089 USA or visit www.azul.com if you need additional information or +// have any questions. + --- /dev/null 2018-09-25 19:25:49.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/orderAccess_linux_aarch32.hpp 2018-09-25 19:25:48.000000000 +0300 @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH32_VM_ORDERACCESS_LINUX_AARCH32_INLINE_HPP +#define OS_CPU_LINUX_AARCH32_VM_ORDERACCESS_LINUX_AARCH32_INLINE_HPP + +#include "runtime/orderAccess.hpp" +#include "runtime/os.hpp" +#include "vm_version_aarch32.hpp" + +// Implementation of class OrderAccess. + +inline void OrderAccess::loadload() { acquire(); } +inline void OrderAccess::storestore() { + WRITE_MEM_BARRIER; +} +inline void OrderAccess::loadstore() { acquire(); } +inline void OrderAccess::storeload() { fence(); } +inline void OrderAccess::acquire() { + READ_MEM_BARRIER; +} +inline void OrderAccess::release() { + READ_MEM_BARRIER; +} +inline void OrderAccess::fence() { + FULL_MEM_BARRIER; +} + +#endif // OS_CPU_LINUX_AARCH32_VM_ORDERACCESS_LINUX_AARCH32_INLINE_HPP --- /dev/null 2018-09-25 19:25:50.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/os_linux_aarch32.cpp 2018-09-25 19:25:49.000000000 +0300 @@ -0,0 +1,699 @@ +/* + * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +// No precompiled headers +#include "jvm.h" +#include "asm/macroAssembler.hpp" +#include "classfile/classLoader.hpp" +#include "classfile/systemDictionary.hpp" +#include "classfile/vmSymbols.hpp" +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/allocation.inline.hpp" +#include "nativeInst_aarch32.hpp" +#include "os_share_linux.hpp" +#include "prims/jniFastGetField.hpp" +#include "prims/jvm_misc.hpp" +#include "runtime/arguments.hpp" +#include "runtime/extendedPC.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.inline.hpp" +#include "runtime/java.hpp" +#include "runtime/javaCalls.hpp" +#include "runtime/mutexLocker.hpp" +#include "runtime/osThread.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/thread.inline.hpp" +#include "runtime/timer.hpp" +#include "utilities/debug.hpp" +#include "utilities/events.hpp" +#include "utilities/vmError.hpp" + +// put OS-includes here +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +#define SPELL_REG_SP "sp" +#define SPELL_REG_FP "fp" + +extern "C" { + void *linux_aarch32_current_frame_pointer(); + void *linux_aarch32_previous_frame_pointer(); +} + +address os::current_stack_pointer() { + return (address) linux_aarch32_current_frame_pointer(); +} + +char* os::non_memory_address_word() { + // Must never look like an address returned by reserve_memory, + // even in its subfields (as defined by the CPU immediate fields, + // if the CPU splits constants across multiple instructions). + + return (char*) 0xfffffffful; +} + +void os::initialize_thread(Thread *thr) { +} + +address os::Linux::ucontext_get_pc(const ucontext_t * uc) { + return (address)uc->uc_mcontext.arm_pc; +} + +void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { + uc->uc_mcontext.arm_pc = (intptr_t)pc; +} + +intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.arm_sp; +} + +intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { + return (intptr_t*)uc->uc_mcontext.arm_fp; +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread +// is currently interrupted by SIGPROF. +// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal +// frames. Currently we don't do that on Linux, so it's the same as +// os::fetch_frame_from_context(). 
+ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, + const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { + + assert(thread != NULL, "just checking"); + assert(ret_sp != NULL, "just checking"); + assert(ret_fp != NULL, "just checking"); + + return os::fetch_frame_from_context(uc, ret_sp, ret_fp); +} + +ExtendedPC os::fetch_frame_from_context(const void* ucVoid, + intptr_t** ret_sp, intptr_t** ret_fp) { + + ExtendedPC epc; + ucontext_t* uc = (ucontext_t*)ucVoid; + + if (uc != NULL) { + epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); + if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); + if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); + } else { + // construct empty ExtendedPC for return value checking + epc = ExtendedPC(NULL); + if (ret_sp) *ret_sp = (intptr_t *)NULL; + if (ret_fp) *ret_fp = (intptr_t *)NULL; + } + + return epc; +} + +frame os::fetch_frame_from_context(const void* ucVoid) { + intptr_t* sp; + intptr_t* fp; + ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); + return frame(sp, fp, epc.pc()); +} + +bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { + address pc = (address) os::Linux::ucontext_get_pc(uc); + if (Interpreter::contains(pc)) { + // interpreter performs stack banging after the fixed frame header has + // been generated while the compilers perform it before. To maintain + // semantic consistency between interpreted and compiled frames, the + // method returns the Java sender of the current frame. + *fr = os::fetch_frame_from_context(uc); + if (!fr->is_first_java_frame()) { + assert(fr->safe_for_sender(thread), "Safety check"); + *fr = fr->java_sender(); + } + } else { + // more complex code with compiled code + assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); + CodeBlob* cb = CodeCache::find_blob(pc); + if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { + // Not sure where the pc points to, fallback to default + // stack overflow handling + return false; + } else { + // In compiled code, the stack banging is performed before LR + // has been saved in the frame. LR is live, and SP and FP + // belong to the caller. + intptr_t* fp = os::Linux::ucontext_get_fp(uc); + intptr_t* sp = os::Linux::ucontext_get_sp(uc); + //TODO: XXX: Merge + // could be pc = os::Linux::ucontext_get_pc(uc) ? + address pc = (address)(uc->uc_mcontext.arm_lr + - NativeInstruction::arm_insn_sz); + *fr = frame(sp, fp, pc); + if (!fr->is_java_frame()) { + assert(fr->safe_for_sender(thread), "Safety check"); + assert(!fr->is_first_frame(), "Safety check"); + *fr = fr->java_sender(); + } + } + } + assert(fr->is_java_frame(), "Safety check"); + return true; +} + +// By default, gcc always saves frame pointer rfp on this stack. This +// may get turned off by -fomit-frame-pointer. 
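+// Note on register usage: in ARM state GCC keeps the frame pointer in r11 (fp),
+// while in Thumb state it uses r7, so the APCS-style fp chain assumed by the
+// frame walking below is not maintained in Thumb builds; that is why the
+// __thumb__ variants of the functions below simply return an empty frame.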
+frame os::get_sender_for_C_frame(frame* fr) { +#ifdef __thumb__ + return frame(); +#else + address sender_pc = *(address*) fr->addr_at(fr->get_return_addr_offset(JNIFrameAPCS)); + intptr_t* link = *(intptr_t**) fr->addr_at(fr->get_link_offset(JNIFrameAPCS)); + return frame(fr->sender_sp(), link, sender_pc); +#endif +} + +frame os::current_frame() { +#ifdef __thumb__ + return frame(); +#else + intptr_t* fp = (intptr_t*)linux_aarch32_previous_frame_pointer(); + frame myframe((intptr_t*)os::current_stack_pointer(), + (intptr_t*)fp, + CAST_FROM_FN_PTR(address, os::current_frame)); + // check for C2 frame first, those to not have valid FP + if (!CodeCache::find_blob( + *(address*)myframe.addr_at(myframe.get_return_addr_offset(JNIFrameAPCS))) && + os::is_first_C_frame(&myframe)) { + // stack is not walkable + return frame(); + } else { + return os::get_sender_for_C_frame(&myframe); + } +#endif +} + +// Utility functions + +// From IA32 System Programming Guide +enum { + trap_page_fault = 0xE +}; + +// An operation in Unsafe has faulted. We're going to return to the +// instruction after the faulting load or store. We also set +// pending_unsafe_access_error so that at some point in the future our +// user will get a helpful message. +static address handle_unsafe_access(JavaThread* thread, address pc) { + // pc is the instruction which we must emulate + // doing a no-op is fine: return garbage from the load + // therefore, compute npc + address npc = pc + NativeInstruction::arm_insn_sz; + + // request an async exception + thread->set_pending_unsafe_access_error(); + + // return address of next instruction to execute + return npc; +} + +extern "C" JNIEXPORT int +JVM_handle_linux_signal(int sig, + siginfo_t* info, + void* ucVoid, + int abort_if_unrecognized) { + ucontext_t* uc = (ucontext_t*) ucVoid; + + Thread* t = Thread::current_or_null_safe(); + + // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away + // (no destructors can be run) + os::ThreadCrashProtection::check_crash_protection(sig, t); + + SignalHandlerMark shm(t); + + // Note: it's not uncommon that JNI code uses signal/sigset to install + // then restore certain signal handler (e.g. to temporarily block SIGPIPE, + // or have a SIGILL handler when detecting CPU type). When that happens, + // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To + // avoid unnecessary crash when libjsig is not preloaded, try handle signals + // that do not require siginfo/ucontext first. + + if (sig == SIGPIPE || sig == SIGXFSZ) { + // allow chained handler to go first + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } else { + if (PrintMiscellaneous && (WizardMode || Verbose)) { + char buf[64]; + warning("Ignoring %s - see bugs 4229104 or 646499219", + os::exception_name(sig, buf, sizeof(buf))); + } + return true; + } + } + +#ifdef CAN_SHOW_REGISTERS_ON_ASSERT + if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { + handle_assert_poison_fault(ucVoid, info->si_addr); + return 1; + } +#endif + + JavaThread* thread = NULL; + VMThread* vmthread = NULL; + if (os::Linux::signal_handlers_are_installed) { + if (t != NULL ){ + if(t->is_Java_thread()) { + thread = (JavaThread*)t; + } + else if(t->is_VM_thread()){ + vmthread = (VMThread *)t; + } + } + } +/* + NOTE: does not seem to work on linux. 
+ if (info == NULL || info->si_code <= 0 || info->si_code == SI_NOINFO) { + // can't decode this kind of signal + info = NULL; + } else { + assert(sig == info->si_signo, "bad siginfo"); + } +*/ + // decide if this trap can be handled by a stub + address stub = NULL; + + address pc = NULL; + + //%note os_trap_1 + if (info != NULL && uc != NULL && thread != NULL) { + pc = (address) os::Linux::ucontext_get_pc(uc); + + if (StubRoutines::is_safefetch_fault(pc)) { + os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); + return 1; + } + + // Handle ALL stack overflow variations here + if (sig == SIGSEGV) { + address addr = (address) info->si_addr; + + // check if fault address is within thread stack + if (thread->on_local_stack(addr)) { + // stack overflow + if (thread->in_stack_yellow_reserved_zone(addr)) { + thread->disable_stack_yellow_reserved_zone(); + if (thread->thread_state() == _thread_in_Java) { + if (thread->in_stack_reserved_zone(addr)) { + frame fr; + if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { + assert(fr.is_java_frame(), "Must be a Java frame"); + frame activation = + SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); + if (activation.sp() != NULL) { + thread->disable_stack_reserved_zone(); + if (activation.is_interpreted_frame()) { + thread->set_reserved_stack_activation((address)( + activation.fp() + frame::interpreter_frame_initial_sp_offset)); + } else { + thread->set_reserved_stack_activation((address)activation.unextended_sp()); + } + return 1; + } + } + } + // Throw a stack overflow exception. Guard pages will be reenabled + // while unwinding the stack. + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); + } else { + // Thread was in the vm or native code. Return and try to finish. + return 1; + } + } else if (thread->in_stack_red_zone(addr)) { + // Fatal red zone violation. Disable the guard pages and fall through + // to handle_unexpected_exception way down below. + thread->disable_stack_red_zone(); + tty->print_raw_cr("An irrecoverable stack overflow has occurred."); + + // This is a likely cause, but hard to verify. Let's just print + // it as a hint. + tty->print_raw_cr("Please check if any of your loaded .so files has " + "enabled executable stack (see man page execstack(8))"); + } else { + // Accessing stack address below sp may cause SEGV if current + // thread has MAP_GROWSDOWN stack. This should only happen when + // current thread was created by user code with MAP_GROWSDOWN flag + // and then attached to VM. See notes in os_linux.cpp. + if (thread->osthread()->expanding_stack() == 0) { + thread->osthread()->set_expanding_stack(); + if (os::Linux::manually_expand_stack(thread, addr)) { + thread->osthread()->clear_expanding_stack(); + return 1; + } + thread->osthread()->clear_expanding_stack(); + } else { + fatal("recursive segv. expanding stack."); + } + } + } + } + + if (thread->thread_state() == _thread_in_Java) { + // Java thread running in Java code => find exception handler if any + // a fault inside compiled code, the interpreter, or a stub + + // Handle signal from NativeJump::patch_verified_entry(). + if ((sig == SIGILL || sig == SIGTRAP) + && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { + if (TraceTraps) { + tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? 
"SIGTRAP" : "SIGILL"); + } + stub = SharedRuntime::get_handle_wrong_method_stub(); + } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { + stub = SharedRuntime::get_poll_stub(pc); + } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { + // BugId 4454115: A read from a MappedByteBuffer can fault + // here if the underlying file has been truncated. + // Do not crash the VM in such a case. + CodeBlob* cb = CodeCache::find_blob_unsafe(pc); + CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; + if (nm != NULL && nm->has_unsafe_access()) { + stub = handle_unsafe_access(thread, pc); + } + } + else + + if (sig == SIGFPE && + (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { + stub = + SharedRuntime:: + continuation_for_implicit_exception(thread, + pc, + SharedRuntime:: + IMPLICIT_DIVIDE_BY_ZERO); + } else if (sig == SIGSEGV && + !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { + // Determination of interpreter/vtable stub/compiled code null exception + stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); + } + } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { + // SIGILL must be caused by VM_Version::get_processor_features(). + *(int *)pc = Assembler::nop_insn; // patch instruction to NOP to indicate that it causes a SIGILL, + // flushing of icache is not necessary. + stub = pc + 4; // continue with next instruction. + } else if (thread->thread_state() == _thread_in_vm && + sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access()) { + stub = handle_unsafe_access(thread, pc); + } + + // jni_fast_GetField can trap at certain pc's if a GC kicks in + // and the heap gets shrunk before the field access. + if ((sig == SIGSEGV) || (sig == SIGBUS)) { + address addr = JNI_FastGetField::find_slowcase_pc(pc); + if (addr != (address)-1) { + stub = addr; + } + } + + // Check to see if we caught the safepoint code in the + // process of write protecting the memory serialization page. + // It write enables the page immediately after protecting it + // so we can just return to retry the write. + if ((sig == SIGSEGV) && + os::is_memory_serialize_page(thread, (address) info->si_addr)) { + // Block current thread until the memory serialize page permission restored. + os::block_on_serialize_page_trap(); + return true; + } + } + + if (stub != NULL) { + // save all thread context in case we need to restore it + if (thread != NULL) thread->set_saved_exception_pc(pc); + + os::Linux::ucontext_set_pc(uc, stub); + return true; + } + + // signal-chaining + if (os::Linux::chained_handler(sig, info, ucVoid)) { + return true; + } + + if (!abort_if_unrecognized) { + // caller wants another chance, so give it to him + return false; + } + + if (pc == NULL && uc != NULL) { + pc = os::Linux::ucontext_get_pc(uc); + } + + // unmask current signal + sigset_t newset; + sigemptyset(&newset); + sigaddset(&newset, sig); + sigprocmask(SIG_UNBLOCK, &newset, NULL); + + VMError::report_and_die(t, sig, pc, info, ucVoid); + + ShouldNotReachHere(); + return true; // Mute compiler +} + +void os::Linux::init_thread_fpu_state(void) { +} + +int os::Linux::get_fpu_control_word(void) { + return 0; +} + +void os::Linux::set_fpu_control_word(int fpu_control) { +} + +// Check that the linux kernel version is 2.4 or higher since earlier +// versions do not support SSE without patches. 
+bool os::supports_sse() { + return true; +} + +bool os::is_allocatable(size_t bytes) { + return true; +} + +//////////////////////////////////////////////////////////////////////////////// +// thread stack + +// Minimum usable stack sizes required to get to user code. Space for +// HotSpot guard pages is added later. +size_t os::Posix::_compiler_thread_min_stack_allowed = (32 DEBUG_ONLY(+ 4)) * K; +size_t os::Posix::_java_thread_min_stack_allowed = (32 DEBUG_ONLY(+ 4)) * K; +size_t os::Posix::_vm_internal_thread_min_stack_allowed = (48 DEBUG_ONLY(+ 4)) * K; + +// return default stack size for thr_type +size_t os::Posix::default_stack_size(os::ThreadType thr_type) { + // default stack size (compiler thread needs larger stack) + size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); + return s; +} + +// Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ JavaThread created by VM does not have glibc +// | glibc guard page | - guard, attached Java thread usually has +// | |/ 1 page glibc guard. +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | HotSpot Guard Pages | - red and yellow pages +// | |/ +// +------------------------+ JavaThread::stack_yellow_zone_base() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// Non-Java thread: +// +// Low memory addresses +// +------------------------+ +// | |\ +// | glibc guard page | - usually 1 page +// | |/ +// P1 +------------------------+ Thread::stack_base() - Thread::stack_size() +// | |\ +// | Normal Stack | - +// | |/ +// P2 +------------------------+ Thread::stack_base() +// +// ** P1 (aka bottom) and size ( P2 = P1 - size) are the address and stack size returned from +// pthread_attr_getstack() + +///////////////////////////////////////////////////////////////////////////// +// helper functions for fatal error handler + + +void os::print_context(outputStream *st, const void *context) { + if (context == NULL) return; + + ucontext_t *uc = (ucontext_t*)context; + st->print_cr("Registers:"); + + for (int r = 0; r < 16; r++) + st->print_cr( "R%d=" INTPTR_FORMAT, r, *((unsigned int*)&uc->uc_mcontext.arm_r0 + r) ); + + st->cr(); + + intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); + st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); + print_hex_dump(st, (address)sp, (address)(sp + 8*sizeof(intptr_t)), sizeof(intptr_t)); + st->cr(); + + // Note: it may be unsafe to inspect memory near pc. For example, pc may + // point to garbage if entry point in an nmethod is corrupted. Leave + // this at the end, and hope for the best. + address pc = os::Linux::ucontext_get_pc(uc); + st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); + print_hex_dump(st, pc - 32, pc + 32, sizeof(char)); +} + +void os::print_register_info(outputStream *st, const void *context) { + if (context == NULL) return; + + ucontext_t *uc = (ucontext_t*)context; + + st->print_cr("Register to memory mapping:"); + st->cr(); + + for (int r = 0; r < 16; r++) { + st->print( "R%d=", r); print_location(st, *((unsigned int*)&uc->uc_mcontext.arm_r0 + r)); + } + st->cr(); +} + +void os::setup_fpu() { +} + +#ifndef PRODUCT +void os::verify_stack_alignment() { + assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); +} +#endif + +int os::extra_bang_size_in_bytes() { + // AArch64 does not require the additional stack bang. + // does AArch32? 
+ return 0; +} + +extern "C" { + int SpinPause() { + return 0; + } + + void _Copy_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { + if (from > to) { + jshort *end = from + count; + while (from < end) + *(to++) = *(from++); + } + else if (from < to) { + jshort *end = from; + from += count - 1; + to += count - 1; + while (from >= end) + *(to--) = *(from--); + } + } + void _Copy_conjoint_jints_atomic(jint* from, jint* to, size_t count) { + if (from > to) { + jint *end = from + count; + while (from < end) + *(to++) = *(from++); + } + else if (from < to) { + jint *end = from; + from += count - 1; + to += count - 1; + while (from >= end) + *(to--) = *(from--); + } + } + void _Copy_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { + if (from > to) { + jlong *end = from + count; + while (from < end) + os::atomic_copy64(from++, to++); + } + else if (from < to) { + jlong *end = from; + from += count - 1; + to += count - 1; + while (from >= end) + os::atomic_copy64(from--, to--); + } + } + + void _Copy_arrayof_conjoint_bytes(HeapWord* from, + HeapWord* to, + size_t count) { + memmove(to, from, count); + } + void _Copy_arrayof_conjoint_jshorts(HeapWord* from, + HeapWord* to, + size_t count) { + memmove(to, from, count * 2); + } + void _Copy_arrayof_conjoint_jints(HeapWord* from, + HeapWord* to, + size_t count) { + memmove(to, from, count * 4); + } + void _Copy_arrayof_conjoint_jlongs(HeapWord* from, + HeapWord* to, + size_t count) { + memmove(to, from, count * 8); + } +}; --- /dev/null 2018-09-25 19:25:51.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/os_linux_aarch32.hpp 2018-09-25 19:25:50.000000000 +0300 @@ -0,0 +1,46 @@ +/* + * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH32_VM_OS_LINUX_AARCH32_HPP
+#define OS_CPU_LINUX_AARCH32_VM_OS_LINUX_AARCH32_HPP
+
+  static void setup_fpu();
+  static bool supports_sse();
+
+  static jlong rdtsc();
+
+  static bool is_allocatable(size_t bytes);
+
+  // Used to register dynamic code cache area with the OS
+  // Note: Currently only used in 64 bit Windows implementations
+  static bool register_code_area(char *low, char *high) { return true; }
+
+  // Atomically copy 64 bits of data
+  static void atomic_copy64(const volatile void *src, volatile void *dst) {
+    *(jlong *) dst = *(const jlong *) src;
+  }
+
+#endif // OS_CPU_LINUX_AARCH32_VM_OS_LINUX_AARCH32_HPP
--- /dev/null	2018-09-25 19:25:52.000000000 +0300
+++ new/src/hotspot/os_cpu/linux_aarch32/os_linux_aarch32.inline.hpp	2018-09-25 19:25:51.000000000 +0300
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2015, Linaro Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH32_VM_OS_LINUX_AARCH32_INLINE_HPP
+#define OS_CPU_LINUX_AARCH32_VM_OS_LINUX_AARCH32_INLINE_HPP
+
+#include "runtime/os.hpp"
+
+// AArch32 has no x86-style time-stamp counter (RDTSC), so return the VM's
+// monotonic clock as a portable stand-in; that is adequate for the
+// elapsed-time measurements this counter is used for.
+inline jlong os::rdtsc() {
+  // javaTimeNanos() is monotonic on Linux (clock_gettime) and cheap enough
+  // for coarse timing.
+  return javaTimeNanos();
+}
+
+#endif // OS_CPU_LINUX_AARCH32_VM_OS_LINUX_AARCH32_INLINE_HPP
--- /dev/null	2018-09-25 19:25:53.000000000 +0300
+++ new/src/hotspot/os_cpu/linux_aarch32/prefetch_linux_aarch32.inline.hpp	2018-09-25 19:25:52.000000000 +0300
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2015, Linaro Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH32_VM_PREFETCH_LINUX_AARCH32_INLINE_HPP +#define OS_CPU_LINUX_AARCH32_VM_PREFETCH_LINUX_AARCH32_INLINE_HPP + +#include "runtime/prefetch.hpp" + + +inline void Prefetch::read (void *loc, intx interval) { +//FIXME Put this back +// if (interval >= 0) +// asm("prfm PLDL1KEEP, [%0, %1]" : : "r"(loc), "r"(interval)); +//FIXME END +} + +inline void Prefetch::write(void *loc, intx interval) { +//FIXME Put this back +// if (interval >= 0) +// asm("prfm PSTL1KEEP, [%0, %1]" : : "r"(loc), "r"(interval)); +//FIXME END +} + +#endif // OS_CPU_LINUX_AARCH32_VM_PREFETCH_LINUX_AARCH32_INLINE_HPP --- /dev/null 2018-09-25 19:25:54.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/thread_linux_aarch32.cpp 2018-09-25 19:25:53.000000000 +0300 @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "memory/metaspaceShared.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" + +frame JavaThread::pd_last_frame() { + assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); + return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread is +// currently interrupted by SIGPROF +bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, + void* ucontext, bool isInJava) { + + assert(Thread::current() == this, "caller must be current thread"); + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { + return pd_get_top_frame(fr_addr, ucontext, isInJava); +} + +bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { + assert(this->is_Java_thread(), "must be JavaThread"); + JavaThread* jt = (JavaThread *)this; + + // If we have a last_Java_frame, then we should use it even if + // isInJava == true. 
It should be more reliable than ucontext info. + if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { + *fr_addr = jt->pd_last_frame(); + return true; + } + + // At this point, we don't have a last_Java_frame, so + // we try to glean some information out of the ucontext + // if we were running Java code when SIGPROF came in. + if (isInJava) { + ucontext_t* uc = (ucontext_t*) ucontext; + + intptr_t* ret_fp; + intptr_t* ret_sp; + ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, + &ret_sp, &ret_fp); + if (addr.pc() == NULL || ret_sp == NULL ) { + // ucontext wasn't useful + return false; + } + + if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { + // In the middle of a trampoline call. Bail out for safety. + // This happens rarely so shouldn't affect profiling. + return false; + } + + frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(jt)) { +#ifdef COMPILER2 + frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(jt)) { + // nothing else to try if the frame isn't good + return false; + } + ret_frame = ret_frame2; +#else + // nothing else to try if the frame isn't good + return false; +#endif /* COMPILER2 */ + } + *fr_addr = ret_frame; + return true; + } + + // nothing else to try + return false; +} + +void JavaThread::cache_global_variables() { } + --- /dev/null 2018-09-25 19:25:55.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/thread_linux_aarch32.hpp 2018-09-25 19:25:54.000000000 +0300 @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH32_VM_THREAD_LINUX_AARCH32_HPP +#define OS_CPU_LINUX_AARCH32_VM_THREAD_LINUX_AARCH32_HPP + + private: +#ifdef ASSERT + // spill stack holds N callee-save registers at each Java call and + // grows downwards towards limit + // we need limit to check we have space for a spill and base so we + // can identify all live spill frames at GC (eventually) + address _spill_stack; + address _spill_stack_base; + address _spill_stack_limit; +#endif // ASSERT + + void pd_initialize() { + _anchor.clear(); + } + + frame pd_last_frame(); + + public: + // Mutators are highly dangerous.... 
+ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } + void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } + + void set_base_of_stack_pointer(intptr_t* base_sp) { + } + + static ByteSize last_Java_fp_offset() { + return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); + } + + intptr_t* base_of_stack_pointer() { + return NULL; + } + void record_base_of_stack_pointer() { + } + + bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, + bool isInJava); + + bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); +private: + bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); +public: + + // These routines are only used on cpu architectures that + // have separate register stacks (Itanium). + static bool register_stack_overflow() { return false; } + static void enable_register_stack_guard() {} + static void disable_register_stack_guard() {} + +#endif // OS_CPU_LINUX_AARCH32_VM_THREAD_LINUX_AARCH32_HPP --- /dev/null 2018-09-25 19:25:56.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/vmStructs_linux_aarch32.hpp 2018-09-25 19:25:55.000000000 +0300 @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_CPU_LINUX_AARCH32_VM_VMSTRUCTS_LINUX_AARCH32_HPP +#define OS_CPU_LINUX_AARCH32_VM_VMSTRUCTS_LINUX_AARCH32_HPP + +// These are the OS and CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
+ +#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ + \ + /******************************/ \ + /* Threads (NOTE: incomplete) */ \ + /******************************/ \ + nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ + nonstatic_field(OSThread, _pthread_id, pthread_t) + + +#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ + \ + /**********************/ \ + /* Posix Thread IDs */ \ + /**********************/ \ + \ + declare_integer_type(OSThread::thread_id_t) \ + declare_unsigned_integer_type(pthread_t) + +#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + +#endif // OS_CPU_LINUX_AARCH32_VM_VMSTRUCTS_LINUX_AARCH32_HPP --- /dev/null 2018-09-25 19:25:57.000000000 +0300 +++ new/src/hotspot/os_cpu/linux_aarch32/vm_version_linux_aarch32.cpp 2018-09-25 19:25:56.000000000 +0300 @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2015, Linaro Ltd. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "runtime/os.hpp" +#include "vm_version_aarch32.hpp" + --- /dev/null 2018-09-25 19:25:58.000000000 +0300 +++ new/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionArm.java 2018-09-25 19:25:57.000000000 +0300 @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.debugger; + +public class MachineDescriptionArm extends MachineDescriptionTwosComplement implements MachineDescription { + public long getAddressSize() { + return 4; + } + + public boolean isLP64() { + return false; + } + + public boolean isBigEndian() { + return false; + } +}
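
As a closing illustration of the Java-thread stack diagram in os_linux_aarch32.cpp above: P1 and P2 come straight from the pthread attributes of the current thread. The sketch below is illustrative only and not part of the patch; it shows how the bottom address and size reported by glibc relate to what HotSpot calls Thread::stack_base().

    // Illustrative sketch: mapping glibc's view of the current thread's stack
    // onto P1/P2 from the layout comments in os_linux_aarch32.cpp.
    // pthread_getattr_np() is a GNU extension; _GNU_SOURCE is needed when
    // compiling as plain C (g++ defines it by default).
    #include <pthread.h>

    static void query_stack_extent() {
      pthread_attr_t attr;
      if (pthread_getattr_np(pthread_self(), &attr) == 0) {
        void*  bottom = NULL;   // P1: lowest usable stack address
        size_t size   = 0;
        pthread_attr_getstack(&attr, &bottom, &size);
        void* top = (char*)bottom + size;  // P2: what HotSpot calls Thread::stack_base()
        pthread_attr_destroy(&attr);
        (void)top;
      }
    }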