--- /dev/null 2016-08-24 15:41:39.598575000 -0400
+++ new/hotspot/src/cpu/arm/vm/macroAssembler_arm.cpp 2016-09-27 10:00:45.287517976 -0400
@@ -0,0 +1,3118 @@
+/*
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "asm/macroAssembler.hpp"
+#include "ci/ciEnv.hpp"
+#include "code/nativeInst.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "gc/shared/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/macros.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1CollectedHeap.inline.hpp"
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc/g1/heapRegion.hpp"
+#endif
+
+// Implementation of AddressLiteral
+
+void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
+  switch (rtype) {
+  case relocInfo::oop_type:
+    // Oops are a special case. Normally they would be their own section
+    // but in cases like icBuffer they are literals in the code stream that
+    // we don't have a section for. We use none so that we get a literal address
+    // which is always patchable.
+    break;
+  case relocInfo::external_word_type:
+    _rspec = external_word_Relocation::spec(_target);
+    break;
+  case relocInfo::internal_word_type:
+    _rspec = internal_word_Relocation::spec(_target);
+    break;
+  case relocInfo::opt_virtual_call_type:
+    _rspec = opt_virtual_call_Relocation::spec();
+    break;
+  case relocInfo::static_call_type:
+    _rspec = static_call_Relocation::spec();
+    break;
+  case relocInfo::runtime_call_type:
+    _rspec = runtime_call_Relocation::spec();
+    break;
+  case relocInfo::poll_type:
+  case relocInfo::poll_return_type:
+    _rspec = Relocation::spec_simple(rtype);
+    break;
+  case relocInfo::none:
+    break;
+  default:
+    ShouldNotReachHere();
+    break;
+  }
+}
+
+// Initially added to the Assembler interface as a pure virtual:
+//   RegisterConstant delayed_value(..)
+// for:
+//   6812678 macro assembler needs delayed binding of a few constants (for 6655638)
+// this was subsequently modified to its present name and return type
+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
+                                                      Register tmp,
+                                                      int offset) {
+  ShouldNotReachHere();
+  return RegisterOrConstant(-1);
+}
+
+
+#ifdef AARCH64
+// Note: ARM32 version is OS dependent
+void MacroAssembler::breakpoint(AsmCondition cond) {
+  if (cond == al) {
+    brk();
+  } else {
+    Label L;
+    b(L, inverse(cond));
+    brk();
+    bind(L);
+  }
+}
+#endif // AARCH64
+
+
+// virtual method calling
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           Register vtable_index,
+                                           Register method_result) {
+  const int base_offset = in_bytes(Klass::vtable_start_offset()) + vtableEntry::method_offset_in_bytes();
+  assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
+  add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
+  ldr(method_result, Address(recv_klass, base_offset));
+}
+
+
+// Simplified, combined version, good for typical uses.
+// Falls through on failure.
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register temp_reg,
+                                         Register temp_reg2,
+                                         Register temp_reg3,
+                                         Label& L_success) {
+  Label L_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, NULL);
+  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, NULL);
+  bind(L_failure);
+}
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp_reg2,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path) {
+
+  assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
+  const Register super_check_offset = temp_reg2;
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  int sco_offset = in_bytes(Klass::super_check_offset_offset());
+  Address super_check_offset_addr(super_klass, sco_offset);
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface. Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  cmp(sub_klass, super_klass);
+  b(*L_success, eq);
+
+  // Check the supertype display:
+  ldr_u32(super_check_offset, super_check_offset_addr);
+
+  Address super_check_addr(sub_klass, super_check_offset);
+  ldr(temp_reg, super_check_addr); // load displayed supertype
+  cmp(super_klass, temp_reg);
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  b(*L_success, eq);
+  cmp_32(super_check_offset, sc_offset);
+  if (L_failure == &L_fallthrough) {
+    b(*L_slow_path, eq);
+  } else {
+    b(*L_failure, ne);
+    if (L_slow_path != &L_fallthrough) {
+      b(*L_slow_path);
+    }
+  }
+
+  bind(L_fallthrough);
+}
+
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,
+                                                   Register temp3_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   bool set_cond_codes) {
+#ifdef AARCH64
+  NOT_IMPLEMENTED();
+#else
+  // Note: if used by code that expects a register to be 0 on success,
+  // this register must be temp_reg and set_cond_codes must be true
+
+  Register saved_reg = noreg;
+
+  // get additional tmp registers
+  if (temp3_reg == noreg) {
+    saved_reg = temp3_reg = LR;
+    push(saved_reg);
+  }
+
+  assert(temp2_reg != noreg, "need all the temporary registers");
+  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);
+
+  Register cmp_temp = temp_reg;
+  Register scan_temp = temp3_reg;
+  Register count_temp = temp2_reg;
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = in_bytes(Klass::secondary_supers_offset());
+  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
+  Address secondary_supers_addr(sub_klass, ss_offset);
+  Address super_cache_addr(sub_klass, sc_offset);
+
+#ifndef PRODUCT
+  inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
+#endif
+
+  // We will consult the secondary-super array.
+  ldr(scan_temp, Address(sub_klass, ss_offset));
+
+  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
+  // else search_key is the uncompressed super_klass
+  Register search_key = super_klass;
+
+  // Load the array length.
+  ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
+  add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());
+
+  add(count_temp, count_temp, 1);
+
+  Label L_loop, L_setnz_and_fail, L_fail;
+
+  // Top of search loop
+  bind(L_loop);
+  // Notes:
+  //  scan_temp starts at the array elements
+  //  count_temp is 1+size
+  subs(count_temp, count_temp, 1);
+  if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
+    // direct jump to L_failure if failed and no cleanup needed
+    b(*L_failure, eq); // not found and no cleanup needed
+  } else {
+    b(L_fail, eq); // not found in the array
+  }
+
+  // Load next super to check
+  // In the array of super classes, elements are pointer sized.
+  int element_size = wordSize;
+  ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));
+
+  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
+  subs(cmp_temp, cmp_temp, search_key);
+
+  // A miss means we are NOT a subtype and need to keep looping
+  b(L_loop, ne);
+
+  // Falling out the bottom means we found a hit; we ARE a subtype
+
+  // Note: temp_reg/cmp_temp is already 0 and flag Z is set
+
+  // Success. Cache the super we found and proceed in triumph.
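+  // The secondary_super_cache is a single-entry cache updated without
+  // synchronization: racing writers may overwrite each other, which is
+  // harmless because the secondary_supers array scanned above remains
+  // the authoritative source.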
+  str(super_klass, Address(sub_klass, sc_offset));
+
+  if (saved_reg != noreg) {
+    // Return success
+    pop(saved_reg);
+  }
+
+  b(*L_success);
+
+  bind(L_fail);
+  // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
+  if (set_cond_codes) {
+    movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
+  }
+  if (saved_reg != noreg) {
+    pop(saved_reg);
+  }
+  if (L_failure != &L_fallthrough) {
+    b(*L_failure);
+  }
+
+  bind(L_fallthrough);
+#endif
+}
+
+// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
+Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
+  assert_different_registers(params_base, params_count);
+  add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
+  return Address(tmp, -Interpreter::stackElementSize);
+}
+
+
+void MacroAssembler::align(int modulus) {
+  while (offset() % modulus != 0) {
+    nop();
+  }
+}
+
+int MacroAssembler::set_last_Java_frame(Register last_java_sp,
+                                        Register last_java_fp,
+                                        bool save_last_java_pc,
+                                        Register tmp) {
+  int pc_offset;
+  if (last_java_fp != noreg) {
+    // optional
+    str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
+    _fp_saved = true;
+  } else {
+    _fp_saved = false;
+  }
+  if (AARCH64_ONLY(true) NOT_AARCH64(save_last_java_pc)) { // optional on 32-bit ARM
+#ifdef AARCH64
+    pc_offset = mov_pc_to(tmp);
+    str(tmp, Address(Rthread, JavaThread::last_Java_pc_offset()));
+#else
+    str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
+    pc_offset = offset() + VM_Version::stored_pc_adjustment();
+#endif
+    _pc_saved = true;
+  } else {
+    _pc_saved = false;
+    pc_offset = -1;
+  }
+  // According to the comment in javaFrameAnchor, SP must be saved last, so that other
+  // entries are valid when SP is set.
+
+  // However, this is probably not a strong constraint since for instance PC is
+  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
+  // we now write the fields in the expected order but we have not added a StoreStore
+  // barrier.
+
+  // XXX: if the ordering is really important, PC should always be saved (without forgetting
+  // to update oop_map offsets) and a StoreStore barrier might be needed.
+
+  if (last_java_sp == noreg) {
+    last_java_sp = SP; // always saved
+  }
+#ifdef AARCH64
+  if (last_java_sp == SP) {
+    mov(tmp, SP);
+    str(tmp, Address(Rthread, JavaThread::last_Java_sp_offset()));
+  } else {
+    str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
+  }
+#else
+  str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));
+#endif
+
+  return pc_offset; // for oopmaps
+}
+
+void MacroAssembler::reset_last_Java_frame(Register tmp) {
+  const Register Rzero = zero_register(tmp);
+  str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
+  if (_fp_saved) {
+    str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
+  }
+  if (_pc_saved) {
+    str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
+  }
+}
+
+
+// Implementation of call_VM versions
+
+void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
+  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
+  assert(number_of_arguments <= 4, "cannot have more than 4 arguments");
+
+#ifndef AARCH64
+  // Safer to save R9 here since callers may have been written
+  // assuming R9 survives. This is suboptimal but is not worth
+  // optimizing for the few platforms where R9 is scratched.
+  push(RegisterSet(R4) | R9ifScratched);
+  mov(R4, SP);
+  bic(SP, SP, StackAlignmentInBytes - 1);
+#endif // AARCH64
+  call(entry_point, relocInfo::runtime_call_type);
+#ifndef AARCH64
+  mov(SP, R4);
+  pop(RegisterSet(R4) | R9ifScratched);
+#endif // AARCH64
+}
+
+
+void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
+  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
+  assert(number_of_arguments <= 3, "cannot have more than 3 arguments");
+
+  const Register tmp = Rtemp;
+  assert_different_registers(oop_result, tmp);
+
+  set_last_Java_frame(SP, FP, true, tmp);
+
+#ifdef ASSERT
+  AARCH64_ONLY(if (UseCompressedOops || UseCompressedClassPointers) { verify_heapbase("call_VM_helper: heap base corrupted?"); });
+#endif // ASSERT
+
+#ifndef AARCH64
+#if R9_IS_SCRATCHED
+  // Safer to save R9 here since callers may have been written
+  // assuming R9 survives. This is suboptimal but is not worth
+  // optimizing for the few platforms where R9 is scratched.
+
+  // Note: cannot save R9 above the saved SP (some calls expect for
+  // instance the Java stack top at the saved SP)
+  // => once saved (with set_last_Java_frame), decrease SP before rounding to
+  // ensure the slot at SP will be free for R9.
+  sub(SP, SP, 4);
+  bic(SP, SP, StackAlignmentInBytes - 1);
+  str(R9, Address(SP, 0));
+#else
+  bic(SP, SP, StackAlignmentInBytes - 1);
+#endif // R9_IS_SCRATCHED
+#endif
+
+  mov(R0, Rthread);
+  call(entry_point, relocInfo::runtime_call_type);
+
+#ifndef AARCH64
+#if R9_IS_SCRATCHED
+  ldr(R9, Address(SP, 0));
+#endif
+  ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));
+#endif
+
+  reset_last_Java_frame(tmp);
+
+  // C++ interp handles this in the interpreter
+  check_and_handle_popframe();
+  check_and_handle_earlyret();
+
+  if (check_exceptions) {
+    // check for pending exceptions
+    ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
+#ifdef AARCH64
+    Label L;
+    cbz(tmp, L);
+    mov_pc_to(Rexception_pc);
+    b(StubRoutines::forward_exception_entry());
+    bind(L);
+#else
+    cmp(tmp, 0);
+    mov(Rexception_pc, PC, ne);
+    b(StubRoutines::forward_exception_entry(), ne);
+#endif // AARCH64
+  }
+
+  // get oop result if there is one and reset the value in the thread
+  if (oop_result->is_valid()) {
+    get_vm_result(oop_result, tmp);
+  }
+}
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
+  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
+  assert (arg_1 == R1, "fixed register for arg_1");
+  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
+  assert (arg_1 == R1, "fixed register for arg_1");
+  assert (arg_2 == R2, "fixed register for arg_2");
+  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
+  assert (arg_1 == R1, "fixed register for arg_1");
+  assert (arg_2 == R2, "fixed register for arg_2");
+  assert (arg_3 == R3, "fixed register for arg_3");
+  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
+}
+
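+// Usage sketch (illustrative; Robj and SomeRuntime::entry are placeholder
+// names, not part of this change): arguments must already be in the fixed
+// registers checked by the asserts above, e.g.
+//   __ mov(R1, Robj);
+//   __ call_VM(R0, CAST_FROM_FN_PTR(address, SomeRuntime::entry), R1);
+// The Register parameters only assert the calling convention; they do not
+// move any values.
+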
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
+  // Not used on ARM
+  Unimplemented();
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
+  // Not used on ARM
+  Unimplemented();
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
+  // Not used on ARM
+  Unimplemented();
+}
+
+
+void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
+  // Not used on ARM
+  Unimplemented();
+}
+
+// Raw call, without saving/restoring registers, exception handling, etc.
+// Mainly used from various stubs.
+void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
+  const Register tmp = Rtemp; // Rtemp free since scratched by call
+  set_last_Java_frame(SP, FP, true, tmp);
+#if R9_IS_SCRATCHED
+  if (save_R9_if_scratched) {
+    // Note: Saving also R10 for alignment.
+    push(RegisterSet(R9, R10));
+  }
+#endif
+  mov(R0, Rthread);
+  call(entry_point, relocInfo::runtime_call_type);
+#if R9_IS_SCRATCHED
+  if (save_R9_if_scratched) {
+    pop(RegisterSet(R9, R10));
+  }
+#endif
+  reset_last_Java_frame(tmp);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point) {
+  call_VM_leaf_helper(entry_point, 0);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
+  assert (arg_1 == R0, "fixed register for arg_1");
+  call_VM_leaf_helper(entry_point, 1);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
+  assert (arg_1 == R0, "fixed register for arg_1");
+  assert (arg_2 == R1, "fixed register for arg_2");
+  call_VM_leaf_helper(entry_point, 2);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
+  assert (arg_1 == R0, "fixed register for arg_1");
+  assert (arg_2 == R1, "fixed register for arg_2");
+  assert (arg_3 == R2, "fixed register for arg_3");
+  call_VM_leaf_helper(entry_point, 3);
+}
+
+void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
+  assert (arg_1 == R0, "fixed register for arg_1");
+  assert (arg_2 == R1, "fixed register for arg_2");
+  assert (arg_3 == R2, "fixed register for arg_3");
+  assert (arg_4 == R3, "fixed register for arg_4");
+  call_VM_leaf_helper(entry_point, 4);
+}
+
+void MacroAssembler::get_vm_result(Register oop_result, Register tmp) {
+  assert_different_registers(oop_result, tmp);
+  ldr(oop_result, Address(Rthread, JavaThread::vm_result_offset()));
+  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_offset()));
+  verify_oop(oop_result);
+}
+
+void MacroAssembler::get_vm_result_2(Register metadata_result, Register tmp) {
+  assert_different_registers(metadata_result, tmp);
+  ldr(metadata_result, Address(Rthread, JavaThread::vm_result_2_offset()));
+  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_2_offset()));
+}
+
+void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
+  if (arg2.is_register()) {
+    add(dst, arg1, arg2.as_register());
+  } else {
+    add(dst, arg1, arg2.as_constant());
+  }
+}
+
+void MacroAssembler::add_slow(Register rd, Register rn, int c) {
+#ifdef AARCH64
+  if (c == 0) {
+    if (rd != rn) {
+      mov(rd, rn);
+    }
+    return;
+  }
+  if (c < 0) {
+    sub_slow(rd, rn, -c);
+    return;
+  }
+  if (c > right_n_bits(24)) {
+    guarantee(rd != rn, "no large add_slow with only one register");
+    mov_slow(rd, c);
+    add(rd, rn, rd);
+  } else {
+    int lo = c & right_n_bits(12);
+    int hi = (c >> 12) & right_n_bits(12);
+    if (lo != 0) {
+      add(rd, rn, lo, lsl0);
+    }
+    if (hi != 0) {
+      add(rd, (lo == 0) ? rn : rd, hi, lsl12);
+    }
+  }
+#else
+  // This function is used in the compiler for handling large frame offsets
+  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
+    return sub(rd, rn, (-c));
+  }
+  int low = c & 0x3fc;
+  if (low != 0) {
+    add(rd, rn, low);
+    rn = rd;
+  }
+  if (c & ~0x3fc) {
+    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
+    add(rd, rn, c & ~0x3fc);
+  } else if (rd != rn) {
+    assert(c == 0, "");
+    mov(rd, rn); // need to generate at least one move!
+  }
+#endif // AARCH64
+}
+
+void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
+#ifdef AARCH64
+  if (c <= 0) {
+    add_slow(rd, rn, -c);
+    return;
+  }
+  if (c > right_n_bits(24)) {
+    guarantee(rd != rn, "no large sub_slow with only one register");
+    mov_slow(rd, c);
+    sub(rd, rn, rd);
+  } else {
+    int lo = c & right_n_bits(12);
+    int hi = (c >> 12) & right_n_bits(12);
+    if (lo != 0) {
+      sub(rd, rn, lo, lsl0);
+    }
+    if (hi != 0) {
+      sub(rd, (lo == 0) ? rn : rd, hi, lsl12);
+    }
+  }
+#else
+  // This function is used in the compiler for handling large frame offsets
+  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
+    return add(rd, rn, (-c));
+  }
+  int low = c & 0x3fc;
+  if (low != 0) {
+    sub(rd, rn, low);
+    rn = rd;
+  }
+  if (c & ~0x3fc) {
+    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
+    sub(rd, rn, c & ~0x3fc);
+  } else if (rd != rn) {
+    assert(c == 0, "");
+    mov(rd, rn); // need to generate at least one move!
+  }
+#endif // AARCH64
+}
+
+void MacroAssembler::mov_slow(Register rd, address addr) {
+  // do *not* call the non-relocated mov_related_address
+  mov_slow(rd, (intptr_t)addr);
+}
+
+void MacroAssembler::mov_slow(Register rd, const char *str) {
+  mov_slow(rd, (intptr_t)str);
+}
+
+#ifdef AARCH64
+
+// Common code for mov_slow and instr_count_for_mov_slow.
+// Returns number of instructions of mov_slow pattern,
+// generating it if non-null MacroAssembler is given.
+int MacroAssembler::mov_slow_helper(Register rd, intptr_t c, MacroAssembler* masm) {
+  // This code pattern is matched in NativeInstruction::is_mov_slow.
+  // Update it at modifications.
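+  //
+  // For example, c = 0x0000123400000000 is emitted as a single movz
+  // (base_shift == 32), c = 0xffff1234ffffffff as a single movn, and a
+  // constant encodable as a logical immediate as a single orr; anything
+  // else falls through to movz/movn plus up to three movk instructions,
+  // so the pattern is always 1 to 4 instructions long.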
+
+  const intx mask = right_n_bits(16);
+  // 1 movz instruction
+  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+    if ((c & ~(mask << base_shift)) == 0) {
+      if (masm != NULL) {
+        masm->movz(rd, ((uintx)c) >> base_shift, base_shift);
+      }
+      return 1;
+    }
+  }
+  // 1 movn instruction
+  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+    if (((~c) & ~(mask << base_shift)) == 0) {
+      if (masm != NULL) {
+        masm->movn(rd, ((uintx)(~c)) >> base_shift, base_shift);
+      }
+      return 1;
+    }
+  }
+  // 1 orr instruction
+  {
+    LogicalImmediate imm(c, false);
+    if (imm.is_encoded()) {
+      if (masm != NULL) {
+        masm->orr(rd, ZR, imm);
+      }
+      return 1;
+    }
+  }
+  // 1 movz/movn + up to 3 movk instructions
+  int zeroes = 0;
+  int ones = 0;
+  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+    int part = (c >> base_shift) & mask;
+    if (part == 0) {
+      ++zeroes;
+    } else if (part == mask) {
+      ++ones;
+    }
+  }
+  int def_bits = 0;
+  if (ones > zeroes) {
+    def_bits = mask;
+  }
+  int inst_count = 0;
+  for (int base_shift = 0; base_shift < 64; base_shift += 16) {
+    int part = (c >> base_shift) & mask;
+    if (part != def_bits) {
+      if (masm != NULL) {
+        if (inst_count > 0) {
+          masm->movk(rd, part, base_shift);
+        } else {
+          if (def_bits == 0) {
+            masm->movz(rd, part, base_shift);
+          } else {
+            masm->movn(rd, ~part & mask, base_shift);
+          }
+        }
+      }
+      inst_count++;
+    }
+  }
+  assert((1 <= inst_count) && (inst_count <= 4), "incorrect number of instructions");
+  return inst_count;
+}
+
+void MacroAssembler::mov_slow(Register rd, intptr_t c) {
+#ifdef ASSERT
+  int off = offset();
+#endif
+  (void) mov_slow_helper(rd, c, this);
+  assert(offset() - off == instr_count_for_mov_slow(c) * InstructionSize, "size mismatch");
+}
+
+// Counts instructions generated by mov_slow(rd, c).
+int MacroAssembler::instr_count_for_mov_slow(intptr_t c) {
+  return mov_slow_helper(noreg, c, NULL);
+}
+
+int MacroAssembler::instr_count_for_mov_slow(address c) {
+  return mov_slow_helper(noreg, (intptr_t)c, NULL);
+}
+
+#else
+
+void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
+  if (AsmOperand::is_rotated_imm(c)) {
+    mov(rd, c, cond);
+  } else if (AsmOperand::is_rotated_imm(~c)) {
+    mvn(rd, ~c, cond);
+  } else if (VM_Version::supports_movw()) {
+    movw(rd, c & 0xffff, cond);
+    if ((unsigned int)c >> 16) {
+      movt(rd, (unsigned int)c >> 16, cond);
+    }
+  } else {
+    // Find first non-zero bit
+    int shift = 0;
+    while ((c & (3 << shift)) == 0) {
+      shift += 2;
+    }
+    // Put the least significant part of the constant
+    int mask = 0xff << shift;
+    mov(rd, c & mask, cond);
+    // Add up to 3 other parts of the constant;
+    // each of them can be represented as rotated_imm
+    if (c & (mask << 8)) {
+      orr(rd, rd, c & (mask << 8), cond);
+    }
+    if (c & (mask << 16)) {
+      orr(rd, rd, c & (mask << 16), cond);
+    }
+    if (c & (mask << 24)) {
+      orr(rd, rd, c & (mask << 24), cond);
+    }
+  }
+}
+
+#endif // AARCH64
+
+void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
+#ifdef AARCH64
+                             bool patchable
+#else
+                             AsmCondition cond
+#endif
+                             ) {
+
+  if (o == NULL) {
+#ifdef AARCH64
+    if (patchable) {
+      nop();
+    }
+    mov(rd, ZR);
+#else
+    mov(rd, 0, cond);
+#endif
+    return;
+  }
+
+  if (oop_index == 0) {
+    oop_index = oop_recorder()->allocate_oop_index(o);
+  }
+  relocate(oop_Relocation::spec(oop_index));
+
+#ifdef AARCH64
+  if (patchable) {
+    nop();
+  }
+  ldr(rd, pc());
+#else
+  if (VM_Version::supports_movw()) {
+    movw(rd, 0, cond);
+    movt(rd, 0, cond);
+  } else {
+    ldr(rd, Address(PC), cond);
+    // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
+    nop();
+  }
+#endif
+}
+
+void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index AARCH64_ONLY_ARG(bool patchable)) {
+  if (o == NULL) {
+#ifdef AARCH64
+    if (patchable) {
+      nop();
+    }
+#endif
+    mov(rd, 0);
+    return;
+  }
+
+  if (metadata_index == 0) {
+    metadata_index = oop_recorder()->allocate_metadata_index(o);
+  }
+  relocate(metadata_Relocation::spec(metadata_index));
+
+#ifdef AARCH64
+  if (patchable) {
+    nop();
+  }
+#ifdef COMPILER2
+  if (!patchable && VM_Version::prefer_moves_over_load_literal()) {
+    mov_slow(rd, (address)o);
+    return;
+  }
+#endif
+  ldr(rd, pc());
+#else
+  if (VM_Version::supports_movw()) {
+    movw(rd, ((int)o) & 0xffff);
+    movt(rd, (unsigned int)o >> 16);
+  } else {
+    ldr(rd, Address(PC));
+    // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
+    nop();
+  }
+#endif // AARCH64
+}
+
+void MacroAssembler::mov_float(FloatRegister fd, jfloat c NOT_AARCH64_ARG(AsmCondition cond)) {
+  Label skip_constant;
+  union {
+    jfloat f;
+    jint i;
+  } accessor;
+  accessor.f = c;
+
+#ifdef AARCH64
+  // TODO-AARCH64 - try to optimize loading of float constants with fmov and/or mov_slow
+  Label L;
+  ldr_s(fd, target(L));
+  b(skip_constant);
+  bind(L);
+  emit_int32(accessor.i);
+  bind(skip_constant);
+#else
+  flds(fd, Address(PC), cond);
+  b(skip_constant);
+  emit_int32(accessor.i);
+  bind(skip_constant);
+#endif // AARCH64
+}
+
+void MacroAssembler::mov_double(FloatRegister fd, jdouble c NOT_AARCH64_ARG(AsmCondition cond)) {
+  Label skip_constant;
+  union {
+    jdouble d;
+    jint i[2];
+  } accessor;
+  accessor.d = c;
+
+#ifdef AARCH64
+  // TODO-AARCH64 - try to optimize loading of double constants with fmov
+  Label L;
+  ldr_d(fd, target(L));
+  b(skip_constant);
+  align(wordSize);
+  bind(L);
+  emit_int32(accessor.i[0]);
+  emit_int32(accessor.i[1]);
+  bind(skip_constant);
+#else
+  fldd(fd, Address(PC), cond);
+  b(skip_constant);
+  emit_int32(accessor.i[0]);
+  emit_int32(accessor.i[1]);
+  bind(skip_constant);
+#endif // AARCH64
+}
+
+void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
+  intptr_t addr = (intptr_t) address_of_global;
+#ifdef AARCH64
+  assert((addr & 0x3) == 0, "address should be aligned");
+
+  // FIXME: TODO
+  if (false && page_reachable_from_cache(address_of_global)) {
+    assert(false, "TODO: relocate");
+    //relocate();
+    adrp(reg, address_of_global);
+    ldrsw(reg, Address(reg, addr & 0xfff));
+  } else {
+    mov_slow(reg, addr & ~0x3fff);
+    ldrsw(reg, Address(reg, addr & 0x3fff));
+  }
+#else
+  mov_slow(reg, addr & ~0xfff);
+  ldr(reg, Address(reg, addr & 0xfff));
+#endif
+}
+
+void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
+#ifdef AARCH64
+  intptr_t addr = (intptr_t) address_of_global;
+  assert((addr & 0x7) == 0, "address should be aligned");
+  mov_slow(reg, addr & ~0x7fff);
+  ldr(reg, Address(reg, addr & 0x7fff));
+#else
+  ldr_global_s32(reg, address_of_global);
+#endif
+}
+
+void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
+  intptr_t addr = (intptr_t) address_of_global;
+  mov_slow(reg, addr & ~0xfff);
+  ldrb(reg, Address(reg, addr & 0xfff));
+}
+
+void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
+#ifdef AARCH64
+  switch (bits) {
+    case  8: uxtb(rd, rn); break;
+    case 16: uxth(rd, rn); break;
+    case 32: mov_w(rd, rn); break;
+    default: ShouldNotReachHere();
+  }
+#else
+  if (bits <= 8) {
+    andr(rd, rn, (1 << bits) - 1);
+  } else if (bits >= 24) {
+    bic(rd, rn, -1 << bits);
+  } else {
+    mov(rd, AsmOperand(rn, lsl, 32 - bits));
+    mov(rd, AsmOperand(rd, lsr, 32 - bits));
+  }
+#endif
+}
+
+void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
+#ifdef AARCH64
+  switch (bits) {
+    case  8: sxtb(rd, rn); break;
+    case 16: sxth(rd, rn); break;
+    case 32: sxtw(rd, rn); break;
+    default: ShouldNotReachHere();
+  }
+#else
+  mov(rd, AsmOperand(rn, lsl, 32 - bits));
+  mov(rd, AsmOperand(rd, asr, 32 - bits));
+#endif
+}
+
+#ifndef AARCH64
+
+void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
+                               Register rn_lo, Register rn_hi,
+                               AsmCondition cond) {
+  if (rd_lo != rn_hi) {
+    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
+    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
+  } else if (rd_hi != rn_lo) {
+    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
+    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
+  } else {
+    eor(rd_lo, rd_hi, rd_lo, cond);
+    eor(rd_hi, rd_lo, rd_hi, cond);
+    eor(rd_lo, rd_hi, rd_lo, cond);
+  }
+}
+
+void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
+                                Register rn_lo, Register rn_hi,
+                                AsmShift shift, Register count) {
+  Register tmp;
+  if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
+    tmp = rd_lo;
+  } else {
+    tmp = rd_hi;
+  }
+  assert_different_registers(tmp, count, rn_lo, rn_hi);
+
+  subs(tmp, count, 32);
+  if (shift == lsl) {
+    assert_different_registers(rd_hi, rn_lo);
+    assert_different_registers(count, rd_hi);
+    mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
+    rsb(tmp, count, 32, mi);
+    if (rd_hi == rn_hi) {
+      mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
+      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
+    } else {
+      mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
+      orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
+    }
+    mov(rd_lo, AsmOperand(rn_lo, shift, count));
+  } else {
+    assert_different_registers(rd_lo, rn_hi);
+    assert_different_registers(rd_lo, count);
+    mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
+    rsb(tmp, count, 32, mi);
+    if (rd_lo == rn_lo) {
+      mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
+      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
+    } else {
+      mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
+      orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
+    }
+    mov(rd_hi, AsmOperand(rn_hi, shift, count));
+  }
+}
+
+void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
+                                Register rn_lo, Register rn_hi,
+                                AsmShift shift, int count) {
+  assert(count != 0 && (count & ~63) == 0, "must be");
+
+  if (shift == lsl) {
+    assert_different_registers(rd_hi, rn_lo);
+    if (count >= 32) {
+      mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
+      mov(rd_lo, 0);
+    } else {
+      mov(rd_hi, AsmOperand(rn_hi, lsl, count));
+      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
+      mov(rd_lo, AsmOperand(rn_lo, lsl, count));
+    }
+  } else {
+    assert_different_registers(rd_lo, rn_hi);
+    if (count >= 32) {
+      if (count == 32) {
+        mov(rd_lo, rn_hi);
+      } else {
+        mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
+      }
+      if (shift == asr) {
+        mov(rd_hi, AsmOperand(rn_hi, asr, 0));
+      } else {
+        mov(rd_hi, 0);
+      }
+    } else {
+      mov(rd_lo, AsmOperand(rn_lo, lsr, count));
+      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
+      mov(rd_hi, AsmOperand(rn_hi, shift, count));
+    }
+  }
+}
+#endif // !AARCH64
+
+void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
+  // This code pattern is matched in NativeInstruction::skip_verify_oop.
+  // Update it at modifications.
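+  // The sequence below (save_all_registers, load oop/message, indirect call,
+  // restore, inlined literals) is what that matcher relies on; keep its
+  // layout stable when editing.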
+  if (!VerifyOops) return;
+
+  char buffer[64];
+#ifdef COMPILER1
+  if (CommentedAssembly) {
+    snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
+    block_comment(buffer);
+  }
+#endif
+  const char* msg_buffer = NULL;
+  {
+    ResourceMark rm;
+    stringStream ss;
+    ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
+    msg_buffer = code_string(ss.as_string());
+  }
+
+  save_all_registers();
+
+  if (reg != R2) {
+    mov(R2, reg); // oop to verify
+  }
+  mov(R1, SP); // register save area
+
+  Label done;
+  InlinedString Lmsg(msg_buffer);
+  ldr_literal(R0, Lmsg); // message
+
+  // call indirectly to solve generation ordering problem
+  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
+  call(Rtemp);
+
+  restore_all_registers();
+
+  b(done);
+#ifdef COMPILER2
+  int off = offset();
+#endif
+  bind_literal(Lmsg);
+#ifdef COMPILER2
+  if (offset() - off == 1 * wordSize) {
+    // no padding, so insert nop for worst-case sizing
+    nop();
+  }
+#endif
+  bind(done);
+}
+
+void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
+  if (!VerifyOops) return;
+
+  const char* msg_buffer = NULL;
+  {
+    ResourceMark rm;
+    stringStream ss;
+    if ((addr.base() == SP) && (addr.index() == noreg)) {
+      ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
+    } else {
+      ss.print("verify_oop_addr: %s", s);
+    }
+    ss.print(" (%s:%d)", file, line);
+    msg_buffer = code_string(ss.as_string());
+  }
+
+  int push_size = save_all_registers();
+
+  if (addr.base() == SP) {
+    // computes an addr that takes into account the push
+    if (addr.index() != noreg) {
+      Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
+      add(new_base, SP, push_size);
+      addr = addr.rebase(new_base);
+    } else {
+      addr = addr.plus_disp(push_size);
+    }
+  }
+
+  ldr(R2, addr); // oop to verify
+  mov(R1, SP); // register save area
+
+  Label done;
+  InlinedString Lmsg(msg_buffer);
+  ldr_literal(R0, Lmsg); // message
+
+  // call indirectly to solve generation ordering problem
+  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
+  call(Rtemp);
+
+  restore_all_registers();
+
+  b(done);
+  bind_literal(Lmsg);
+  bind(done);
+}
+
+void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
+  if (needs_explicit_null_check(offset)) {
+#ifdef AARCH64
+    ldr(ZR, Address(reg));
+#else
+    assert_different_registers(reg, tmp);
+    if (tmp == noreg) {
+      tmp = Rtemp;
+      assert((! Thread::current()->is_Compiler_thread()) ||
+             (! (ciEnv::current()->task() == NULL)) ||
+             (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
+             "Rtemp not available in C2"); // explicit tmp register required
+      // XXX: could we mark the code buffer as not compatible with C2 ?
+    }
+    ldr(tmp, Address(reg));
+#endif
+  }
+}
+
+// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
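+// The fast path is a CAS loop over the shared eden top pointer; in outline
+// (an illustrative summary of the code below):
+//   top = *top_addr; new_top = top + size;
+//   if (new_top < top || new_top > heap_end) goto slow_case; // overflow or exhausted
+//   if (!CAS(top_addr, top, new_top)) retry;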
+void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
+                                   RegisterOrConstant size_expression, Label& slow_case) {
+  if (!Universe::heap()->supports_inline_contig_alloc()) {
+    b(slow_case);
+    return;
+  }
+
+  CollectedHeap* ch = Universe::heap();
+
+  const Register top_addr = tmp1;
+  const Register heap_end = tmp2;
+
+  if (size_expression.is_register()) {
+    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
+  } else {
+    assert_different_registers(obj, obj_end, top_addr, heap_end);
+  }
+
+  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
+  if (load_const) {
+    mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
+  } else {
+    ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
+  }
+  // Calculate new heap_top by adding the size of the object
+  Label retry;
+  bind(retry);
+
+#ifdef AARCH64
+  ldxr(obj, top_addr);
+#else
+  ldr(obj, Address(top_addr));
+#endif // AARCH64
+
+  ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
+  add_rc(obj_end, obj, size_expression);
+  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
+  cmp(obj_end, obj);
+  b(slow_case, lo);
+  // Update heap_top if allocation succeeded
+  cmp(obj_end, heap_end);
+  b(slow_case, hi);
+
+#ifdef AARCH64
+  stxr(heap_end/*scratched*/, obj_end, top_addr);
+  cbnz_w(heap_end, retry);
+#else
+  atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
+  b(retry, ne);
+#endif // AARCH64
+}
+
+// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
+void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
+                                   RegisterOrConstant size_expression, Label& slow_case) {
+  const Register tlab_end = tmp1;
+  assert_different_registers(obj, obj_end, tlab_end);
+
+  ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
+  ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
+  add_rc(obj_end, obj, size_expression);
+  cmp(obj_end, tlab_end);
+  b(slow_case, hi);
+  str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
+}
+
+void MacroAssembler::tlab_refill(Register top, Register tmp1, Register tmp2,
+                                 Register tmp3, Register tmp4,
+                                 Label& try_eden, Label& slow_case) {
+  if (!Universe::heap()->supports_inline_contig_alloc()) {
+    b(slow_case);
+    return;
+  }
+
+  InlinedAddress intArrayKlass_addr((address)Universe::intArrayKlassObj_addr());
+  Label discard_tlab, do_refill;
+  ldr(top, Address(Rthread, JavaThread::tlab_top_offset()));
+  ldr(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
+  ldr(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
+
+  // Calculate amount of free space
+  sub(tmp1, tmp1, top);
+  // Retain tlab and allocate in shared space
+  // if the amount of free space in tlab is too large to discard
+  cmp(tmp2, AsmOperand(tmp1, lsr, LogHeapWordSize));
+  b(discard_tlab, ge);
+
+  // Increment waste limit to prevent getting stuck on this slow path
+  mov_slow(tmp3, ThreadLocalAllocBuffer::refill_waste_limit_increment());
+  add(tmp2, tmp2, tmp3);
+  str(tmp2, Address(Rthread, JavaThread::tlab_refill_waste_limit_offset()));
+  if (TLABStats) {
+    ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
+    add_32(tmp2, tmp2, 1);
+    str_32(tmp2, Address(Rthread, JavaThread::tlab_slow_allocations_offset()));
+  }
+  b(try_eden);
+
+  bind_literal(intArrayKlass_addr);
+
+  bind(discard_tlab);
+  if (TLABStats) {
+    ldr_u32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
+    ldr_u32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
+    add_32(tmp2, tmp2, 1);
+    add_32(tmp3, tmp3, AsmOperand(tmp1, lsr, LogHeapWordSize));
+    str_32(tmp2, Address(Rthread, JavaThread::tlab_number_of_refills_offset()));
+    str_32(tmp3, Address(Rthread, JavaThread::tlab_fast_refill_waste_offset()));
+  }
+  // If tlab is currently allocated (top or end != null)
+  // then fill [top, end + alignment_reserve) with array object
+  cbz(top, do_refill);
+
+  // Set up the mark word
+  mov_slow(tmp2, (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2));
+  str(tmp2, Address(top, oopDesc::mark_offset_in_bytes()));
+  // Set klass to intArrayKlass and the length to the remaining space
+  ldr_literal(tmp2, intArrayKlass_addr);
+  add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes() -
+      typeArrayOopDesc::header_size(T_INT) * HeapWordSize);
+  Register klass = tmp2;
+  ldr(klass, Address(tmp2));
+  logical_shift_right(tmp1, tmp1, LogBytesPerInt); // divide by sizeof(jint)
+  str_32(tmp1, Address(top, arrayOopDesc::length_offset_in_bytes()));
+  store_klass(klass, top); // blows klass:
+  klass = noreg;
+
+  ldr(tmp1, Address(Rthread, JavaThread::tlab_start_offset()));
+  sub(tmp1, top, tmp1); // size of tlab's allocated portion
+  incr_allocated_bytes(tmp1, tmp2);
+
+  bind(do_refill);
+  // Refill the tlab with an eden allocation
+  ldr(tmp1, Address(Rthread, JavaThread::tlab_size_offset()));
+  logical_shift_left(tmp4, tmp1, LogHeapWordSize);
+  eden_allocate(top, tmp1, tmp2, tmp3, tmp4, slow_case);
+  str(top, Address(Rthread, JavaThread::tlab_start_offset()));
+  str(top, Address(Rthread, JavaThread::tlab_top_offset()));
+
+#ifdef ASSERT
+  // Verify that tmp1 contains tlab_end
+  ldr(tmp2, Address(Rthread, JavaThread::tlab_size_offset()));
+  add(tmp2, top, AsmOperand(tmp2, lsl, LogHeapWordSize));
+  cmp(tmp1, tmp2);
+  breakpoint(ne);
+#endif
+
+  sub(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
+  str(tmp1, Address(Rthread, JavaThread::tlab_end_offset()));
+
+  if (ZeroTLAB) {
+    // clobbers start and tmp
+    // top must be preserved!
+    add(tmp1, tmp1, ThreadLocalAllocBuffer::alignment_reserve_in_bytes());
+    ldr(tmp2, Address(Rthread, JavaThread::tlab_start_offset()));
+    zero_memory(tmp2, tmp1, tmp3);
+  }
+}
+
+// Fills the memory region [start, end) with zeroes. Clobbers `start` and `tmp` registers.
+void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
+  Label loop;
+  const Register ptr = start;
+
+#ifdef AARCH64
+  // TODO-AARCH64 - compare performance of 2x word zeroing with simple 1x
+  const Register size = tmp;
+  Label remaining, done;
+
+  sub(size, end, start);
+
+#ifdef ASSERT
+  { Label L;
+    tst(size, wordSize - 1);
+    b(L, eq);
+    stop("size is not a multiple of wordSize");
+    bind(L);
+  }
+#endif // ASSERT
+
+  subs(size, size, wordSize);
+  b(remaining, le);
+
+  // Zero by 2 words per iteration.
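+  // (The loop stores a pair per iteration with stp; a trailing odd word, if
+  //  any, is written with the single str at "remaining".)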
+  bind(loop);
+  subs(size, size, 2*wordSize);
+  stp(ZR, ZR, Address(ptr, 2*wordSize, post_indexed));
+  b(loop, gt);
+
+  bind(remaining);
+  b(done, ne);
+  str(ZR, Address(ptr));
+  bind(done);
+#else
+  mov(tmp, 0);
+  bind(loop);
+  cmp(ptr, end);
+  str(tmp, Address(ptr, wordSize, post_indexed), lo);
+  b(loop, lo);
+#endif // AARCH64
+}
+
+void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
+#ifdef AARCH64
+  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+  add_rc(tmp, tmp, size_in_bytes);
+  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+#else
+  // Bump total bytes allocated by this thread
+  Label done;
+
+  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+  adds(tmp, tmp, size_in_bytes);
+  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())), cc);
+  b(done, cc);
+
+  // Increment the high word and store single-copy atomically (this is an unlikely
+  // scenario on typical embedded systems, as it means more than 4GB has been allocated).
+  // To do so, ldrd/strd instructions are used, which require an even-odd pair of
+  // registers. Such a request could be difficult to satisfy by allocating those
+  // registers on a higher level, therefore the routine is ready to allocate a pair itself.
+  Register low, high;
+  // Select either R0/R1 or R2/R3
+
+  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
+    low = R2;
+    high = R3;
+  } else {
+    low = R0;
+    high = R1;
+  }
+  push(RegisterSet(low, high));
+
+  ldrd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+  adds(low, low, size_in_bytes);
+  adc(high, high, 0);
+  strd(low, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+
+  pop(RegisterSet(low, high));
+
+  bind(done);
+#endif // AARCH64
+}
+
+void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
+  // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
+  if (UseStackBanging) {
+    const int page_size = os::vm_page_size();
+
+    sub_slow(tmp, SP, JavaThread::stack_shadow_zone_size());
+    strb(R0, Address(tmp));
+#ifdef AARCH64
+    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= page_size) {
+      sub(tmp, tmp, page_size);
+      strb(R0, Address(tmp));
+    }
+#else
+    for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
+      strb(R0, Address(tmp, -0xff0, pre_indexed));
+    }
+#endif // AARCH64
+  }
+}
+
+void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
+  if (UseStackBanging) {
+    Label loop;
+
+    mov(tmp, SP);
+    add_slow(Rsize, Rsize, JavaThread::stack_shadow_zone_size() - os::vm_page_size());
+#ifdef AARCH64
+    sub(tmp, tmp, Rsize);
+    bind(loop);
+    subs(Rsize, Rsize, os::vm_page_size());
+    strb(ZR, Address(tmp, Rsize));
+#else
+    bind(loop);
+    subs(Rsize, Rsize, 0xff0);
+    strb(R0, Address(tmp, -0xff0, pre_indexed));
+#endif // AARCH64
+    b(loop, hi);
+  }
+}
+
+void MacroAssembler::stop(const char* msg) {
+  // This code pattern is matched in NativeInstruction::is_stop.
+  // Update it at modifications.
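+  //
+  // stop() does not return: it saves all registers, then hands the message
+  // and the register save area to MacroAssembler::debug, which aborts the VM.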
+#ifdef COMPILER1
+  if (CommentedAssembly) {
+    block_comment("stop");
+  }
+#endif
+
+  InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
+  InlinedString Lmsg(msg);
+
+  // save all registers for further inspection
+  save_all_registers();
+
+  ldr_literal(R0, Lmsg); // message
+  mov(R1, SP); // register save area
+
+#ifdef AARCH64
+  ldr_literal(Rtemp, Ldebug);
+  br(Rtemp);
+#else
+  ldr_literal(PC, Ldebug); // call MacroAssembler::debug
+#endif // AARCH64
+
+#if defined(COMPILER2) && defined(AARCH64)
+  int off = offset();
+#endif
+  bind_literal(Lmsg);
+  bind_literal(Ldebug);
+#if defined(COMPILER2) && defined(AARCH64)
+  if (offset() - off == 2 * wordSize) {
+    // no padding, so insert nop for worst-case sizing
+    nop();
+  }
+#endif
+}
+
+void MacroAssembler::warn(const char* msg) {
+#ifdef COMPILER1
+  if (CommentedAssembly) {
+    block_comment("warn");
+  }
+#endif
+
+  InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
+  InlinedString Lmsg(msg);
+  Label done;
+
+  int push_size = save_caller_save_registers();
+
+#ifdef AARCH64
+  // TODO-AARCH64 - get rid of extra debug parameters
+  mov(R1, LR);
+  mov(R2, FP);
+  add(R3, SP, push_size);
+#endif
+
+  ldr_literal(R0, Lmsg); // message
+  ldr_literal(LR, Lwarn); // call warning
+
+  call(LR);
+
+  restore_caller_save_registers();
+
+  b(done);
+  bind_literal(Lmsg);
+  bind_literal(Lwarn);
+  bind(done);
+}
+
+
+int MacroAssembler::save_all_registers() {
+  // This code pattern is matched in NativeInstruction::is_save_all_registers.
+  // Update it at modifications.
+#ifdef AARCH64
+  const Register tmp = Rtemp;
+  raw_push(R30, ZR);
+  for (int i = 28; i >= 0; i -= 2) {
+    raw_push(as_Register(i), as_Register(i+1));
+  }
+  mov_pc_to(tmp);
+  str(tmp, Address(SP, 31*wordSize));
+  ldr(tmp, Address(SP, tmp->encoding()*wordSize));
+  return 32*wordSize;
+#else
+  push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
+  return 15*wordSize;
+#endif // AARCH64
+}
+
+void MacroAssembler::restore_all_registers() {
+#ifdef AARCH64
+  for (int i = 0; i <= 28; i += 2) {
+    raw_pop(as_Register(i), as_Register(i+1));
+  }
+  raw_pop(R30, ZR);
+#else
+  pop(RegisterSet(R0, R12) | RegisterSet(LR)); // restore registers
+  add(SP, SP, wordSize); // discard saved PC
+#endif // AARCH64
+}
+
+int MacroAssembler::save_caller_save_registers() {
+#ifdef AARCH64
+  for (int i = 0; i <= 16; i += 2) {
+    raw_push(as_Register(i), as_Register(i+1));
+  }
+  raw_push(R18, LR);
+  return 20*wordSize;
+#else
+#if R9_IS_SCRATCHED
+  // Save also R10 to preserve alignment
+  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
+  return 8*wordSize;
+#else
+  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
+  return 6*wordSize;
+#endif
+#endif // AARCH64
+}
+
+void MacroAssembler::restore_caller_save_registers() {
+#ifdef AARCH64
+  raw_pop(R18, LR);
+  for (int i = 16; i >= 0; i -= 2) {
+    raw_pop(as_Register(i), as_Register(i+1));
+  }
+#else
+#if R9_IS_SCRATCHED
+  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
+#else
+  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
+#endif
+#endif // AARCH64
+}
+
+void MacroAssembler::debug(const char* msg, const intx* registers) {
+  // In order to get locks to work, we need to fake an in_VM state
+  JavaThread* thread = JavaThread::current();
+  thread->set_thread_state(_thread_in_vm);
+
+  if (ShowMessageBoxOnError) {
+    ttyLocker ttyl;
+    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
+      BytecodeCounter::print();
+    }
+    if (os::message_box(msg, "Execution stopped, print registers?")) {
+#ifdef AARCH64
+      // saved registers: R0-R30, PC
+      const int nregs = 32;
+#else
+      // saved registers: R0-R12, LR, PC
+      const int nregs = 15;
+      const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};
+#endif // AARCH64
+
+      for (int i = 0; i < nregs AARCH64_ONLY(-1); i++) {
+        tty->print_cr("%s = " INTPTR_FORMAT, AARCH64_ONLY(as_Register(i)) NOT_AARCH64(regs[i])->name(), registers[i]);
+      }
+
+#ifdef AARCH64
+      tty->print_cr("pc = " INTPTR_FORMAT, registers[nregs-1]);
+#endif // AARCH64
+
+      // derive original SP value from the address of register save area
+      tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
+    }
+    BREAKPOINT;
+  } else {
+    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
+  }
+  assert(false, "DEBUG MESSAGE: %s", msg);
+  fatal("%s", msg); // returning from MacroAssembler::debug is not supported
+}
+
+void MacroAssembler::unimplemented(const char* what) {
+  const char* buf = NULL;
+  {
+    ResourceMark rm;
+    stringStream ss;
+    ss.print("unimplemented: %s", what);
+    buf = code_string(ss.as_string());
+  }
+  stop(buf);
+}
+
+
+// Implementation of FixedSizeCodeBlock
+
+FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
+  _masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
+}
+
+FixedSizeCodeBlock::~FixedSizeCodeBlock() {
+  if (_enabled) {
+    address curr_pc = _masm->pc();
+
+    assert(_start < curr_pc, "invalid current pc");
+    guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");
+
+    int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
+    for (int i = 0; i < nops_count; i++) {
+      _masm->nop();
+    }
+  }
+}
+
+#ifdef AARCH64
+
+// Serializes memory.
+// tmp register is not used on AArch64, this parameter is provided solely for better compatibility with 32-bit ARM
+void MacroAssembler::membar(Membar_mask_bits order_constraint, Register tmp) {
+  if (!os::is_MP()) return;
+
+  // TODO-AARCH64 investigate dsb vs dmb effects
+  if (order_constraint == StoreStore) {
+    dmb(DMB_st);
+  } else if ((order_constraint & ~(LoadLoad | LoadStore)) == 0) {
+    dmb(DMB_ld);
+  } else {
+    dmb(DMB_all);
+  }
+}
+
+#else
+
+// Serializes memory. Potentially blows flags and reg.
+// tmp is a scratch for v6 co-processor write op (could be noreg for other architecture versions)
+// preserve_flags takes a longer path in LoadStore case (dmb rather than control dependency) to preserve status flags. Optional.
+// load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional.
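+//
+// Illustrative use (Rbase and Rval are placeholder registers, not part of
+// this change): passing the just-loaded register as load_tgt lets a pure
+// LoadStore barrier be implemented with the cheap control-dependency
+// sequence below instead of a full dmb, e.g.
+//   __ ldr(Rval, Address(Rbase));
+//   __ membar(MacroAssembler::LoadStore, Rtemp, false /*preserve_flags*/, Rval);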
+void MacroAssembler::membar(Membar_mask_bits order_constraint,
+                            Register tmp,
+                            bool preserve_flags,
+                            Register load_tgt) {
+  if (!os::is_MP()) return;
+
+  if (order_constraint == StoreStore) {
+    dmb(DMB_st, tmp);
+  } else if ((order_constraint & StoreLoad) ||
+             (order_constraint & LoadLoad) ||
+             (order_constraint & StoreStore) ||
+             (load_tgt == noreg) ||
+             preserve_flags) {
+    dmb(DMB_all, tmp);
+  } else {
+    // LoadStore: speculative stores reordering is prohibited
+
+    // By providing an ordered load target register, we avoid an extra memory load reference
+    Label not_taken;
+    bind(not_taken);
+    cmp(load_tgt, load_tgt);
+    b(not_taken, ne);
+  }
+}
+
+#endif // AARCH64
+
+// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
+// on failure, so fall-through can only mean success.
+// "one_shot" controls whether we loop and retry to mitigate spurious failures.
+// This is only needed for C2, which for some reason does not retry,
+// while C1/interpreter does.
+// TODO: measure if it makes a difference
+
+void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
+                                          Register base, Register tmp, Label &slow_case,
+                                          bool allow_fallthrough_on_failure, bool one_shot)
+{
+
+  bool fallthrough_is_success = false;
+
+  // ARM Litmus Test example does prefetching here.
+  // TODO: investigate if it helps performance
+
+  // The last store was to the displaced header, so to prevent
+  // reordering we must issue a StoreStore or Release barrier before
+  // the CAS store.
+
+#ifdef AARCH64
+
+  Register Rscratch = tmp;
+  Register Roop = base;
+  Register mark = oldval;
+  Register Rbox = newval;
+  Label loop;
+
+  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
+
+  // Instead of StoreStore here, we use store-release-exclusive below
+
+  bind(loop);
+
+  ldaxr(tmp, base); // acquire
+  cmp(tmp, oldval);
+  b(slow_case, ne);
+  stlxr(tmp, newval, base); // release
+  if (one_shot) {
+    cmp_w(tmp, 0);
+  } else {
+    cbnz_w(tmp, loop);
+    fallthrough_is_success = true;
+  }
+
+  // MemBarAcquireLock would normally go here, but
+  // we already do ldaxr+stlxr above, which has
+  // Sequential Consistency
+
+#else
+  membar(MacroAssembler::StoreStore, noreg);
+
+  if (one_shot) {
+    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
+    cmp(tmp, oldval);
+    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
+    cmp(tmp, 0, eq);
+  } else {
+    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
+  }
+
+  // MemBarAcquireLock barrier
+  // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
+  // but that doesn't prevent a load or store from floating up between
+  // the load and store in the CAS sequence, so play it safe and
+  // do a full fence.
+  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
+#endif
+  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
+    b(slow_case, ne);
+  }
+}
+
+void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
+                                          Register base, Register tmp, Label &slow_case,
+                                          bool allow_fallthrough_on_failure, bool one_shot)
+{
+
+  bool fallthrough_is_success = false;
+
+  assert_different_registers(oldval, newval, base, tmp);
+
+#ifdef AARCH64
+  Label loop;
+
+  assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
+
+  bind(loop);
+  ldxr(tmp, base);
+  cmp(tmp, oldval);
+  b(slow_case, ne);
+  // MemBarReleaseLock barrier
+  stlxr(tmp, newval, base);
+  if (one_shot) {
+    cmp_w(tmp, 0);
+  } else {
+    cbnz_w(tmp, loop);
+    fallthrough_is_success = true;
+  }
+#else
+  // MemBarReleaseLock barrier
+  // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
+  // but that doesn't prevent a load or store from floating down between
+  // the load and store in the CAS sequence, so play it safe and
+  // do a full fence.
+  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);
+
+  if (one_shot) {
+    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
+    cmp(tmp, oldval);
+    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
+    cmp(tmp, 0, eq);
+  } else {
+    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
+  }
+#endif
+  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
+    b(slow_case, ne);
+  }
+
+  // ExitEnter
+  // According to JSR-133 Cookbook, this should be StoreLoad, the same
+  // barrier that follows volatile store.
+  // TODO: Should be able to remove on armv8 if volatile loads
+  // use the load-acquire instruction.
+  membar(StoreLoad, noreg);
+}
+
+#ifndef PRODUCT
+
+// Preserves flags and all registers.
+// On SMP the updated value might not be visible to external observers without a synchronization barrier
+void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
+  if (counter_addr != NULL) {
+    InlinedAddress counter_addr_literal((address)counter_addr);
+    Label done, retry;
+    if (cond != al) {
+      b(done, inverse(cond));
+    }
+
+#ifdef AARCH64
+    raw_push(R0, R1);
+    raw_push(R2, ZR);
+
+    ldr_literal(R0, counter_addr_literal);
+
+    bind(retry);
+    ldxr_w(R1, R0);
+    add_w(R1, R1, 1);
+    stxr_w(R2, R1, R0);
+    cbnz_w(R2, retry);
+
+    raw_pop(R2, ZR);
+    raw_pop(R0, R1);
+#else
+    push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
+    ldr_literal(R0, counter_addr_literal);
+
+    mrs(CPSR, Rtemp);
+
+    bind(retry);
+    ldr_s32(R1, Address(R0));
+    add(R2, R1, 1);
+    atomic_cas_bool(R1, R2, R0, 0, R3);
+    b(retry, ne);
+
+    msr(CPSR_fsxc, Rtemp);
+
+    pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));
+#endif // AARCH64
+
+    b(done);
+    bind_literal(counter_addr_literal);
+
+    bind(done);
+  }
+}
+
+#endif // !PRODUCT
+
+
+// Building block for CAS cases of biased locking: makes CAS and records statistics.
+// The slow_case label is used to transfer control if CAS fails. Otherwise leaves condition codes set.
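+// On success the flags are left set to eq; the ASSERT-only breakpoint(ne) in
+// the body depends on this to check that fall-through really means success.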
+void MacroAssembler::biased_locking_enter_with_cas(Register obj_reg, Register old_mark_reg, Register new_mark_reg,
+                                                   Register tmp, Label& slow_case, int* counter_addr) {
+
+  cas_for_lock_acquire(old_mark_reg, new_mark_reg, obj_reg, tmp, slow_case);
+#ifdef ASSERT
+  breakpoint(ne); // Fallthrough only on success
+#endif
+#ifndef PRODUCT
+  if (counter_addr != NULL) {
+    cond_atomic_inc32(al, counter_addr);
+  }
+#endif // !PRODUCT
+}
+
+int MacroAssembler::biased_locking_enter(Register obj_reg, Register swap_reg, Register tmp_reg,
+                                         bool swap_reg_contains_mark,
+                                         Register tmp2,
+                                         Label& done, Label& slow_case,
+                                         BiasedLockingCounters* counters) {
+  // obj_reg must be preserved (at least) if biased locking fails
+  // tmp_reg is a temporary register
+  // swap_reg was used as a temporary but contained a value
+  //   that was used afterwards in some call paths. Callers
+  //   have been fixed so that swap_reg no longer needs to be
+  //   saved.
+  // Rtemp is no longer scratched
+
+  assert(UseBiasedLocking, "why call this otherwise?");
+  assert_different_registers(obj_reg, swap_reg, tmp_reg, tmp2);
+  guarantee(swap_reg != tmp_reg, "invariant");
+  assert(tmp_reg != noreg, "must supply tmp_reg");
+
+#ifndef PRODUCT
+  if (PrintBiasedLockingStatistics && (counters == NULL)) {
+    counters = BiasedLocking::counters();
+  }
+#endif
+
+  assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
+  Address mark_addr(obj_reg, oopDesc::mark_offset_in_bytes());
+
+  // Biased locking
+  // See whether the lock is currently biased toward our thread and
+  // whether the epoch is still valid
+  // Note that the runtime guarantees sufficient alignment of JavaThread
+  // pointers to allow age to be placed into low bits
+  // First check to see whether biasing is even enabled for this object
+  Label cas_label;
+
+  // The null check applies to the mark loading, if we need to load it.
+  // If the mark has already been loaded in swap_reg then the null check
+  // has already been performed and the offset is irrelevant.
+  int null_check_offset = offset();
+  if (!swap_reg_contains_mark) {
+    ldr(swap_reg, mark_addr);
+  }
+
+  // On MP platform loads could return 'stale' values in some cases.
+  // That is acceptable since either CAS or slow case path is taken in the worst case.
+
+  andr(tmp_reg, swap_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
+  cmp(tmp_reg, markOopDesc::biased_lock_pattern);
+
+  b(cas_label, ne);
+
+  // The bias pattern is present in the object's header. Need to check
+  // whether the bias owner and the epoch are both still current.
+  load_klass(tmp_reg, obj_reg);
+  ldr(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset()));
+  orr(tmp_reg, tmp_reg, Rthread);
+  eor(tmp_reg, tmp_reg, swap_reg);
+
+#ifdef AARCH64
+  ands(tmp_reg, tmp_reg, ~((uintx) markOopDesc::age_mask_in_place));
+#else
+  bics(tmp_reg, tmp_reg, ((int) markOopDesc::age_mask_in_place));
+#endif // AARCH64
+
+#ifndef PRODUCT
+  if (counters != NULL) {
+    cond_atomic_inc32(eq, counters->biased_lock_entry_count_addr());
+  }
+#endif // !PRODUCT
+
+  b(done, eq);
+
+  Label try_revoke_bias;
+  Label try_rebias;
+
+  // At this point we know that the header has the bias pattern and
+  // that we are not the bias owner in the current epoch. We need to
+  // figure out more details about the state of the header in order to
+  // know what operations can be legally performed on the object's
+  // header.
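+
+  // For reference, the biased mark word layout these bit tests rely on is
+  // (see markOop.hpp):
+  //   [ JavaThread* owner | epoch:2 | age:4 | biased_lock:1 | lock:2 ]
+  // tmp_reg now holds (prototype ^ mark) with the age bits masked out by
+  // the bics/ands above.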
+
+  // If the low three bits in the xor result aren't clear, that means
+  // the prototype header is no longer biased and we have to revoke
+  // the bias on this object.
+  tst(tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
+  b(try_revoke_bias, ne);
+
+  // Biasing is still enabled for this data type. See whether the
+  // epoch of the current bias is still valid, meaning that the epoch
+  // bits of the mark word are equal to the epoch bits of the
+  // prototype header. (Note that the prototype header's epoch bits
+  // only change at a safepoint.) If not, attempt to rebias the object
+  // toward the current thread. Note that we must be absolutely sure
+  // that the current epoch is invalid in order to do this because
+  // otherwise the manipulations it performs on the mark word are
+  // illegal.
+  tst(tmp_reg, (uintx)markOopDesc::epoch_mask_in_place);
+  b(try_rebias, ne);
+
+  // tmp_reg has the age, epoch and pattern bits cleared
+  // The remaining (owner) bits are (Thread ^ current_owner)
+
+  // The epoch of the current bias is still valid but we know nothing
+  // about the owner; it might be set or it might be clear. Try to
+  // acquire the bias of the object using an atomic operation. If this
+  // fails we will go into the runtime to revoke the object's bias.
+  // Note that we first construct the presumed unbiased header so we
+  // don't accidentally blow away another thread's valid bias.
+
+  // Note that we know the owner is not ourself. Hence, success can
+  // only happen when the owner bits are 0
+
+#ifdef AARCH64
+  // Bit mask biased_lock + age + epoch is not a valid AArch64 logical immediate, as it has
+  // a cleared bit in the middle (cms bit). So it is loaded with a separate instruction.
+  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
+  andr(swap_reg, swap_reg, tmp2);
+#else
+  // until the assembler can be made smarter, we need to make some assumptions about the values
+  // so we can optimize this:
+  assert((markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place) == 0x1ff, "biased bitmasks changed");
+
+  mov(swap_reg, AsmOperand(swap_reg, lsl, 23));
+  mov(swap_reg, AsmOperand(swap_reg, lsr, 23)); // markOop with thread bits cleared (for CAS)
+#endif // AARCH64
+
+  orr(tmp_reg, swap_reg, Rthread); // new mark
+
+  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
+                                (counters != NULL) ? counters->anonymously_biased_lock_entry_count_addr() : NULL);
+
+  // If the biasing toward our thread failed, this means that
+  // another thread succeeded in biasing it toward itself and we
+  // need to revoke that bias. The revocation will occur in the
+  // interpreter runtime in the slow case.
+
+  b(done);
+
+  bind(try_rebias);
+
+  // At this point we know the epoch has expired, meaning that the
+  // current "bias owner", if any, is actually invalid. Under these
+  // circumstances _only_, we are allowed to use the current header's
+  // value as the comparison value when doing the cas to acquire the
+  // bias in the current epoch. In other words, we allow transfer of
+  // the bias from one thread to another directly in this situation.
+
+  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
+
+  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
+
+  // owner bits 'random'. Set them to Rthread.
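+  // Keep only the biased_lock, age and epoch bits (0x1ff) of the mark:
+  // AArch64 applies the mask from tmp2 directly, while 32-bit ARM uses the
+  // lsl #23 / lsr #23 pair to clear the 23-bit thread field, exactly as in
+  // the anonymous-bias path above.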
+#ifdef AARCH64
+  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
+  andr(tmp_reg, tmp_reg, tmp2);
+#else
+  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
+  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
+#endif // AARCH64
+
+  orr(tmp_reg, tmp_reg, Rthread); // new mark
+
+  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, slow_case,
+                                (counters != NULL) ? counters->rebiased_lock_entry_count_addr() : NULL);
+
+  // If the biasing toward our thread failed, then another thread
+  // succeeded in biasing it toward itself and we need to revoke that
+  // bias. The revocation will occur in the runtime in the slow case.
+
+  b(done);
+
+  bind(try_revoke_bias);
+
+  // The prototype mark in the klass doesn't have the bias bit set any
+  // more, indicating that objects of this data type are not supposed
+  // to be biased any more. We are going to try to reset the mark of
+  // this object to the prototype value and fall through to the
+  // CAS-based locking scheme. Note that if our CAS fails, it means
+  // that another thread raced us for the privilege of revoking the
+  // bias of this particular object, so it's okay to continue in the
+  // normal locking code.
+
+  // tmp_reg low (not owner) bits are (age: 0 | pattern&epoch: prototype^swap_reg)
+
+  eor(tmp_reg, tmp_reg, swap_reg); // OK except for owner bits (age preserved !)
+
+  // owner bits 'random'. Clear them
+#ifdef AARCH64
+  mov(tmp2, (markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place));
+  andr(tmp_reg, tmp_reg, tmp2);
+#else
+  mov(tmp_reg, AsmOperand(tmp_reg, lsl, 23));
+  mov(tmp_reg, AsmOperand(tmp_reg, lsr, 23));
+#endif // AARCH64
+
+  biased_locking_enter_with_cas(obj_reg, swap_reg, tmp_reg, tmp2, cas_label,
+                                (counters != NULL) ? counters->revoked_lock_entry_count_addr() : NULL);
+
+  // Fall through to the normal CAS-based lock, because no matter what
+  // the result of the above CAS, some thread must have succeeded in
+  // removing the bias bit from the object's header.
+
+  bind(cas_label);
+
+  return null_check_offset;
+}
+
+
+void MacroAssembler::biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done) {
+  assert(UseBiasedLocking, "why call this otherwise?");
+
+  // Check for biased locking unlock case, which is a no-op
+  // Note: we do not have to check the thread ID for two reasons.
+  // First, the interpreter checks for IllegalMonitorStateException at
+  // a higher level. Second, if the bias was revoked while we held the
+  // lock, the object could not be rebiased toward another thread, so
+  // the bias bit would be clear.
+  ldr(tmp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+
+  andr(tmp_reg, tmp_reg, (uintx)markOopDesc::biased_lock_mask_in_place);
+  cmp(tmp_reg, markOopDesc::biased_lock_pattern);
+  b(done, eq);
+}
+
+#ifdef AARCH64
+
+void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) {
+  switch (size_in_bytes) {
+  case 8: ldr(dst, src); break;
+  case 4: is_signed ? ldr_s32(dst, src) : ldr_u32(dst, src); break;
+  case 2: is_signed ? ldrsh(dst, src) : ldrh(dst, src); break;
+  case 1: is_signed ? ldrsb(dst, src) : ldrb(dst, src); break;
+  default: ShouldNotReachHere();
+  }
+}
+
+void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) {
+  switch (size_in_bytes) {
+  case 8: str(src, dst); break;
+  case 4: str_32(src, dst); break;
+  case 2: strh(src, dst); break;
+  case 1: strb(src, dst); break;
+  default: ShouldNotReachHere();
+  }
+}
+
+#else
+
+void MacroAssembler::load_sized_value(Register dst, Address src,
+                                      size_t size_in_bytes, bool is_signed, AsmCondition cond) {
+  switch (size_in_bytes) {
+  case 4: ldr(dst, src, cond); break;
+  case 2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
+  case 1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
+  default: ShouldNotReachHere();
+  }
+}
+
+
+void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
+  switch (size_in_bytes) {
+  case 4: str(src, dst, cond); break;
+  case 2: strh(src, dst, cond); break;
+  case 1: strb(src, dst, cond); break;
+  default: ShouldNotReachHere();
+  }
+}
+#endif // AARCH64
+
+// Look up the method for a megamorphic invokeinterface call.
+// The target method is determined by <Rinterf, Rindex>.
+// The receiver klass is in Rklass.
+// On success, the result will be in method_result, and execution falls through.
+// On failure, execution transfers to the given label.
+void MacroAssembler::lookup_interface_method(Register Rklass,
+                                             Register Rinterf,
+                                             Register Rindex,
+                                             Register method_result,
+                                             Register temp_reg1,
+                                             Register temp_reg2,
+                                             Label& L_no_such_interface) {
+
+  assert_different_registers(Rklass, Rinterf, temp_reg1, temp_reg2, Rindex);
+
+  Register Ritable = temp_reg1;
+
+  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
+  const int base = in_bytes(Klass::vtable_start_offset());
+  const int scale = exact_log2(vtableEntry::size_in_bytes());
+  ldr_s32(temp_reg2, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
+  add(Ritable, Rklass, base);
+  add(Ritable, Ritable, AsmOperand(temp_reg2, lsl, scale));
+
+  Label entry, search;
+
+  b(entry);
+
+  bind(search);
+  add(Ritable, Ritable, itableOffsetEntry::size() * HeapWordSize);
+
+  bind(entry);
+
+  // Check that the entry is non-null. A null entry means that the receiver
+  // class doesn't implement the interface, and wasn't the same as the
+  // receiver class checked when the interface was resolved.
+
+  ldr(temp_reg2, Address(Ritable, itableOffsetEntry::interface_offset_in_bytes()));
+  cbz(temp_reg2, L_no_such_interface);
+
+  cmp(Rinterf, temp_reg2);
+  b(search, ne);
+
+  ldr_s32(temp_reg2, Address(Ritable, itableOffsetEntry::offset_offset_in_bytes()));
+  add(temp_reg2, temp_reg2, Rklass); // Add offset to Klass*
+  assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
+  assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust the offset in the code below");
+
+  ldr(method_result, Address::indexed_ptr(temp_reg2, Rindex));
+}
+
+#ifdef COMPILER2
+// TODO: 8 bytes at a time? pre-fetch?
+// Compare char[] arrays aligned to 4 bytes.
+void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
+                                        Register limit, Register result,
+                                        Register chr1, Register chr2, Label& Ldone) {
+  Label Lvector, Lloop;
+
+  // Note: limit contains number of bytes (2*char_elements) != 0.
+  tst(limit, 0x2); // trailing character ?
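+  // limit counts bytes (two per jchar), so bit 1 being set means an odd
+  // number of chars; the odd trailing char is compared separately before
+  // the word-at-a-time loop.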
+  b(Lvector, eq);
+
+  // compare the trailing char
+  sub(limit, limit, sizeof(jchar));
+  ldrh(chr1, Address(ary1, limit));
+  ldrh(chr2, Address(ary2, limit));
+  cmp(chr1, chr2);
+  mov(result, 0, ne);     // not equal
+  b(Ldone, ne);
+
+  // only one char ?
+  tst(limit, limit);
+  mov(result, 1, eq);
+  b(Ldone, eq);
+
+  // word by word compare, don't need alignment check
+  bind(Lvector);
+
+  // Shift ary1 and ary2 to the end of the arrays, negate limit
+  add(ary1, limit, ary1);
+  add(ary2, limit, ary2);
+  neg(limit, limit);
+
+  bind(Lloop);
+  ldr_u32(chr1, Address(ary1, limit));
+  ldr_u32(chr2, Address(ary2, limit));
+  cmp_32(chr1, chr2);
+  mov(result, 0, ne);     // not equal
+  b(Ldone, ne);
+  adds(limit, limit, 2*sizeof(jchar));
+  b(Lloop, ne);
+
+  // Caller should set it:
+  // mov(result_reg, 1); // equal
+}
+#endif
+
+void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
+  mov_slow(tmpreg1, counter_addr);
+  ldr_s32(tmpreg2, Address(tmpreg1));
+  add_32(tmpreg2, tmpreg2, 1);
+  str_32(tmpreg2, Address(tmpreg1));
+}
+
+void MacroAssembler::floating_cmp(Register dst) {
+#ifdef AARCH64
+  NOT_TESTED();
+  cset(dst, gt);            // 1 if '>', else 0
+  csinv(dst, dst, ZR, ge);  // previous value if '>=', else -1
+#else
+  vmrs(dst, FPSCR);
+  orr(dst, dst, 0x08000000);
+  eor(dst, dst, AsmOperand(dst, lsl, 3));
+  mov(dst, AsmOperand(dst, asr, 30));
+#endif
+}
+
+void MacroAssembler::restore_default_fp_mode() {
+#ifdef AARCH64
+  msr(SysReg_FPCR, ZR);
+#else
+#ifndef __SOFTFP__
+  // Round to Near mode, IEEE compatible, masked exceptions
+  mov(Rtemp, 0);
+  vmsr(FPSCR, Rtemp);
+#endif // !__SOFTFP__
+#endif // AARCH64
+}
+
+#ifndef AARCH64
+// 24-bit word range == 26-bit byte range
+bool check26(int offset) {
+  // this could be simplified, but it mimics encoding and decoding
+  // an actual branch instruction
+  int off1 = offset << 6 >> 8;
+  int encoded = off1 & ((1<<24)-1);
+  int decoded = encoded << 8 >> 6;
+  return offset == decoded;
+}
+#endif // !AARCH64
+
+// Perform some slight adjustments so the default 32MB code cache
+// is fully reachable.
+static inline address first_cache_address() {
+  return CodeCache::low_bound() + sizeof(HeapBlock::Header);
+}
+static inline address last_cache_address() {
+  return CodeCache::high_bound() - Assembler::InstructionSize;
+}
+
+#ifdef AARCH64
+// Can we reach target using ADRP?
+bool MacroAssembler::page_reachable_from_cache(address target) {
+  intptr_t cl = (intptr_t)first_cache_address() & ~0xfff;
+  intptr_t ch = (intptr_t)last_cache_address() & ~0xfff;
+  intptr_t addr = (intptr_t)target & ~0xfff;
+
+  intptr_t loffset = addr - cl;
+  intptr_t hoffset = addr - ch;
+  return is_imm_in_range(loffset >> 12, 21, 0) && is_imm_in_range(hoffset >> 12, 21, 0);
+}
+#endif
+
+// Can we reach target using unconditional branch or call from anywhere
+// in the code cache (because code can be relocated)?
+bool MacroAssembler::_reachable_from_cache(address target) {
+#ifdef __thumb__
+  if ((1 & (intptr_t)target) != 0) {
+    // Return false to avoid 'b' if we need switching to THUMB mode.
+    return false;
+  }
+#endif
+
+  address cl = first_cache_address();
+  address ch = last_cache_address();
+
+  if (ForceUnreachable) {
+    // Only addresses from CodeCache can be treated as reachable.
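+    // (Aside: the offsets computed below are tested with check26(x - 8)
+    // because a 32-bit ARM branch encodes its target relative to PC+8,
+    // two instructions ahead of the branch itself.)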
+    if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
+      return false;
+    }
+  }
+
+  intptr_t loffset = (intptr_t)target - (intptr_t)cl;
+  intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
+
+#ifdef AARCH64
+  return is_offset_in_range(loffset, 26) && is_offset_in_range(hoffset, 26);
+#else
+  return check26(loffset - 8) && check26(hoffset - 8);
+#endif
+}
+
+bool MacroAssembler::reachable_from_cache(address target) {
+  assert(CodeCache::contains(pc()), "not supported");
+  return _reachable_from_cache(target);
+}
+
+// Can we reach the entire code cache from anywhere else in the code cache?
+bool MacroAssembler::_cache_fully_reachable() {
+  address cl = first_cache_address();
+  address ch = last_cache_address();
+  return _reachable_from_cache(cl) && _reachable_from_cache(ch);
+}
+
+bool MacroAssembler::cache_fully_reachable() {
+  assert(CodeCache::contains(pc()), "not supported");
+  return _cache_fully_reachable();
+}
+
+void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
+  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
+  if (reachable_from_cache(target)) {
+    relocate(rtype);
+    b(target NOT_AARCH64_ARG(cond));
+    return;
+  }
+
+  // Note: relocate is not needed for the code below,
+  // encoding targets in absolute format.
+  if (ignore_non_patchable_relocations()) {
+    rtype = relocInfo::none;
+  }
+
+#ifdef AARCH64
+  assert (scratch != noreg, "should be specified");
+  InlinedAddress address_literal(target, rtype);
+  ldr_literal(scratch, address_literal);
+  br(scratch);
+  int off = offset();
+  bind_literal(address_literal);
+#ifdef COMPILER2
+  if (offset() - off == wordSize) {
+    // no padding, so insert nop for worst-case sizing
+    nop();
+  }
+#endif
+#else
+  if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
+    // Note: this version cannot be (atomically) patched
+    mov_slow(scratch, (intptr_t)target, cond);
+    bx(scratch, cond);
+  } else {
+    Label skip;
+    InlinedAddress address_literal(target);
+    if (cond != al) {
+      b(skip, inverse(cond));
+    }
+    relocate(rtype);
+    ldr_literal(PC, address_literal);
+    bind_literal(address_literal);
+    bind(skip);
+  }
+#endif // AARCH64
+}
+
+// Similar to jump except that:
+// - near calls are valid only if any destination in the cache is near
+// - no movt/movw (not atomically patchable)
+void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch NOT_AARCH64_ARG(AsmCondition cond)) {
+  assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
+  if (cache_fully_reachable()) {
+    // Note: this assumes that all possible targets (the initial one
+    // and the addresses patched to) are all in the code cache.
+    assert(CodeCache::contains(target), "target might be too far");
+    relocate(rtype);
+    b(target NOT_AARCH64_ARG(cond));
+    return;
+  }
+
+  // Discard the relocation information if not needed for CacheCompiledCode
+  // since the next encodings are all in absolute format.
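+  // ("Absolute format" here means the target address is emitted as a data
+  // word in a literal pool alongside the code, so relocating the code does
+  // not invalidate it.)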
+  if (ignore_non_patchable_relocations()) {
+    rtype = relocInfo::none;
+  }
+
+#ifdef AARCH64
+  assert (scratch != noreg, "should be specified");
+  InlinedAddress address_literal(target);
+  relocate(rtype);
+  ldr_literal(scratch, address_literal);
+  br(scratch);
+  int off = offset();
+  bind_literal(address_literal);
+#ifdef COMPILER2
+  if (offset() - off == wordSize) {
+    // no padding, so insert nop for worst-case sizing
+    nop();
+  }
+#endif
+#else
+  {
+    Label skip;
+    InlinedAddress address_literal(target);
+    if (cond != al) {
+      b(skip, inverse(cond));
+    }
+    relocate(rtype);
+    ldr_literal(PC, address_literal);
+    bind_literal(address_literal);
+    bind(skip);
+  }
+#endif // AARCH64
+}
+
+void MacroAssembler::call(address target, RelocationHolder rspec NOT_AARCH64_ARG(AsmCondition cond)) {
+  Register scratch = LR;
+  assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
+  if (reachable_from_cache(target)) {
+    relocate(rspec);
+    bl(target NOT_AARCH64_ARG(cond));
+    return;
+  }
+
+  // Note: relocate is not needed for the code below,
+  // encoding targets in absolute format.
+  if (ignore_non_patchable_relocations()) {
+    // This assumes the information was needed only for relocating the code.
+    rspec = RelocationHolder::none;
+  }
+
+#ifndef AARCH64
+  if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
+    // Note: this version cannot be (atomically) patched
+    mov_slow(scratch, (intptr_t)target, cond);
+    blx(scratch, cond);
+    return;
+  }
+#endif
+
+  {
+    Label ret_addr;
+#ifndef AARCH64
+    if (cond != al) {
+      b(ret_addr, inverse(cond));
+    }
+#endif
+
+
+#ifdef AARCH64
+    // TODO-AARCH64: make a more optimal implementation
+    // [ Keep in sync with MacroAssembler::call_size ]
+    assert(rspec.type() == relocInfo::none, "call reloc not implemented");
+    mov_slow(scratch, target);
+    blr(scratch);
+#else
+    InlinedAddress address_literal(target);
+    relocate(rspec);
+    adr(LR, ret_addr);
+    ldr_literal(PC, address_literal);
+
+    bind_literal(address_literal);
+    bind(ret_addr);
+#endif
+  }
+}
+
+#if defined(AARCH64) && defined(COMPILER2)
+int MacroAssembler::call_size(address target, bool far, bool patchable) {
+  // FIXME: mov_slow is variable-length
+  if (!far) return 1; // bl
+  if (patchable) return 2; // ldr; blr
+  return instr_count_for_mov_slow((intptr_t)target) + 1;
+}
+#endif
+
+int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
+  assert(rspec.type() == relocInfo::static_call_type ||
+         rspec.type() == relocInfo::none ||
+         rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
+
+  // Always generate the relocation information, needed for patching
+  relocate(rspec); // used by NativeCall::is_call_before()
+  if (cache_fully_reachable()) {
+    // Note: this assumes that all possible targets (the initial one
+    // and the addresses patched to) are all in the code cache.
+    assert(CodeCache::contains(target), "target might be too far");
+    bl(target);
+  } else {
+#if defined(AARCH64) && defined(COMPILER2)
+    if (c2) {
+      // return address needs to match call_size().
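+      // (ldr_literal into LR followed by blr is the fixed two-instruction
+      // sequence that call_size(target, true, true) reports for the
+      // patchable far-call case.)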
+      // no need to trash Rtemp
+      int off = offset();
+      Label skip_literal;
+      InlinedAddress address_literal(target);
+      ldr_literal(LR, address_literal);
+      blr(LR);
+      int ret_addr_offset = offset();
+      assert(offset() - off == call_size(target, true, true) * InstructionSize, "need to fix call_size()");
+      b(skip_literal);
+      int off2 = offset();
+      bind_literal(address_literal);
+      if (offset() - off2 == wordSize) {
+        // no padding, so insert nop for worst-case sizing
+        nop();
+      }
+      bind(skip_literal);
+      return ret_addr_offset;
+    }
+#endif
+    Label ret_addr;
+    InlinedAddress address_literal(target);
+#ifdef AARCH64
+    ldr_literal(Rtemp, address_literal);
+    adr(LR, ret_addr);
+    br(Rtemp);
+#else
+    adr(LR, ret_addr);
+    ldr_literal(PC, address_literal);
+#endif
+    bind_literal(address_literal);
+    bind(ret_addr);
+  }
+  return offset();
+}
+
+
+void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
+  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+  ldr(tmp, Address(method, Method::const_offset()));
+  ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
+  ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
+  ldr(mirror, Address(tmp, mirror_offset));
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+// Compressed pointers
+
+#ifdef AARCH64
+
+void MacroAssembler::load_klass(Register dst_klass, Register src_oop) {
+  if (UseCompressedClassPointers) {
+    ldr_w(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
+    decode_klass_not_null(dst_klass);
+  } else {
+    ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()));
+  }
+}
+
+#else
+
+void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
+  ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
+}
+
+#endif // AARCH64
+
+// Blows src_klass.
+void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
+#ifdef AARCH64
+  if (UseCompressedClassPointers) {
+    assert(src_klass != dst_oop, "not enough registers");
+    encode_klass_not_null(src_klass);
+    str_w(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
+    return;
+  }
+#endif // AARCH64
+  str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
+}
+
+#ifdef AARCH64
+
+void MacroAssembler::store_klass_gap(Register dst) {
+  if (UseCompressedClassPointers) {
+    str_w(ZR, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
+  }
+}
+
+#endif // AARCH64
+
+
+void MacroAssembler::load_heap_oop(Register dst, Address src) {
+#ifdef AARCH64
+  if (UseCompressedOops) {
+    ldr_w(dst, src);
+    decode_heap_oop(dst);
+    return;
+  }
+#endif // AARCH64
+  ldr(dst, src);
+}
+
+// Blows src and flags.
+void MacroAssembler::store_heap_oop(Register src, Address dst) {
+#ifdef AARCH64
+  if (UseCompressedOops) {
+    assert(!dst.uses(src), "not enough registers");
+    encode_heap_oop(src);
+    str_w(src, dst);
+    return;
+  }
+#endif // AARCH64
+  str(src, dst);
+}
+
+void MacroAssembler::store_heap_oop_null(Register src, Address dst) {
+#ifdef AARCH64
+  if (UseCompressedOops) {
+    str_w(src, dst);
+    return;
+  }
+#endif // AARCH64
+  str(src, dst);
+}
+
+
+#ifdef AARCH64
+
+// Algorithm must match oop.inline.hpp encode_heap_oop.
+void MacroAssembler::encode_heap_oop(Register dst, Register src) {
+  // This code pattern is matched in NativeInstruction::skip_encode_heap_oop.
+  // Update it at modifications.
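+  //
+  // Computes: narrow = (src == NULL) ? 0 : (src - heap_base) >> shift.
+  // In the heap-based case the tst/csel pair substitutes heap_base for a
+  // NULL src so the subtraction naturally produces 0.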
+  assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
+#endif
+  verify_oop(src);
+  if (Universe::narrow_oop_base() == NULL) {
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      _lsr(dst, src, Universe::narrow_oop_shift());
+    } else if (dst != src) {
+      mov(dst, src);
+    }
+  } else {
+    tst(src, src);
+    csel(dst, Rheap_base, src, eq);
+    sub(dst, dst, Rheap_base);
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      _lsr(dst, dst, Universe::narrow_oop_shift());
+    }
+  }
+}
+
+// Same algorithm as oop.inline.hpp decode_heap_oop.
+void MacroAssembler::decode_heap_oop(Register dst, Register src) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
+#endif
+  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+  if (Universe::narrow_oop_base() != NULL) {
+    tst(src, src);
+    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
+    csel(dst, dst, ZR, ne);
+  } else {
+    _lsl(dst, src, Universe::narrow_oop_shift());
+  }
+  verify_oop(dst);
+}
+
+#ifdef COMPILER2
+// Algorithm must match oop.inline.hpp encode_heap_oop.
+// Must preserve condition codes, or C2 encodeHeapOop_not_null rule
+// must be changed.
+void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
+  assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?");
+#endif
+  verify_oop(src);
+  if (Universe::narrow_oop_base() == NULL) {
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      _lsr(dst, src, Universe::narrow_oop_shift());
+    } else if (dst != src) {
+      mov(dst, src);
+    }
+  } else {
+    sub(dst, src, Rheap_base);
+    if (Universe::narrow_oop_shift() != 0) {
+      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+      _lsr(dst, dst, Universe::narrow_oop_shift());
+    }
+  }
+}
+
+// Same algorithm as oop.inline.hpp decode_heap_oop.
+// Must preserve condition codes, or C2 decodeHeapOop_not_null rule
+// must be changed.
+void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
+#ifdef ASSERT
+  verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
+#endif
+  assert(Universe::narrow_oop_shift() == 0 || LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
+  if (Universe::narrow_oop_base() != NULL) {
+    add(dst, Rheap_base, AsmOperand(src, lsl, Universe::narrow_oop_shift()));
+  } else {
+    _lsl(dst, src, Universe::narrow_oop_shift());
+  }
+  verify_oop(dst);
+}
+
+void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
+  assert(UseCompressedClassPointers, "should only be used for compressed header");
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int klass_index = oop_recorder()->find_index(k);
+  RelocationHolder rspec = metadata_Relocation::spec(klass_index);
+
+  // Relocation with special format (see relocInfo_arm.hpp).
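+  // The movz/movk pair below materializes the 32-bit narrowKlass in two
+  // 16-bit halves; the patching code expects exactly this two-instruction
+  // shape, which is presumably why the generic mov_slow() is not used.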
+  relocate(rspec);
+  narrowKlass encoded_k = Klass::encode_klass(k);
+  movz(dst, encoded_k & 0xffff, 0);
+  movk(dst, (encoded_k >> 16) & 0xffff, 16);
+}
+
+void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
+  assert(UseCompressedOops, "should only be used for compressed header");
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+
+  relocate(rspec);
+  movz(dst, 0xffff, 0);
+  movk(dst, 0xffff, 16);
+}
+
+#endif // COMPILER2
+
+// Must preserve condition codes, or C2 encodeKlass_not_null rule
+// must be changed.
+void MacroAssembler::encode_klass_not_null(Register r) {
+  if (Universe::narrow_klass_base() != NULL) {
+    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
+    assert(r != Rheap_base, "Encoding a klass in Rheap_base");
+    mov_slow(Rheap_base, Universe::narrow_klass_base());
+    sub(r, r, Rheap_base);
+  }
+  if (Universe::narrow_klass_shift() != 0) {
+    assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+    _lsr(r, r, Universe::narrow_klass_shift());
+  }
+  if (Universe::narrow_klass_base() != NULL) {
+    reinit_heapbase();
+  }
+}
+
+// Must preserve condition codes, or C2 encodeKlass_not_null rule
+// must be changed.
+void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
+  if (dst == src) {
+    encode_klass_not_null(src);
+    return;
+  }
+  if (Universe::narrow_klass_base() != NULL) {
+    mov_slow(dst, (int64_t)Universe::narrow_klass_base());
+    sub(dst, src, dst);
+    if (Universe::narrow_klass_shift() != 0) {
+      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      _lsr(dst, dst, Universe::narrow_klass_shift());
+    }
+  } else {
+    if (Universe::narrow_klass_shift() != 0) {
+      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      _lsr(dst, src, Universe::narrow_klass_shift());
+    } else {
+      mov(dst, src);
+    }
+  }
+}
+
+// Function instr_count_for_decode_klass_not_null() counts the instructions
+// generated by decode_klass_not_null(Register r) and reinit_heapbase(),
+// when (Universe::heap() != NULL). Hence, if the instructions they
+// generate change, then this method needs to be updated.
+int MacroAssembler::instr_count_for_decode_klass_not_null() {
+  assert(UseCompressedClassPointers, "only for compressed klass ptrs");
+  assert(Universe::heap() != NULL, "java heap should be initialized");
+  if (Universe::narrow_klass_base() != NULL) {
+    return instr_count_for_mov_slow(Universe::narrow_klass_base()) + // mov_slow
+           1 +                                                       // add
+           instr_count_for_mov_slow(Universe::narrow_ptrs_base());   // reinit_heapbase() = mov_slow
+  } else {
+    if (Universe::narrow_klass_shift() != 0) {
+      return 1;
+    }
+  }
+  return 0;
+}
+
+// Must preserve condition codes, or C2 decodeKlass_not_null rule
+// must be changed.
+void MacroAssembler::decode_klass_not_null(Register r) {
+  int off = offset();
+  assert(UseCompressedClassPointers, "should only be used for compressed headers");
+  assert(Universe::heap() != NULL, "java heap should be initialized");
+  assert(r != Rheap_base, "Decoding a klass in Rheap_base");
+  // Cannot assert, instr_count_for_decode_klass_not_null() counts instructions.
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_klass_base() != NULL) {
+    // Use Rheap_base as a scratch register in which to temporarily load the narrow_klass_base.
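+    // Computes: klass = narrow_klass_base + (r << narrow_klass_shift);
+    // reinit_heapbase() below restores the Rheap_base value clobbered by
+    // mov_slow().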
+    mov_slow(Rheap_base, Universe::narrow_klass_base());
+    add(r, Rheap_base, AsmOperand(r, lsl, Universe::narrow_klass_shift()));
+    reinit_heapbase();
+  } else {
+    if (Universe::narrow_klass_shift() != 0) {
+      assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong");
+      _lsl(r, r, Universe::narrow_klass_shift());
+    }
+  }
+  assert((offset() - off) == (instr_count_for_decode_klass_not_null() * InstructionSize), "need to fix instr_count_for_decode_klass_not_null");
+}
+
+// Must preserve condition codes, or C2 decodeKlass_not_null rule
+// must be changed.
+void MacroAssembler::decode_klass_not_null(Register dst, Register src) {
+  if (src == dst) {
+    decode_klass_not_null(src);
+    return;
+  }
+
+  assert(UseCompressedClassPointers, "should only be used for compressed headers");
+  assert(Universe::heap() != NULL, "java heap should be initialized");
+  assert(src != Rheap_base, "Decoding a klass in Rheap_base");
+  assert(dst != Rheap_base, "Decoding a klass into Rheap_base");
+  // Also do not verify_oop as this is called by verify_oop.
+  if (Universe::narrow_klass_base() != NULL) {
+    mov_slow(dst, Universe::narrow_klass_base());
+    add(dst, dst, AsmOperand(src, lsl, Universe::narrow_klass_shift()));
+  } else {
+    _lsl(dst, src, Universe::narrow_klass_shift());
+  }
+}
+
+
+void MacroAssembler::reinit_heapbase() {
+  if (UseCompressedOops || UseCompressedClassPointers) {
+    if (Universe::heap() != NULL) {
+      mov_slow(Rheap_base, Universe::narrow_ptrs_base());
+    } else {
+      ldr_global_ptr(Rheap_base, (address)Universe::narrow_ptrs_base_addr());
+    }
+  }
+}
+
+#ifdef ASSERT
+void MacroAssembler::verify_heapbase(const char* msg) {
+  // This code pattern is matched in NativeInstruction::skip_verify_heapbase.
+  // Update it at modifications.
+  assert (UseCompressedOops, "should be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  if (CheckCompressedOops) {
+    Label ok;
+    raw_push(Rtemp, ZR);
+    mrs(Rtemp, Assembler::SysReg_NZCV);
+    str(Rtemp, Address(SP, 1 * wordSize));
+    mov_slow(Rtemp, Universe::narrow_ptrs_base());
+    cmp(Rheap_base, Rtemp);
+    b(ok, eq);
+    stop(msg);
+    bind(ok);
+    ldr(Rtemp, Address(SP, 1 * wordSize));
+    msr(Assembler::SysReg_NZCV, Rtemp);
+    raw_pop(Rtemp, ZR);
+  }
+}
+#endif // ASSERT
+
+#endif // AARCH64
+
+#ifdef COMPILER2
+void MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
+{
+  assert(VM_Version::supports_ldrex(), "unsupported, yet?");
+
+  Register Rmark = Rscratch2;
+
+  assert(Roop != Rscratch, "");
+  assert(Roop != Rmark, "");
+  assert(Rbox != Rscratch, "");
+  assert(Rbox != Rmark, "");
+
+  Label fast_lock, done;
+
+  if (UseBiasedLocking && !UseOptoBiasInlining) {
+    Label failed;
+#ifdef AARCH64
+    biased_locking_enter(Roop, Rmark, Rscratch, false, Rscratch3, done, failed);
+#else
+    biased_locking_enter(Roop, Rmark, Rscratch, false, noreg, done, failed);
+#endif
+    bind(failed);
+  }
+
+  ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes()));
+  tst(Rmark, markOopDesc::unlocked_value);
+  b(fast_lock, ne);
+
+  // Check for recursive lock
+  // See comments in InterpreterMacroAssembler::lock_object for
+  // explanations on the fast recursive locking check.
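+  // In short: the lock is treated as recursive if the mark word is a
+  // pointer into the current thread's stack, i.e. (mark - SP) is 4-byte
+  // aligned and smaller than the page size; in that case a zero displaced
+  // header is stored into the BasicLock.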
+#ifdef AARCH64
+  intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
+  Assembler::LogicalImmediate imm(mask, false);
+  mov(Rscratch, SP);
+  sub(Rscratch, Rmark, Rscratch);
+  ands(Rscratch, Rscratch, imm);
+  b(done, ne); // exit with failure
+  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // set to zero
+  b(done);
+
+#else
+  // -1- test low 2 bits
+  movs(Rscratch, AsmOperand(Rmark, lsl, 30));
+  // -2- test (hdr - SP) if the low two bits are 0
+  sub(Rscratch, Rmark, SP, eq);
+  movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq);
+  // If still 'eq' then recursive locking OK
+  str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()), eq); // set to zero
+  b(done);
+#endif
+
+  bind(fast_lock);
+  str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
+
+  bool allow_fallthrough_on_failure = true;
+  bool one_shot = true;
+  cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
+
+  bind(done);
+
+}
+
+void MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2 AARCH64_ONLY_ARG(Register Rscratch3))
+{
+  assert(VM_Version::supports_ldrex(), "unsupported, yet?");
+
+  Register Rmark = Rscratch2;
+
+  assert(Roop != Rscratch, "");
+  assert(Roop != Rmark, "");
+  assert(Rbox != Rscratch, "");
+  assert(Rbox != Rmark, "");
+
+  Label done;
+
+  if (UseBiasedLocking && !UseOptoBiasInlining) {
+    biased_locking_exit(Roop, Rscratch, done);
+  }
+
+  ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes()));
+  // If hdr is NULL, we've got recursive locking and there's nothing more to do
+  cmp(Rmark, 0);
+  b(done, eq);
+
+  // Restore the object header
+  bool allow_fallthrough_on_failure = true;
+  bool one_shot = true;
+  cas_for_lock_release(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot);
+
+  bind(done);
+
+}
+#endif // COMPILER2