--- /dev/null 2016-08-24 15:41:39.598575000 -0400 +++ new/src/cpu/arm/vm/interp_masm_arm.cpp 2016-12-13 12:50:40.156822613 -0500 @@ -0,0 +1,2272 @@ +/* + * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc/shared/barrierSet.inline.hpp" +#include "gc/shared/cardTableModRefBS.inline.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "interp_masm_arm.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "logging/log.hpp" +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "oops/method.hpp" +#include "oops/methodData.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/sharedRuntime.hpp" + +#if INCLUDE_ALL_GCS +#include "gc/g1/g1CollectedHeap.inline.hpp" +#include "gc/g1/g1SATBCardTableModRefBS.hpp" +#include "gc/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +//-------------------------------------------------------------------- +// Implementation of InterpreterMacroAssembler + + + + +InterpreterMacroAssembler::InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) { +} + +void InterpreterMacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { +#if defined(ASSERT) && !defined(AARCH64) + // Ensure that last_sp is not filled. + { Label L; + ldr(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); + cbz(Rtemp, L); + stop("InterpreterMacroAssembler::call_VM_helper: last_sp != NULL"); + bind(L); + } +#endif // ASSERT && !AARCH64 + + // Rbcp must be saved/restored since it may change due to GC. + save_bcp(); + +#ifdef AARCH64 + check_no_cached_stack_top(Rtemp); + save_stack_top(); + check_extended_sp(Rtemp); + cut_sp_before_call(); +#endif // AARCH64 + + // super call + MacroAssembler::call_VM_helper(oop_result, entry_point, number_of_arguments, check_exceptions); + +#ifdef AARCH64 + // Restore SP to extended SP + restore_sp_after_call(Rtemp); + check_stack_top(); + clear_cached_stack_top(); +#endif // AARCH64 + + // Restore interpreter specific registers. 
+ restore_bcp(); + restore_method(); +} + +void InterpreterMacroAssembler::jump_to_entry(address entry) { + assert(entry, "Entry must have been generated by now"); + b(entry); +} + +void InterpreterMacroAssembler::check_and_handle_popframe() { + if (can_pop_frame()) { + Label L; + const Register popframe_cond = R2_tmp; + + // Initiate popframe handling only if it is not already being processed. If the flag + // has the popframe_processing bit set, it means that this code is called *during* popframe + // handling - we don't want to reenter. + + ldr_s32(popframe_cond, Address(Rthread, JavaThread::popframe_condition_offset())); + tbz(popframe_cond, exact_log2(JavaThread::popframe_pending_bit), L); + tbnz(popframe_cond, exact_log2(JavaThread::popframe_processing_bit), L); + + // Call Interpreter::remove_activation_preserving_args_entry() to get the + // address of the same-named entrypoint in the generated interpreter code. + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); + + // Call indirectly to avoid generation ordering problem. + jump(R0); + + bind(L); + } +} + + +// Blows R2, Rtemp. Sets TOS cached value. +void InterpreterMacroAssembler::load_earlyret_value(TosState state) { + const Register thread_state = R2_tmp; + + ldr(thread_state, Address(Rthread, JavaThread::jvmti_thread_state_offset())); + + const Address tos_addr(thread_state, JvmtiThreadState::earlyret_tos_offset()); + const Address oop_addr(thread_state, JvmtiThreadState::earlyret_oop_offset()); + const Address val_addr(thread_state, JvmtiThreadState::earlyret_value_offset()); +#ifndef AARCH64 + const Address val_addr_hi(thread_state, JvmtiThreadState::earlyret_value_offset() + + in_ByteSize(wordSize)); +#endif // !AARCH64 + + Register zero = zero_register(Rtemp); + + switch (state) { + case atos: ldr(R0_tos, oop_addr); + str(zero, oop_addr); + interp_verify_oop(R0_tos, state, __FILE__, __LINE__); + break; + +#ifdef AARCH64 + case ltos: ldr(R0_tos, val_addr); break; +#else + case ltos: ldr(R1_tos_hi, val_addr_hi); // fall through +#endif // AARCH64 + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: ldr_s32(R0_tos, val_addr); break; +#ifdef __SOFTFP__ + case dtos: ldr(R1_tos_hi, val_addr_hi); // fall through + case ftos: ldr(R0_tos, val_addr); break; +#else + case ftos: ldr_float (S0_tos, val_addr); break; + case dtos: ldr_double(D0_tos, val_addr); break; +#endif // __SOFTFP__ + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + // Clean up tos value in the thread object + str(zero, val_addr); +#ifndef AARCH64 + str(zero, val_addr_hi); +#endif // !AARCH64 + + mov(Rtemp, (int) ilgl); + str_32(Rtemp, tos_addr); +} + + +// Blows R2, Rtemp. +void InterpreterMacroAssembler::check_and_handle_earlyret() { + if (can_force_early_return()) { + Label L; + const Register thread_state = R2_tmp; + + ldr(thread_state, Address(Rthread, JavaThread::jvmti_thread_state_offset())); + cbz(thread_state, L); // if (thread->jvmti_thread_state() == NULL) exit; + + // Initiate earlyret handling only if it is not already being processed. + // If the flag has the earlyret_processing bit set, it means that this code + // is called *during* earlyret handling - we don't want to reenter. 
+ + ldr_s32(Rtemp, Address(thread_state, JvmtiThreadState::earlyret_state_offset())); + cmp(Rtemp, JvmtiThreadState::earlyret_pending); + b(L, ne); + + // Call Interpreter::remove_activation_early_entry() to get the address of the + // same-named entrypoint in the generated interpreter code. + + ldr_s32(R0, Address(thread_state, JvmtiThreadState::earlyret_tos_offset())); + call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), R0); + + jump(R0); + + bind(L); + } +} + + +// Sets reg. Blows Rtemp. +void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { + assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); + assert(reg != Rtemp, "should be different registers"); + + ldrb(Rtemp, Address(Rbcp, bcp_offset)); + ldrb(reg, Address(Rbcp, bcp_offset+1)); + orr(reg, reg, AsmOperand(Rtemp, lsl, BitsPerByte)); +} + +void InterpreterMacroAssembler::get_index_at_bcp(Register index, int bcp_offset, Register tmp_reg, size_t index_size) { + assert_different_registers(index, tmp_reg); + if (index_size == sizeof(u2)) { + // load bytes of index separately to avoid unaligned access + ldrb(index, Address(Rbcp, bcp_offset+1)); + ldrb(tmp_reg, Address(Rbcp, bcp_offset)); + orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); + } else if (index_size == sizeof(u4)) { + // TODO-AARCH64: consider using unaligned access here + ldrb(index, Address(Rbcp, bcp_offset+3)); + ldrb(tmp_reg, Address(Rbcp, bcp_offset+2)); + orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); + ldrb(tmp_reg, Address(Rbcp, bcp_offset+1)); + orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); + ldrb(tmp_reg, Address(Rbcp, bcp_offset)); + orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); + // Check if the secondary index definition is still ~x, otherwise + // we have to change the following assembler code to calculate the + // plain index. + assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); + mvn_32(index, index); // convert to plain index + } else if (index_size == sizeof(u1)) { + ldrb(index, Address(Rbcp, bcp_offset)); + } else { + ShouldNotReachHere(); + } +} + +// Sets cache, index. +void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size) { + assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); + assert_different_registers(cache, index); + + get_index_at_bcp(index, bcp_offset, cache, index_size); + + // load constant pool cache pointer + ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize)); + + // convert from field index to ConstantPoolCacheEntry index + assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below"); + // TODO-AARCH64 merge this shift with shift "add(..., Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord))" after this method is called + logical_shift_left(index, index, 2); +} + +// Sets cache, index, bytecode. 
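+// The bytecode register receives the resolved bytecode for the given byte_no, read
+// from the ConstantPoolCacheEntry indices field with load-acquire/LoadLoad ordering.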
+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size) {
+  get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
+  // caution index and bytecode can be the same
+  add(bytecode, cache, AsmOperand(index, lsl, LogBytesPerWord));
+#ifdef AARCH64
+  add(bytecode, bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()));
+  ldarb(bytecode, bytecode);
+#else
+  ldrb(bytecode, Address(bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())));
+  TemplateTable::volatile_barrier(MacroAssembler::LoadLoad, noreg, true);
+#endif // AARCH64
+}
+
+// Sets cache. Blows reg_tmp.
+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, Register reg_tmp, int bcp_offset, size_t index_size) {
+  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+  assert_different_registers(cache, reg_tmp);
+
+  get_index_at_bcp(reg_tmp, bcp_offset, cache, index_size);
+
+  // load constant pool cache pointer
+  ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize));
+
+  // skip past the header
+  add(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
+  // convert from field index to ConstantPoolCacheEntry index
+  // and from word offset to byte offset
+  assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
+  add(cache, cache, AsmOperand(reg_tmp, lsl, 2 + LogBytesPerWord));
+}
+
+// Load object from cpool->resolved_references(index)
+void InterpreterMacroAssembler::load_resolved_reference_at_index(
+                                           Register result, Register index) {
+  assert_different_registers(result, index);
+  get_constant_pool(result);
+
+  Register cache = result;
+  // load pointer for resolved_references[] objArray
+  ldr(cache, Address(result, ConstantPool::resolved_references_offset_in_bytes()));
+  // JNIHandles::resolve(result)
+  ldr(cache, Address(cache, 0));
+  // Add in the index
+  // convert from field index to resolved_references() index and from
+  // word index to byte offset. Since this is a java object, it can be compressed
+  add(cache, cache, AsmOperand(index, lsl, LogBytesPerHeapOop));
+  load_heap_oop(result, Address(cache, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+}
+
+// Generate a subtype check: branch to not_subtype if sub_klass is
+// not a subtype of super_klass.
+// Profiling code for the subtype check failure (profile_typecheck_failed)
+// should be explicitly generated by the caller in the not_subtype case.
+// Blows Rtemp, tmp1, tmp2.
+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
+                                                  Register Rsuper_klass,
+                                                  Label &not_subtype,
+                                                  Register tmp1,
+                                                  Register tmp2) {
+
+  assert_different_registers(Rsub_klass, Rsuper_klass, tmp1, tmp2, Rtemp);
+  Label ok_is_subtype, loop, update_cache;
+
+  const Register super_check_offset = tmp1;
+  const Register cached_super = tmp2;
+
+  // Profile the not-null value's klass.
+  profile_typecheck(tmp1, Rsub_klass);
+
+  // Load the super-klass's check offset into super_check_offset
+  ldr_u32(super_check_offset, Address(Rsuper_klass, Klass::super_check_offset_offset()));
+
+  // Check for self
+  cmp(Rsub_klass, Rsuper_klass);
+
+  // Load from the sub-klass's super-class display list, or a 1-word cache of
+  // the secondary superclass list, or a failing value with a sentinel offset
+  // if the super-klass is an interface or exceptionally deep in the Java
+  // hierarchy and we have to scan the secondary superclass list the hard way.
+  // See if we get an immediate positive hit
+  ldr(cached_super, Address(Rsub_klass, super_check_offset));
+
+  cond_cmp(Rsuper_klass, cached_super, ne);
+  b(ok_is_subtype, eq);
+
+  // Check for immediate negative hit
+  cmp(super_check_offset, in_bytes(Klass::secondary_super_cache_offset()));
+  b(not_subtype, ne);
+
+  // Now do a linear scan of the secondary super-klass chain.
+  const Register supers_arr = tmp1;
+  const Register supers_cnt = tmp2;
+  const Register cur_super  = Rtemp;
+
+  // Load objArrayOop of secondary supers.
+  ldr(supers_arr, Address(Rsub_klass, Klass::secondary_supers_offset()));
+
+  ldr_u32(supers_cnt, Address(supers_arr, Array<Klass*>::length_offset_in_bytes())); // Load the array length
+#ifdef AARCH64
+  cbz(supers_cnt, not_subtype);
+  add(supers_arr, supers_arr, Array<Klass*>::base_offset_in_bytes());
+#else
+  cmp(supers_cnt, 0);
+
+  // Skip to the start of array elements and prefetch the first super-klass.
+  ldr(cur_super, Address(supers_arr, Array<Klass*>::base_offset_in_bytes(), pre_indexed), ne);
+  b(not_subtype, eq);
+#endif // AARCH64
+
+  bind(loop);
+
+#ifdef AARCH64
+  ldr(cur_super, Address(supers_arr, wordSize, post_indexed));
+#endif // AARCH64
+
+  cmp(cur_super, Rsuper_klass);
+  b(update_cache, eq);
+
+  subs(supers_cnt, supers_cnt, 1);
+
+#ifndef AARCH64
+  ldr(cur_super, Address(supers_arr, wordSize, pre_indexed), ne);
+#endif // !AARCH64
+
+  b(loop, ne);
+
+  b(not_subtype);
+
+  bind(update_cache);
+  // Must be equal but missed in cache. Update cache.
+  str(Rsuper_klass, Address(Rsub_klass, Klass::secondary_super_cache_offset()));
+
+  bind(ok_is_subtype);
+}
+
+
+// The 1st part of the store check.
+// Sets card_table_base register.
+void InterpreterMacroAssembler::store_check_part1(Register card_table_base) {
+  // Check barrier set type (should be card table) and element size
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  assert(bs->kind() == BarrierSet::CardTableForRS ||
+         bs->kind() == BarrierSet::CardTableExtension,
+         "Wrong barrier set kind");
+
+  CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "Adjust store check code");
+
+  // Load card table base address.
+
+  /* Performance note.
+
+   There is an alternative way of loading card table base address
+   from thread descriptor, which may look more efficient:
+
+     ldr(card_table_base, Address(Rthread, JavaThread::card_table_base_offset()));
+
+   However, performance measurements of micro benchmarks and specJVM98
+   showed that loading of card table base from thread descriptor is
+   7-18% slower compared to loading of literal embedded into the code.
+   Possible cause is a cache miss (card table base address resides in a
+   rarely accessed area of thread descriptor).
+  */
+  // TODO-AARCH64 Investigate if mov_slow is faster than ldr from Rthread on AArch64
+  mov_address(card_table_base, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference);
+}
+
+// The 2nd part of the store check.
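+// Marks the card covering obj as dirty. With UseCondCardMark the card byte is first
+// tested and only written if it is not already dirty (preceded by a StoreLoad barrier for CMS).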
+void InterpreterMacroAssembler::store_check_part2(Register obj, Register card_table_base, Register tmp) {
+  assert_different_registers(obj, card_table_base, tmp);
+
+  assert(CardTableModRefBS::dirty_card_val() == 0, "Dirty card value must be 0 due to optimizations.");
+#ifdef AARCH64
+  add(card_table_base, card_table_base, AsmOperand(obj, lsr, CardTableModRefBS::card_shift));
+  Address card_table_addr(card_table_base);
+#else
+  Address card_table_addr(card_table_base, obj, lsr, CardTableModRefBS::card_shift);
+#endif
+
+  if (UseCondCardMark) {
+    if (UseConcMarkSweepGC) {
+      membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), noreg);
+    }
+    Label already_dirty;
+
+    ldrb(tmp, card_table_addr);
+    cbz(tmp, already_dirty);
+
+    set_card(card_table_base, card_table_addr, tmp);
+    bind(already_dirty);
+
+  } else {
+    if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
+      membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore), noreg);
+    }
+    set_card(card_table_base, card_table_addr, tmp);
+  }
+}
+
+void InterpreterMacroAssembler::set_card(Register card_table_base, Address card_table_addr, Register tmp) {
+#ifdef AARCH64
+  strb(ZR, card_table_addr);
+#else
+  CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(Universe::heap()->barrier_set());
+  if ((((uintptr_t)ct->byte_map_base & 0xff) == 0)) {
+    // Card table is aligned so the lowest byte of the table address base is zero.
+    // This works only if the code is not saved for later use, possibly
+    // in a context where the base would no longer be aligned.
+    strb(card_table_base, card_table_addr);
+  } else {
+    mov(tmp, 0);
+    strb(tmp, card_table_addr);
+  }
+#endif // AARCH64
+}
+
+//////////////////////////////////////////////////////////////////////////////////
+#if INCLUDE_ALL_GCS
+
+// G1 pre-barrier.
+// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+// If store_addr != noreg, then previous value is loaded from [store_addr];
+// in such case store_addr and new_val registers are preserved;
+// otherwise pre_val register is preserved.
+void InterpreterMacroAssembler::g1_write_barrier_pre(Register store_addr,
+                                                     Register new_val,
+                                                     Register pre_val,
+                                                     Register tmp1,
+                                                     Register tmp2) {
+  Label done;
+  Label runtime;
+
+  if (store_addr != noreg) {
+    assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
+  } else {
+    assert (new_val == noreg, "should be");
+    assert_different_registers(pre_val, tmp1, tmp2, noreg);
+  }
+
+  Address in_progress(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                        SATBMarkQueue::byte_offset_of_active()));
+  Address index(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                  SATBMarkQueue::byte_offset_of_index()));
+  Address buffer(Rthread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                   SATBMarkQueue::byte_offset_of_buf()));
+
+  // Is marking active?
+  assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
+  ldrb(tmp1, in_progress);
+  cbz(tmp1, done);
+
+  // Do we need to load the previous value?
+  if (store_addr != noreg) {
+    load_heap_oop(pre_val, Address(store_addr, 0));
+  }
+
+  // Is the previous value null?
+  cbz(pre_val, done);
+
+  // Can we store original value in the thread's buffer?
+  // Is index == 0?
+  // (The index field is typed as size_t.)
+ + ldr(tmp1, index); // tmp1 := *index_adr + ldr(tmp2, buffer); + + subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize + b(runtime, lt); // If negative, goto runtime + + str(tmp1, index); // *index_adr := tmp1 + + // Record the previous value + str(pre_val, Address(tmp2, tmp1)); + b(done); + + bind(runtime); + + // save the live input values +#ifdef AARCH64 + if (store_addr != noreg) { + raw_push(store_addr, new_val); + } else { + raw_push(pre_val, ZR); + } +#else + if (store_addr != noreg) { + // avoid raw_push to support any ordering of store_addr and new_val + push(RegisterSet(store_addr) | RegisterSet(new_val)); + } else { + push(pre_val); + } +#endif // AARCH64 + + if (pre_val != R0) { + mov(R0, pre_val); + } + mov(R1, Rthread); + + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1); + +#ifdef AARCH64 + if (store_addr != noreg) { + raw_pop(store_addr, new_val); + } else { + raw_pop(pre_val, ZR); + } +#else + if (store_addr != noreg) { + pop(RegisterSet(store_addr) | RegisterSet(new_val)); + } else { + pop(pre_val); + } +#endif // AARCH64 + + bind(done); +} + +// G1 post-barrier. +// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). +void InterpreterMacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, + Register tmp1, + Register tmp2, + Register tmp3) { + + Address queue_index(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() + + DirtyCardQueue::byte_offset_of_index())); + Address buffer(Rthread, in_bytes(JavaThread::dirty_card_queue_offset() + + DirtyCardQueue::byte_offset_of_buf())); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + Label done; + Label runtime; + + // Does store cross heap regions? + + eor(tmp1, store_addr, new_val); +#ifdef AARCH64 + logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes); + cbz(tmp1, done); +#else + movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes)); + b(done, eq); +#endif + + // crosses regions, storing NULL? + + cbz(new_val, done); + + // storing region crossing non-NULL, is card already dirty? + const Register card_addr = tmp1; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + mov_address(tmp2, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference); + add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTableModRefBS::card_shift)); + + ldrb(tmp2, Address(card_addr)); + cmp(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val()); + b(done, eq); + + membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2); + + assert(CardTableModRefBS::dirty_card_val() == 0, "adjust this code"); + ldrb(tmp2, Address(card_addr)); + cbz(tmp2, done); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. 
+ + strb(zero_register(tmp2), Address(card_addr)); + + ldr(tmp2, queue_index); + ldr(tmp3, buffer); + + subs(tmp2, tmp2, wordSize); + b(runtime, lt); // go to runtime if now negative + + str(tmp2, queue_index); + + str(card_addr, Address(tmp3, tmp2)); + b(done); + + bind(runtime); + + if (card_addr != R0) { + mov(R0, card_addr); + } + mov(R1, Rthread); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1); + + bind(done); +} + +#endif // INCLUDE_ALL_GCS +////////////////////////////////////////////////////////////////////////////////// + + +// Java Expression Stack + +void InterpreterMacroAssembler::pop_ptr(Register r) { + assert(r != Rstack_top, "unpredictable instruction"); + ldr(r, Address(Rstack_top, wordSize, post_indexed)); +} + +void InterpreterMacroAssembler::pop_i(Register r) { + assert(r != Rstack_top, "unpredictable instruction"); + ldr_s32(r, Address(Rstack_top, wordSize, post_indexed)); + zap_high_non_significant_bits(r); +} + +#ifdef AARCH64 +void InterpreterMacroAssembler::pop_l(Register r) { + assert(r != Rstack_top, "unpredictable instruction"); + ldr(r, Address(Rstack_top, 2*wordSize, post_indexed)); +} +#else +void InterpreterMacroAssembler::pop_l(Register lo, Register hi) { + assert_different_registers(lo, hi); + assert(lo < hi, "lo must be < hi"); + pop(RegisterSet(lo) | RegisterSet(hi)); +} +#endif // AARCH64 + +void InterpreterMacroAssembler::pop_f(FloatRegister fd) { +#ifdef AARCH64 + ldr_s(fd, Address(Rstack_top, wordSize, post_indexed)); +#else + fpops(fd); +#endif // AARCH64 +} + +void InterpreterMacroAssembler::pop_d(FloatRegister fd) { +#ifdef AARCH64 + ldr_d(fd, Address(Rstack_top, 2*wordSize, post_indexed)); +#else + fpopd(fd); +#endif // AARCH64 +} + + +// Transition vtos -> state. Blows R0, R1. Sets TOS cached value. 
+void InterpreterMacroAssembler::pop(TosState state) { + switch (state) { + case atos: pop_ptr(R0_tos); break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: pop_i(R0_tos); break; +#ifdef AARCH64 + case ltos: pop_l(R0_tos); break; +#else + case ltos: pop_l(R0_tos_lo, R1_tos_hi); break; +#endif // AARCH64 +#ifdef __SOFTFP__ + case ftos: pop_i(R0_tos); break; + case dtos: pop_l(R0_tos_lo, R1_tos_hi); break; +#else + case ftos: pop_f(S0_tos); break; + case dtos: pop_d(D0_tos); break; +#endif // __SOFTFP__ + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } + interp_verify_oop(R0_tos, state, __FILE__, __LINE__); +} + +void InterpreterMacroAssembler::push_ptr(Register r) { + assert(r != Rstack_top, "unpredictable instruction"); + str(r, Address(Rstack_top, -wordSize, pre_indexed)); + check_stack_top_on_expansion(); +} + +void InterpreterMacroAssembler::push_i(Register r) { + assert(r != Rstack_top, "unpredictable instruction"); + str_32(r, Address(Rstack_top, -wordSize, pre_indexed)); + check_stack_top_on_expansion(); +} + +#ifdef AARCH64 +void InterpreterMacroAssembler::push_l(Register r) { + assert(r != Rstack_top, "unpredictable instruction"); + stp(r, ZR, Address(Rstack_top, -2*wordSize, pre_indexed)); + check_stack_top_on_expansion(); +} +#else +void InterpreterMacroAssembler::push_l(Register lo, Register hi) { + assert_different_registers(lo, hi); + assert(lo < hi, "lo must be < hi"); + push(RegisterSet(lo) | RegisterSet(hi)); +} +#endif // AARCH64 + +void InterpreterMacroAssembler::push_f() { +#ifdef AARCH64 + str_s(S0_tos, Address(Rstack_top, -wordSize, pre_indexed)); + check_stack_top_on_expansion(); +#else + fpushs(S0_tos); +#endif // AARCH64 +} + +void InterpreterMacroAssembler::push_d() { +#ifdef AARCH64 + str_d(D0_tos, Address(Rstack_top, -2*wordSize, pre_indexed)); + check_stack_top_on_expansion(); +#else + fpushd(D0_tos); +#endif // AARCH64 +} + +// Transition state -> vtos. Blows Rtemp. +void InterpreterMacroAssembler::push(TosState state) { + interp_verify_oop(R0_tos, state, __FILE__, __LINE__); + switch (state) { + case atos: push_ptr(R0_tos); break; + case btos: // fall through + case ztos: // fall through + case ctos: // fall through + case stos: // fall through + case itos: push_i(R0_tos); break; +#ifdef AARCH64 + case ltos: push_l(R0_tos); break; +#else + case ltos: push_l(R0_tos_lo, R1_tos_hi); break; +#endif // AARCH64 +#ifdef __SOFTFP__ + case ftos: push_i(R0_tos); break; + case dtos: push_l(R0_tos_lo, R1_tos_hi); break; +#else + case ftos: push_f(); break; + case dtos: push_d(); break; +#endif // __SOFTFP__ + case vtos: /* nothing to do */ break; + default : ShouldNotReachHere(); + } +} + + +#ifndef AARCH64 + +// Converts return value in R0/R1 (interpreter calling conventions) to TOS cached value. +void InterpreterMacroAssembler::convert_retval_to_tos(TosState state) { +#if (!defined __SOFTFP__ && !defined __ABI_HARD__) + // According to interpreter calling conventions, result is returned in R0/R1, + // but templates expect ftos in S0, and dtos in D0. + if (state == ftos) { + fmsr(S0_tos, R0); + } else if (state == dtos) { + fmdrr(D0_tos, R0, R1); + } +#endif // !__SOFTFP__ && !__ABI_HARD__ +} + +// Converts TOS cached value to return value in R0/R1 (according to interpreter calling conventions). 
+void InterpreterMacroAssembler::convert_tos_to_retval(TosState state) { +#if (!defined __SOFTFP__ && !defined __ABI_HARD__) + // According to interpreter calling conventions, result is returned in R0/R1, + // so ftos (S0) and dtos (D0) are moved to R0/R1. + if (state == ftos) { + fmrs(R0, S0_tos); + } else if (state == dtos) { + fmrrd(R0, R1, D0_tos); + } +#endif // !__SOFTFP__ && !__ABI_HARD__ +} + +#endif // !AARCH64 + + +// Helpers for swap and dup +void InterpreterMacroAssembler::load_ptr(int n, Register val) { + ldr(val, Address(Rstack_top, Interpreter::expr_offset_in_bytes(n))); +} + +void InterpreterMacroAssembler::store_ptr(int n, Register val) { + str(val, Address(Rstack_top, Interpreter::expr_offset_in_bytes(n))); +} + + +void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { +#ifdef AARCH64 + check_no_cached_stack_top(Rtemp); + save_stack_top(); + cut_sp_before_call(); + mov(Rparams, Rstack_top); +#endif // AARCH64 + + // set sender sp + mov(Rsender_sp, SP); + +#ifndef AARCH64 + // record last_sp + str(Rsender_sp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); +#endif // !AARCH64 +} + +// Jump to from_interpreted entry of a call unless single stepping is possible +// in this thread in which case we must call the i2i entry +void InterpreterMacroAssembler::jump_from_interpreted(Register method) { + assert_different_registers(method, Rtemp); + + prepare_to_jump_from_interpreted(); + + if (can_post_interpreter_events()) { + // JVMTI events, such as single-stepping, are implemented partly by avoiding running + // compiled code in threads for which the event is enabled. Check here for + // interp_only_mode if these events CAN be enabled. + + ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset())); +#ifdef AARCH64 + { + Label not_interp_only_mode; + + cbz(Rtemp, not_interp_only_mode); + indirect_jump(Address(method, Method::interpreter_entry_offset()), Rtemp); + + bind(not_interp_only_mode); + } +#else + cmp(Rtemp, 0); + ldr(PC, Address(method, Method::interpreter_entry_offset()), ne); +#endif // AARCH64 + } + + indirect_jump(Address(method, Method::from_interpreted_offset()), Rtemp); +} + + +void InterpreterMacroAssembler::restore_dispatch() { + mov_slow(RdispatchTable, (address)Interpreter::dispatch_table(vtos)); +} + + +// The following two routines provide a hook so that an implementation +// can schedule the dispatch in two parts. +void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { + // Nothing ARM-specific to be done here. 
+} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { + dispatch_next(state, step); +} + +void InterpreterMacroAssembler::dispatch_base(TosState state, + DispatchTableMode table_mode, + bool verifyoop) { + if (VerifyActivationFrameSize) { + Label L; +#ifdef AARCH64 + mov(Rtemp, SP); + sub(Rtemp, FP, Rtemp); +#else + sub(Rtemp, FP, SP); +#endif // AARCH64 + int min_frame_size = (frame::link_offset - frame::interpreter_frame_initial_sp_offset) * wordSize; + cmp(Rtemp, min_frame_size); + b(L, ge); + stop("broken stack frame"); + bind(L); + } + + if (verifyoop) { + interp_verify_oop(R0_tos, state, __FILE__, __LINE__); + } + + if((state == itos) || (state == btos) || (state == ztos) || (state == ctos) || (state == stos)) { + zap_high_non_significant_bits(R0_tos); + } + +#ifdef ASSERT + Label L; + mov_slow(Rtemp, (address)Interpreter::dispatch_table(vtos)); + cmp(Rtemp, RdispatchTable); + b(L, eq); + stop("invalid RdispatchTable"); + bind(L); +#endif + + if (table_mode == DispatchDefault) { + if (state == vtos) { + indirect_jump(Address::indexed_ptr(RdispatchTable, R3_bytecode), Rtemp); + } else { +#ifdef AARCH64 + sub(Rtemp, R3_bytecode, (Interpreter::distance_from_dispatch_table(vtos) - + Interpreter::distance_from_dispatch_table(state))); + indirect_jump(Address::indexed_ptr(RdispatchTable, Rtemp), Rtemp); +#else + // on 32-bit ARM this method is faster than the one above. + sub(Rtemp, RdispatchTable, (Interpreter::distance_from_dispatch_table(vtos) - + Interpreter::distance_from_dispatch_table(state)) * wordSize); + indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp); +#endif + } + } else { + assert(table_mode == DispatchNormal, "invalid dispatch table mode"); + address table = (address) Interpreter::normal_table(state); + mov_slow(Rtemp, table); + indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp); + } + + nop(); // to avoid filling CPU pipeline with invalid instructions + nop(); +} + +void InterpreterMacroAssembler::dispatch_only(TosState state) { + dispatch_base(state, DispatchDefault); +} + + +void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { + dispatch_base(state, DispatchNormal); +} + +void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { + dispatch_base(state, DispatchNormal, false); +} + +void InterpreterMacroAssembler::dispatch_next(TosState state, int step) { + // load next bytecode and advance Rbcp + ldrb(R3_bytecode, Address(Rbcp, step, pre_indexed)); + dispatch_base(state, DispatchDefault); +} + +void InterpreterMacroAssembler::narrow(Register result) { + // mask integer result to narrower return type. + const Register Rtmp = R2; + + // get method type + ldr(Rtmp, Address(Rmethod, Method::const_offset())); + ldrb(Rtmp, Address(Rtmp, ConstMethod::result_type_offset())); + + Label notBool, notByte, notChar, done; + cmp(Rtmp, T_INT); + b(done, eq); + + cmp(Rtmp, T_BOOLEAN); + b(notBool, ne); + and_32(result, result, 1); + b(done); + + bind(notBool); + cmp(Rtmp, T_BYTE); + b(notByte, ne); + sign_extend(result, result, 8); + b(done); + + bind(notByte); + cmp(Rtmp, T_CHAR); + b(notChar, ne); + zero_extend(result, result, 16); + b(done); + + bind(notChar); + // cmp(Rtmp, T_SHORT); + // b(done, ne); + sign_extend(result, result, 16); + + // Nothing to do + bind(done); +} + +// remove activation +// +// Unlock the receiver if this is a synchronized method. +// Unlock any Java monitors from syncronized blocks. +// Remove the activation from the stack. 
+// +// If there are locked Java monitors +// If throw_monitor_exception +// throws IllegalMonitorStateException +// Else if install_monitor_exception +// installs IllegalMonitorStateException +// Else +// no error processing +void InterpreterMacroAssembler::remove_activation(TosState state, Register ret_addr, + bool throw_monitor_exception, + bool install_monitor_exception, + bool notify_jvmdi) { + Label unlock, unlocked, no_unlock; + + // Note: Registers R0, R1, S0 and D0 (TOS cached value) may be in use for the result. + + const Address do_not_unlock_if_synchronized(Rthread, + JavaThread::do_not_unlock_if_synchronized_offset()); + + const Register Rflag = R2; + const Register Raccess_flags = R3; + + restore_method(); + + ldrb(Rflag, do_not_unlock_if_synchronized); + + // get method access flags + ldr_u32(Raccess_flags, Address(Rmethod, Method::access_flags_offset())); + + strb(zero_register(Rtemp), do_not_unlock_if_synchronized); // reset the flag + + // check if method is synchronized + + tbz(Raccess_flags, JVM_ACC_SYNCHRONIZED_BIT, unlocked); + + // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. + cbnz(Rflag, no_unlock); + + // unlock monitor + push(state); // save result + + // BasicObjectLock will be first in list, since this is a synchronized method. However, need + // to check that the object has not been unlocked by an explicit monitorexit bytecode. + + const Register Rmonitor = R1; // fixed in unlock_object() + const Register Robj = R2; + + // address of first monitor + sub(Rmonitor, FP, - frame::interpreter_frame_monitor_block_bottom_offset * wordSize + (int)sizeof(BasicObjectLock)); + + ldr(Robj, Address(Rmonitor, BasicObjectLock::obj_offset_in_bytes())); + cbnz(Robj, unlock); + + pop(state); + + if (throw_monitor_exception) { + // Entry already unlocked, need to throw exception + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Monitor already unlocked during a stack unroll. + // If requested, install an illegal_monitor_state_exception. + // Continue with stack unrolling. + if (install_monitor_exception) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception)); + } + b(unlocked); + } + + + // Exception case for the check that all monitors are unlocked. + const Register Rcur = R2; + Label restart_check_monitors_unlocked, exception_monitor_is_still_locked; + + bind(exception_monitor_is_still_locked); + // Monitor entry is still locked, need to throw exception. + // Rcur: monitor entry. + + if (throw_monitor_exception) { + // Throw exception + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception)); + should_not_reach_here(); + } else { + // Stack unrolling. 
Unlock object and install illegal_monitor_exception + // Unlock does not block, so don't have to worry about the frame + + push(state); + mov(R1, Rcur); + unlock_object(R1); + + if (install_monitor_exception) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception)); + } + + pop(state); + b(restart_check_monitors_unlocked); + } + + bind(unlock); + unlock_object(Rmonitor); + pop(state); + + // Check that for block-structured locking (i.e., that all locked objects has been unlocked) + bind(unlocked); + + // Check that all monitors are unlocked + { + Label loop; + + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + const Register Rbottom = R3; + const Register Rcur_obj = Rtemp; + + bind(restart_check_monitors_unlocked); + + ldr(Rcur, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize)); + // points to current entry, starting with top-most entry + sub(Rbottom, FP, -frame::interpreter_frame_monitor_block_bottom_offset * wordSize); + // points to word before bottom of monitor block + + cmp(Rcur, Rbottom); // check if there are no monitors +#ifndef AARCH64 + ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); + // prefetch monitor's object +#endif // !AARCH64 + b(no_unlock, eq); + + bind(loop); +#ifdef AARCH64 + ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes())); +#endif // AARCH64 + // check if current entry is used + cbnz(Rcur_obj, exception_monitor_is_still_locked); + + add(Rcur, Rcur, entry_size); // otherwise advance to next entry + cmp(Rcur, Rbottom); // check if bottom reached +#ifndef AARCH64 + ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); + // prefetch monitor's object +#endif // !AARCH64 + b(loop, ne); // if not at bottom then check this entry + } + + bind(no_unlock); + + // jvmti support + if (notify_jvmdi) { + notify_method_exit(state, NotifyJVMTI); // preserve TOSCA + } else { + notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA + } + + // remove activation +#ifdef AARCH64 + ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); + ldp(FP, LR, Address(FP)); + mov(SP, Rtemp); +#else + mov(Rtemp, FP); + ldmia(FP, RegisterSet(FP) | RegisterSet(LR)); + ldr(SP, Address(Rtemp, frame::interpreter_frame_sender_sp_offset * wordSize)); +#endif + + if (ret_addr != LR) { + mov(ret_addr, LR); + } +} + + +// At certain points in the method invocation the monitor of +// synchronized methods hasn't been entered yet. +// To correctly handle exceptions at these points, we set the thread local +// variable _do_not_unlock_if_synchronized to true. The remove_activation will +// check this flag. +void InterpreterMacroAssembler::set_do_not_unlock_if_synchronized(bool flag, Register tmp) { + const Address do_not_unlock_if_synchronized(Rthread, + JavaThread::do_not_unlock_if_synchronized_offset()); + if (flag) { + mov(tmp, 1); + strb(tmp, do_not_unlock_if_synchronized); + } else { + strb(zero_register(tmp), do_not_unlock_if_synchronized); + } +} + +// Lock object +// +// Argument: R1 : Points to BasicObjectLock to be used for locking. +// Must be initialized with object to lock. +// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM. 
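+// Fast path: biased locking (if enabled), then a CAS-based stack lock with a
+// recursive-lock check; otherwise falls back to InterpreterRuntime::monitorenter.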
+void InterpreterMacroAssembler::lock_object(Register Rlock) { + assert(Rlock == R1, "the second argument"); + + if (UseHeavyMonitors) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock); + } else { + Label done; + + const Register Robj = R2; + const Register Rmark = R3; + assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp); + + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); + + Label already_locked, slow_case; + + // Load object pointer + ldr(Robj, Address(Rlock, obj_offset)); + + if (UseBiasedLocking) { + biased_locking_enter(Robj, Rmark/*scratched*/, R0, false, Rtemp, done, slow_case); + } + +#ifdef AARCH64 + assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); + ldr(Rmark, Robj); + + // Test if object is already locked + assert(markOopDesc::unlocked_value == 1, "adjust this code"); + tbz(Rmark, exact_log2(markOopDesc::unlocked_value), already_locked); + +#else // AARCH64 + + // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. + // That would be acceptable as ether CAS or slow case path is taken in that case. + // Exception to that is if the object is locked by the calling thread, then the recursive test will pass (guaranteed as + // loads are satisfied from a store queue if performed on the same processor). + + assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); + ldr(Rmark, Address(Robj, oopDesc::mark_offset_in_bytes())); + + // Test if object is already locked + tst(Rmark, markOopDesc::unlocked_value); + b(already_locked, eq); + +#endif // !AARCH64 + // Save old object->mark() into BasicLock's displaced header + str(Rmark, Address(Rlock, mark_offset)); + + cas_for_lock_acquire(Rmark, Rlock, Robj, Rtemp, slow_case); + +#ifndef PRODUCT + if (PrintBiasedLockingStatistics) { + cond_atomic_inc32(al, BiasedLocking::fast_path_entry_count_addr()); + } +#endif //!PRODUCT + + b(done); + + // If we got here that means the object is locked by ether calling thread or another thread. + bind(already_locked); + // Handling of locked objects: recursive locks and slow case. + + // Fast check for recursive lock. + // + // Can apply the optimization only if this is a stack lock + // allocated in this thread. For efficiency, we can focus on + // recently allocated stack locks (instead of reading the stack + // base and checking whether 'mark' points inside the current + // thread stack): + // 1) (mark & 3) == 0 + // 2) SP <= mark < SP + os::pagesize() + // + // Warning: SP + os::pagesize can overflow the stack base. We must + // neither apply the optimization for an inflated lock allocated + // just above the thread stack (this is why condition 1 matters) + // nor apply the optimization if the stack lock is inside the stack + // of another thread. The latter is avoided even in case of overflow + // because we have guard pages at the end of all stacks. Hence, if + // we go over the stack base and hit the stack of another thread, + // this should not be in a writeable area that could contain a + // stack lock allocated by that thread. As a consequence, a stack + // lock less than page size away from SP is guaranteed to be + // owned by the current thread. + // + // Note: assuming SP is aligned, we can check the low bits of + // (mark-SP) instead of the low bits of mark. 
In that case, + // assuming page size is a power of 2, we can merge the two + // conditions into a single test: + // => ((mark - SP) & (3 - os::pagesize())) == 0 + +#ifdef AARCH64 + // Use the single check since the immediate is OK for AARCH64 + sub(R0, Rmark, Rstack_top); + intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); + Assembler::LogicalImmediate imm(mask, false); + ands(R0, R0, imm); + + // For recursive case store 0 into lock record. + // It is harmless to store it unconditionally as lock record contains some garbage + // value in its _displaced_header field by this moment. + str(ZR, Address(Rlock, mark_offset)); + +#else // AARCH64 + // (3 - os::pagesize()) cannot be encoded as an ARM immediate operand. + // Check independently the low bits and the distance to SP. + // -1- test low 2 bits + movs(R0, AsmOperand(Rmark, lsl, 30)); + // -2- test (mark - SP) if the low two bits are 0 + sub(R0, Rmark, SP, eq); + movs(R0, AsmOperand(R0, lsr, exact_log2(os::vm_page_size())), eq); + // If still 'eq' then recursive locking OK: store 0 into lock record + str(R0, Address(Rlock, mark_offset), eq); + +#endif // AARCH64 + +#ifndef PRODUCT + if (PrintBiasedLockingStatistics) { + cond_atomic_inc32(eq, BiasedLocking::fast_path_entry_count_addr()); + } +#endif // !PRODUCT + + b(done, eq); + + bind(slow_case); + + // Call the runtime routine for slow case + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock); + + bind(done); + } +} + + +// Unlocks an object. Used in monitorexit bytecode and remove_activation. +// +// Argument: R1: Points to BasicObjectLock structure for lock +// Throw an IllegalMonitorException if object is not locked by current thread +// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM. +void InterpreterMacroAssembler::unlock_object(Register Rlock) { + assert(Rlock == R1, "the second argument"); + + if (UseHeavyMonitors) { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), Rlock); + } else { + Label done, slow_case; + + const Register Robj = R2; + const Register Rmark = R3; + const Register Rresult = R0; + assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp); + + const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); + const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); + const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); + + const Register Rzero = zero_register(Rtemp); + + // Load oop into Robj + ldr(Robj, Address(Rlock, obj_offset)); + + // Free entry + str(Rzero, Address(Rlock, obj_offset)); + + if (UseBiasedLocking) { + biased_locking_exit(Robj, Rmark, done); + } + + // Load the old header from BasicLock structure + ldr(Rmark, Address(Rlock, mark_offset)); + + // Test for recursion (zero mark in BasicLock) + cbz(Rmark, done); + + bool allow_fallthrough_on_failure = true; + + cas_for_lock_release(Rlock, Rmark, Robj, Rtemp, slow_case, allow_fallthrough_on_failure); + + b(done, eq); + + bind(slow_case); + + // Call the runtime routine for slow case. + str(Robj, Address(Rlock, obj_offset)); // restore obj + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), Rlock); + + bind(done); + } +} + + +// Test ImethodDataPtr. 
If it is null, continue at the specified label +void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, Label& zero_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + ldr(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); + cbz(mdp, zero_continue); +} + + +// Set the method data pointer for the current bcp. +// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. +void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { + assert(ProfileInterpreter, "must be profiling interpreter"); + Label set_mdp; + + // Test MDO to avoid the call if it is NULL. + ldr(Rtemp, Address(Rmethod, Method::method_data_offset())); + cbz(Rtemp, set_mdp); + + mov(R0, Rmethod); + mov(R1, Rbcp); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), R0, R1); + // R0/W0: mdi + + // mdo is guaranteed to be non-zero here, we checked for it before the call. + ldr(Rtemp, Address(Rmethod, Method::method_data_offset())); + add(Rtemp, Rtemp, in_bytes(MethodData::data_offset())); + add_ptr_scaled_int32(Rtemp, Rtemp, R0, 0); + + bind(set_mdp); + str(Rtemp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::verify_method_data_pointer() { + assert(ProfileInterpreter, "must be profiling interpreter"); +#ifdef ASSERT + Label verify_continue; + save_caller_save_registers(); + + const Register Rmdp = R2; + test_method_data_pointer(Rmdp, verify_continue); // If mdp is zero, continue + + // If the mdp is valid, it will point to a DataLayout header which is + // consistent with the bcp. The converse is highly probable also. + + ldrh(R3, Address(Rmdp, DataLayout::bci_offset())); + ldr(Rtemp, Address(Rmethod, Method::const_offset())); + add(R3, R3, Rtemp); + add(R3, R3, in_bytes(ConstMethod::codes_offset())); + cmp(R3, Rbcp); + b(verify_continue, eq); + + mov(R0, Rmethod); + mov(R1, Rbcp); + call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), R0, R1, Rmdp); + + bind(verify_continue); + restore_caller_save_registers(); +#endif // ASSERT +} + + +void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, int offset, Register value) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert_different_registers(mdp_in, value); + str(value, Address(mdp_in, offset)); +} + + +// Increments mdp data. Sets bumped_count register to adjusted counter. +void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, + int offset, + Register bumped_count, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + + // Counter address + Address data(mdp_in, offset); + assert_different_registers(mdp_in, bumped_count); + + increment_mdp_data_at(data, bumped_count, decrement); +} + +void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, int flag_byte_constant) { + assert_different_registers(mdp_in, Rtemp); + assert(ProfileInterpreter, "must be profiling interpreter"); + assert((0 < flag_byte_constant) && (flag_byte_constant < (1 << BitsPerByte)), "flag mask is out of range"); + + // Set the flag + ldrb(Rtemp, Address(mdp_in, in_bytes(DataLayout::flags_offset()))); + orr(Rtemp, Rtemp, (unsigned)flag_byte_constant); + strb(Rtemp, Address(mdp_in, in_bytes(DataLayout::flags_offset()))); +} + + +// Increments mdp data. Sets bumped_count register to adjusted counter. 
+void InterpreterMacroAssembler::increment_mdp_data_at(Address data, + Register bumped_count, + bool decrement) { + assert(ProfileInterpreter, "must be profiling interpreter"); + + ldr(bumped_count, data); + if (decrement) { + // Decrement the register. Set condition codes. + subs(bumped_count, bumped_count, DataLayout::counter_increment); + // Avoid overflow. +#ifdef AARCH64 + assert(DataLayout::counter_increment == 1, "required for cinc"); + cinc(bumped_count, bumped_count, pl); +#else + add(bumped_count, bumped_count, DataLayout::counter_increment, pl); +#endif // AARCH64 + } else { + // Increment the register. Set condition codes. + adds(bumped_count, bumped_count, DataLayout::counter_increment); + // Avoid overflow. +#ifdef AARCH64 + assert(DataLayout::counter_increment == 1, "required for cinv"); + cinv(bumped_count, bumped_count, mi); // inverts 0x80..00 back to 0x7f..ff +#else + sub(bumped_count, bumped_count, DataLayout::counter_increment, mi); +#endif // AARCH64 + } + str(bumped_count, data); +} + + +void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, + int offset, + Register value, + Register test_value_out, + Label& not_equal_continue) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert_different_registers(mdp_in, test_value_out, value); + + ldr(test_value_out, Address(mdp_in, offset)); + cmp(test_value_out, value); + + b(not_equal_continue, ne); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, int offset_of_disp, Register reg_temp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert_different_registers(mdp_in, reg_temp); + + ldr(reg_temp, Address(mdp_in, offset_of_disp)); + add(mdp_in, mdp_in, reg_temp); + str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, Register reg_offset, Register reg_tmp) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert_different_registers(mdp_in, reg_offset, reg_tmp); + + ldr(reg_tmp, Address(mdp_in, reg_offset)); + add(mdp_in, mdp_in, reg_tmp); + str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, int constant) { + assert(ProfileInterpreter, "must be profiling interpreter"); + add(mdp_in, mdp_in, constant); + str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); +} + + +// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). +void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { + assert(ProfileInterpreter, "must be profiling interpreter"); + assert_different_registers(return_bci, R0, R1, R2, R3, Rtemp); + + mov(R1, return_bci); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), R1); +} + + +// Sets mdp, bumped_count registers, blows Rtemp. +void InterpreterMacroAssembler::profile_taken_branch(Register mdp, Register bumped_count) { + assert_different_registers(mdp, bumped_count); + + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + // Otherwise, assign to mdp + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the taken count. + increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()), bumped_count); + + // The method data pointer needs to be updated to reflect the new target. 
+ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()), Rtemp); + + bind (profile_continue); + } +} + + +// Sets mdp, blows Rtemp. +void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { + assert_different_registers(mdp, Rtemp); + + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are taking a branch. Increment the not taken count. + increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()), Rtemp); + + // The method data pointer needs to be updated to correspond to the next bytecode + update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); + + bind (profile_continue); + } +} + + +// Sets mdp, blows Rtemp. +void InterpreterMacroAssembler::profile_call(Register mdp) { + assert_different_registers(mdp, Rtemp); + + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); + + bind (profile_continue); + } +} + + +// Sets mdp, blows Rtemp. +void InterpreterMacroAssembler::profile_final_call(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // We are making a call. Increment the count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); + + bind (profile_continue); + } +} + + +// Sets mdp, blows Rtemp. +void InterpreterMacroAssembler::profile_virtual_call(Register mdp, Register receiver, bool receiver_can_be_null) { + assert_different_registers(mdp, receiver, Rtemp); + + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + Label skip_receiver_profile; + if (receiver_can_be_null) { + Label not_null; + cbnz(receiver, not_null); + // We are making a call. Increment the count for null receiver. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp); + b(skip_receiver_profile); + bind(not_null); + } + + // Record the receiver type. + record_klass_in_profile(receiver, mdp, Rtemp, true); + bind(skip_receiver_profile); + + // The method data pointer needs to be updated to reflect the new target. + update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); + bind(profile_continue); + } +} + + +void InterpreterMacroAssembler::record_klass_in_profile_helper( + Register receiver, Register mdp, + Register reg_tmp, + int start_row, Label& done, bool is_virtual_call) { + if (TypeProfileWidth == 0) + return; + + assert_different_registers(receiver, mdp, reg_tmp); + + int last_row = VirtualCallData::row_limit() - 1; + assert(start_row <= last_row, "must be work left to do"); + // Test this row for both the receiver and for null. + // Take any of three different outcomes: + // 1. found receiver => increment count and goto done + // 2. 
found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. + for (int row = start_row; row <= last_row; row++) { + Label next_test; + + // See if the receiver is receiver[n]. + int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); + + test_mdp_data_at(mdp, recvr_offset, receiver, reg_tmp, next_test); + + // The receiver is receiver[n]. Increment count[n]. + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); + increment_mdp_data_at(mdp, count_offset, reg_tmp); + b(done); + + bind(next_test); + // reg_tmp now contains the receiver from the CallData. + + if (row == start_row) { + Label found_null; + // Failed the equality check on receiver[n]... Test for null. + if (start_row == last_row) { + // The only thing left to do is handle the null case. + if (is_virtual_call) { + cbz(reg_tmp, found_null); + // Receiver did not match any saved receiver and there is no empty row for it. + // Increment total counter to indicate polymorphic case. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), reg_tmp); + b(done); + bind(found_null); + } else { + cbnz(reg_tmp, done); + } + break; + } + // Since null is rare, make it be the branch-taken case. + cbz(reg_tmp, found_null); + + // Put all the "Case 3" tests here. + record_klass_in_profile_helper(receiver, mdp, reg_tmp, start_row + 1, done, is_virtual_call); + + // Found a null. Keep searching for a matching receiver, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + + // In the fall-through case, we found no matching receiver, but we + // observed the receiver[start_row] is NULL. + + // Fill in the receiver field and increment the count. + int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); + set_mdp_data_at(mdp, recvr_offset, receiver); + int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); + mov(reg_tmp, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg_tmp); + if (start_row > 0) { + b(done); + } +} + +void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, + Register mdp, + Register reg_tmp, + bool is_virtual_call) { + assert(ProfileInterpreter, "must be profiling"); + assert_different_registers(receiver, mdp, reg_tmp); + + Label done; + + record_klass_in_profile_helper(receiver, mdp, reg_tmp, 0, done, is_virtual_call); + + bind (done); +} + +// Sets mdp, blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). +void InterpreterMacroAssembler::profile_ret(Register mdp, Register return_bci) { + assert_different_registers(mdp, return_bci, Rtemp, R0, R1, R2, R3); + + if (ProfileInterpreter) { + Label profile_continue; + uint row; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the total ret count. + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp); + + for (row = 0; row < RetData::row_limit(); row++) { + Label next_test; + + // See if return_bci is equal to bci[n]: + test_mdp_data_at(mdp, in_bytes(RetData::bci_offset(row)), return_bci, + Rtemp, next_test); + + // return_bci is equal to bci[n]. Increment the count. + increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)), Rtemp); + + // The method data pointer needs to be updated to reflect the new target. 
+ update_mdp_by_offset(mdp, in_bytes(RetData::bci_displacement_offset(row)), Rtemp); + b(profile_continue); + bind(next_test); + } + + update_mdp_for_ret(return_bci); + + bind(profile_continue); + } +} + + +// Sets mdp. +void InterpreterMacroAssembler::profile_null_seen(Register mdp) { + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind (profile_continue); + } +} + + +// Sets mdp, blows Rtemp. +void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { + assert_different_registers(mdp, Rtemp); + + if (ProfileInterpreter && TypeProfileCasts) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + int count_offset = in_bytes(CounterData::count_offset()); + // Back up the address, since we have already bumped the mdp. + count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); + + // *Decrement* the counter. We expect to see zero or small negatives. + increment_mdp_data_at(mdp, count_offset, Rtemp, true); + + bind (profile_continue); + } +} + + +// Sets mdp, blows Rtemp. +void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass) +{ + assert_different_registers(mdp, klass, Rtemp); + + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // The method data pointer needs to be updated. + int mdp_delta = in_bytes(BitData::bit_data_size()); + if (TypeProfileCasts) { + mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); + + // Record the object type. + record_klass_in_profile(klass, mdp, Rtemp, false); + } + update_mdp_by_constant(mdp, mdp_delta); + + bind(profile_continue); + } +} + + +// Sets mdp, blows Rtemp. +void InterpreterMacroAssembler::profile_switch_default(Register mdp) { + assert_different_registers(mdp, Rtemp); + + if (ProfileInterpreter) { + Label profile_continue; + + // If no method data exists, go to profile_continue. + test_method_data_pointer(mdp, profile_continue); + + // Update the default case count + increment_mdp_data_at(mdp, in_bytes(MultiBranchData::default_count_offset()), Rtemp); + + // The method data pointer needs to be updated. + update_mdp_by_offset(mdp, in_bytes(MultiBranchData::default_displacement_offset()), Rtemp); + + bind(profile_continue); + } +} + + +// Sets mdp. Blows reg_tmp1, reg_tmp2. Index could be the same as reg_tmp2. +void InterpreterMacroAssembler::profile_switch_case(Register mdp, Register index, Register reg_tmp1, Register reg_tmp2) { + assert_different_registers(mdp, reg_tmp1, reg_tmp2); + assert_different_registers(mdp, reg_tmp1, index); + + if (ProfileInterpreter) { + Label profile_continue; + + const int count_offset = in_bytes(MultiBranchData::case_array_offset()) + + in_bytes(MultiBranchData::relative_count_offset()); + + const int displacement_offset = in_bytes(MultiBranchData::case_array_offset()) + + in_bytes(MultiBranchData::relative_displacement_offset()); + + // If no method data exists, go to profile_continue. 
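+ // (Layout note, for reference: each element of the MultiBranchData case array is
+ // per_case_size() bytes and is assumed to hold a relative count cell followed by a
+ // relative displacement cell; the two constants above index into one such element.)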
+ test_method_data_pointer(mdp, profile_continue);
+
+ // Build the base (index * per_case_size_in_bytes())
+ logical_shift_left(reg_tmp1, index, exact_log2(in_bytes(MultiBranchData::per_case_size())));
+
+ // Update the case count
+ add(reg_tmp1, reg_tmp1, count_offset);
+ increment_mdp_data_at(Address(mdp, reg_tmp1), reg_tmp2);
+
+ // The method data pointer needs to be updated.
+ add(reg_tmp1, reg_tmp1, displacement_offset - count_offset);
+ update_mdp_by_offset(mdp, reg_tmp1, reg_tmp2);
+
+ bind (profile_continue);
+ }
+}
+
+
+void InterpreterMacroAssembler::byteswap_u32(Register r, Register rtmp1, Register rtmp2) {
+#ifdef AARCH64
+ rev_w(r, r);
+#else
+ if (VM_Version::supports_rev()) {
+ rev(r, r);
+ } else {
+ eor(rtmp1, r, AsmOperand(r, ror, 16));
+ mvn(rtmp2, 0x0000ff00);
+ andr(rtmp1, rtmp2, AsmOperand(rtmp1, lsr, 8));
+ eor(r, rtmp1, AsmOperand(r, ror, 8));
+ }
+#endif // AARCH64
+}
+
+
+void InterpreterMacroAssembler::inc_global_counter(address address_of_counter, int offset, Register tmp1, Register tmp2, bool avoid_overflow) {
+ const intx addr = (intx) (address_of_counter + offset);
+
+ assert ((addr & 0x3) == 0, "address of counter should be aligned");
+ const intx offset_mask = right_n_bits(AARCH64_ONLY(12 + 2) NOT_AARCH64(12));
+
+ const address base = (address) (addr & ~offset_mask);
+ const int offs = (int) (addr & offset_mask);
+
+ const Register addr_base = tmp1;
+ const Register val = tmp2;
+
+ mov_slow(addr_base, base);
+ ldr_s32(val, Address(addr_base, offs));
+
+ if (avoid_overflow) {
+ adds_32(val, val, 1);
+#ifdef AARCH64
+ Label L;
+ b(L, mi);
+ str_32(val, Address(addr_base, offs));
+ bind(L);
+#else
+ str(val, Address(addr_base, offs), pl);
+#endif // AARCH64
+ } else {
+ add_32(val, val, 1);
+ str_32(val, Address(addr_base, offs));
+ }
+}
+
+void InterpreterMacroAssembler::interp_verify_oop(Register reg, TosState state, const char *file, int line) {
+ if (state == atos) { MacroAssembler::_verify_oop(reg, "broken oop", file, line); }
+}
+
+// Inline assembly for:
+//
+// if (thread is in interp_only_mode) {
+// InterpreterRuntime::post_method_entry();
+// }
+// if (DTraceMethodProbes) {
+// SharedRuntime::dtrace_method_entry(method, receiver);
+// }
+// if (log_is_enabled(Trace, redefine, class, obsolete)) {
+// SharedRuntime::rc_trace_method_entry(method, receiver);
+// }
+
+void InterpreterMacroAssembler::notify_method_entry() {
+ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to
+ // track stack depth. If it is possible to enter interp_only_mode we add
+ // the code to check if the event should be sent.
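+ // (can_post_interpreter_events() is evaluated here, at interpreter generation time,
+ // so the interp_only_mode check below is only emitted when JVMTI may actually need
+ // method entry events.)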
+ if (can_post_interpreter_events()) { + Label L; + + ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset())); + cbz(Rtemp, L); + + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry)); + + bind(L); + } + + // Note: Disable DTrace runtime check for now to eliminate overhead on each method entry + if (DTraceMethodProbes) { + Label Lcontinue; + + ldrb_global(Rtemp, (address)&DTraceMethodProbes); + cbz(Rtemp, Lcontinue); + + mov(R0, Rthread); + mov(R1, Rmethod); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), R0, R1); + + bind(Lcontinue); + } + // RedefineClasses() tracing support for obsolete method entry + if (log_is_enabled(Trace, redefine, class, obsolete)) { + mov(R0, Rthread); + mov(R1, Rmethod); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), + R0, R1); + } +} + + +void InterpreterMacroAssembler::notify_method_exit( + TosState state, NotifyMethodExitMode mode, + bool native, Register result_lo, Register result_hi, FloatRegister result_fp) { + // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to + // track stack depth. If it is possible to enter interp_only_mode we add + // the code to check if the event should be sent. + if (mode == NotifyJVMTI && can_post_interpreter_events()) { + Label L; + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. If this + // is changed then the interpreter_frame_result implementation will + // need to be updated too. + + ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset())); + cbz(Rtemp, L); + + if (native) { + // For c++ and template interpreter push both result registers on the + // stack in native, we don't know the state. + // On AArch64 result registers are stored into the frame at known locations. + // See frame::interpreter_frame_result for code that gets the result values from here. + assert(result_lo != noreg, "result registers should be defined"); + +#ifdef AARCH64 + assert(result_hi == noreg, "result_hi is not used on AArch64"); + assert(result_fp != fnoreg, "FP result register must be defined"); + + str_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize)); + str(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize)); +#else + assert(result_hi != noreg, "result registers should be defined"); + +#ifdef __ABI_HARD__ + assert(result_fp != fnoreg, "FP result register must be defined"); + sub(SP, SP, 2 * wordSize); + fstd(result_fp, Address(SP)); +#endif // __ABI_HARD__ + + push(RegisterSet(result_lo) | RegisterSet(result_hi)); +#endif // AARCH64 + + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + +#ifdef AARCH64 + ldr_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize)); + ldr(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize)); +#else + pop(RegisterSet(result_lo) | RegisterSet(result_hi)); +#ifdef __ABI_HARD__ + fldd(result_fp, Address(SP)); + add(SP, SP, 2 * wordSize); +#endif // __ABI_HARD__ +#endif // AARCH64 + + } else { + // For the template interpreter, the value on tos is the size of the + // state. (c++ interpreter calls jvmti somewhere else). 
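+ // (push(state)/pop(state) below save and restore the TOS-cached result registers
+ // around the VM call, so post_method_exit cannot clobber the method result.)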
+ push(state); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); + pop(state); + } + + bind(L); + } + + // Note: Disable DTrace runtime check for now to eliminate overhead on each method exit + if (DTraceMethodProbes) { + Label Lcontinue; + + ldrb_global(Rtemp, (address)&DTraceMethodProbes); + cbz(Rtemp, Lcontinue); + + push(state); + + mov(R0, Rthread); + mov(R1, Rmethod); + + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), R0, R1); + + pop(state); + + bind(Lcontinue); + } +} + + +#ifndef PRODUCT + +void InterpreterMacroAssembler::trace_state(const char* msg) { + int push_size = save_caller_save_registers(); + + Label Lcontinue; + InlinedString Lmsg0("%s: FP=" INTPTR_FORMAT ", SP=" INTPTR_FORMAT "\n"); + InlinedString Lmsg(msg); + InlinedAddress Lprintf((address)printf); + + ldr_literal(R0, Lmsg0); + ldr_literal(R1, Lmsg); + mov(R2, FP); + add(R3, SP, push_size); // original SP (without saved registers) + ldr_literal(Rtemp, Lprintf); + call(Rtemp); + + b(Lcontinue); + + bind_literal(Lmsg0); + bind_literal(Lmsg); + bind_literal(Lprintf); + + + bind(Lcontinue); + + restore_caller_save_registers(); +} + +#endif + +// Jump if ((*counter_addr += increment) & mask) satisfies the condition. +void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, + int increment, Address mask_addr, + Register scratch, Register scratch2, + AsmCondition cond, Label* where) { + // caution: scratch2 and base address of counter_addr can be the same + assert_different_registers(scratch, scratch2); + ldr_u32(scratch, counter_addr); + add(scratch, scratch, increment); + str_32(scratch, counter_addr); + +#ifdef AARCH64 + ldr_u32(scratch2, mask_addr); + ands_w(ZR, scratch, scratch2); +#else + ldr(scratch2, mask_addr); + andrs(scratch, scratch, scratch2); +#endif // AARCH64 + b(*where, cond); +} + +void InterpreterMacroAssembler::get_method_counters(Register method, + Register Rcounters, + Label& skip) { + const Address method_counters(method, Method::method_counters_offset()); + Label has_counters; + + ldr(Rcounters, method_counters); + cbnz(Rcounters, has_counters); + +#ifdef AARCH64 + const Register tmp = Rcounters; + const int saved_regs_size = 20*wordSize; + + // Note: call_VM will cut SP according to Rstack_top value before call, and restore SP to + // extended_sp value from frame after the call. + // So make sure there is enough stack space to save registers and adjust Rstack_top accordingly. 
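+ // Roughly, the block below does the following (illustrative sketch, AArch64 only):
+ //   Rstack_top -= saved_regs_size;
+ //   if (SP > Rstack_top) {                       // not enough room below SP
+ //     SP = Rstack_top aligned to StackAlignmentInBytes;
+ //     interpreter_frame_extended_sp = SP;        // so call_VM restores SP to it
+ //   }
+ //   save R0..R18 and LR pairwise at Rstack_top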
+ { + Label enough_stack_space; + check_extended_sp(tmp); + sub(Rstack_top, Rstack_top, saved_regs_size); + cmp(SP, Rstack_top); + b(enough_stack_space, ls); + + align_reg(tmp, Rstack_top, StackAlignmentInBytes); + mov(SP, tmp); + str(tmp, Address(FP, frame::interpreter_frame_extended_sp_offset * wordSize)); + + bind(enough_stack_space); + check_stack_top(); + + int offset = 0; + stp(R0, R1, Address(Rstack_top, offset)); offset += 2*wordSize; + stp(R2, R3, Address(Rstack_top, offset)); offset += 2*wordSize; + stp(R4, R5, Address(Rstack_top, offset)); offset += 2*wordSize; + stp(R6, R7, Address(Rstack_top, offset)); offset += 2*wordSize; + stp(R8, R9, Address(Rstack_top, offset)); offset += 2*wordSize; + stp(R10, R11, Address(Rstack_top, offset)); offset += 2*wordSize; + stp(R12, R13, Address(Rstack_top, offset)); offset += 2*wordSize; + stp(R14, R15, Address(Rstack_top, offset)); offset += 2*wordSize; + stp(R16, R17, Address(Rstack_top, offset)); offset += 2*wordSize; + stp(R18, LR, Address(Rstack_top, offset)); offset += 2*wordSize; + assert (offset == saved_regs_size, "should be"); + } +#else + push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(R14)); +#endif // AARCH64 + + mov(R1, method); + call_VM(noreg, CAST_FROM_FN_PTR(address, + InterpreterRuntime::build_method_counters), R1); + +#ifdef AARCH64 + { + int offset = 0; + ldp(R0, R1, Address(Rstack_top, offset)); offset += 2*wordSize; + ldp(R2, R3, Address(Rstack_top, offset)); offset += 2*wordSize; + ldp(R4, R5, Address(Rstack_top, offset)); offset += 2*wordSize; + ldp(R6, R7, Address(Rstack_top, offset)); offset += 2*wordSize; + ldp(R8, R9, Address(Rstack_top, offset)); offset += 2*wordSize; + ldp(R10, R11, Address(Rstack_top, offset)); offset += 2*wordSize; + ldp(R12, R13, Address(Rstack_top, offset)); offset += 2*wordSize; + ldp(R14, R15, Address(Rstack_top, offset)); offset += 2*wordSize; + ldp(R16, R17, Address(Rstack_top, offset)); offset += 2*wordSize; + ldp(R18, LR, Address(Rstack_top, offset)); offset += 2*wordSize; + assert (offset == saved_regs_size, "should be"); + + add(Rstack_top, Rstack_top, saved_regs_size); + } +#else + pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(R14)); +#endif // AARCH64 + + ldr(Rcounters, method_counters); + cbz(Rcounters, skip); // No MethodCounters created, OutOfMemory + + bind(has_counters); +}
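+
+// Usage note (illustrative, not generated code): callers typically bail out through the
+// 'skip' label when MethodCounters could not be allocated, e.g.
+//
+//   Label no_counters;                // hypothetical label name
+//   get_method_counters(Rmethod, Rcounters, no_counters);
+//   // ... update invocation/backedge counters relative to Rcounters ...
+//   bind(no_counters);
+//
+// so an OutOfMemory during MethodCounters allocation simply skips counter updates
+// instead of failing here.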