/* * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. * */ #include "precompiled.hpp" #include "gc/shared/barrierSet.inline.hpp" #include "gc/shared/cardTableModRefBS.inline.hpp" #include "gc/shared/collectedHeap.hpp" #include "interp_masm_arm.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterRuntime.hpp" #include "logging/log.hpp" #include "oops/arrayOop.hpp" #include "oops/markOop.hpp" #include "oops/method.hpp" #include "oops/methodData.hpp" #include "prims/jvmtiExport.hpp" #include "prims/jvmtiThreadState.hpp" #include "runtime/basicLock.hpp" #include "runtime/biasedLocking.hpp" #include "runtime/sharedRuntime.hpp" #if INCLUDE_ALL_GCS #include "gc/g1/g1CollectedHeap.inline.hpp" #include "gc/g1/g1SATBCardTableModRefBS.hpp" #include "gc/g1/heapRegion.hpp" #endif // INCLUDE_ALL_GCS //-------------------------------------------------------------------- // Implementation of InterpreterMacroAssembler InterpreterMacroAssembler::InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) { } void InterpreterMacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { #if defined(ASSERT) && !defined(AARCH64) // Ensure that last_sp is not filled. { Label L; ldr(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); cbz(Rtemp, L); stop("InterpreterMacroAssembler::call_VM_helper: last_sp != NULL"); bind(L); } #endif // ASSERT && !AARCH64 // Rbcp must be saved/restored since it may change due to GC. save_bcp(); #ifdef AARCH64 check_no_cached_stack_top(Rtemp); save_stack_top(); check_extended_sp(Rtemp); cut_sp_before_call(); #endif // AARCH64 // super call MacroAssembler::call_VM_helper(oop_result, entry_point, number_of_arguments, check_exceptions); #ifdef AARCH64 // Restore SP to extended SP restore_sp_after_call(Rtemp); check_stack_top(); clear_cached_stack_top(); #endif // AARCH64 // Restore interpreter specific registers. restore_bcp(); restore_method(); } void InterpreterMacroAssembler::jump_to_entry(address entry) { assert(entry, "Entry must have been generated by now"); b(entry); } void InterpreterMacroAssembler::check_and_handle_popframe() { if (can_pop_frame()) { Label L; const Register popframe_cond = R2_tmp; // Initiate popframe handling only if it is not already being processed. If the flag // has the popframe_processing bit set, it means that this code is called *during* popframe // handling - we don't want to reenter. 
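    // Roughly, the two flag tests below implement:
    //   int32_t cond = thread->popframe_condition();
    //   if ((cond & popframe_pending_bit) == 0) goto L;      // no popframe requested
    //   if ((cond & popframe_processing_bit) != 0) goto L;   // already being processed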
ldr_s32(popframe_cond, Address(Rthread, JavaThread::popframe_condition_offset())); tbz(popframe_cond, exact_log2(JavaThread::popframe_pending_bit), L); tbnz(popframe_cond, exact_log2(JavaThread::popframe_processing_bit), L); // Call Interpreter::remove_activation_preserving_args_entry() to get the // address of the same-named entrypoint in the generated interpreter code. call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); // Call indirectly to avoid generation ordering problem. jump(R0); bind(L); } } // Blows R2, Rtemp. Sets TOS cached value. void InterpreterMacroAssembler::load_earlyret_value(TosState state) { const Register thread_state = R2_tmp; ldr(thread_state, Address(Rthread, JavaThread::jvmti_thread_state_offset())); const Address tos_addr(thread_state, JvmtiThreadState::earlyret_tos_offset()); const Address oop_addr(thread_state, JvmtiThreadState::earlyret_oop_offset()); const Address val_addr(thread_state, JvmtiThreadState::earlyret_value_offset()); #ifndef AARCH64 const Address val_addr_hi(thread_state, JvmtiThreadState::earlyret_value_offset() + in_ByteSize(wordSize)); #endif // !AARCH64 Register zero = zero_register(Rtemp); switch (state) { case atos: ldr(R0_tos, oop_addr); str(zero, oop_addr); interp_verify_oop(R0_tos, state, __FILE__, __LINE__); break; #ifdef AARCH64 case ltos: ldr(R0_tos, val_addr); break; #else case ltos: ldr(R1_tos_hi, val_addr_hi); // fall through #endif // AARCH64 case btos: // fall through case ztos: // fall through case ctos: // fall through case stos: // fall through case itos: ldr_s32(R0_tos, val_addr); break; #ifdef __SOFTFP__ case dtos: ldr(R1_tos_hi, val_addr_hi); // fall through case ftos: ldr(R0_tos, val_addr); break; #else case ftos: ldr_float (S0_tos, val_addr); break; case dtos: ldr_double(D0_tos, val_addr); break; #endif // __SOFTFP__ case vtos: /* nothing to do */ break; default : ShouldNotReachHere(); } // Clean up tos value in the thread object str(zero, val_addr); #ifndef AARCH64 str(zero, val_addr_hi); #endif // !AARCH64 mov(Rtemp, (int) ilgl); str_32(Rtemp, tos_addr); } // Blows R2, Rtemp. void InterpreterMacroAssembler::check_and_handle_earlyret() { if (can_force_early_return()) { Label L; const Register thread_state = R2_tmp; ldr(thread_state, Address(Rthread, JavaThread::jvmti_thread_state_offset())); cbz(thread_state, L); // if (thread->jvmti_thread_state() == NULL) exit; // Initiate earlyret handling only if it is not already being processed. // If the flag has the earlyret_processing bit set, it means that this code // is called *during* earlyret handling - we don't want to reenter. ldr_s32(Rtemp, Address(thread_state, JvmtiThreadState::earlyret_state_offset())); cmp(Rtemp, JvmtiThreadState::earlyret_pending); b(L, ne); // Call Interpreter::remove_activation_early_entry() to get the address of the // same-named entrypoint in the generated interpreter code. ldr_s32(R0, Address(thread_state, JvmtiThreadState::earlyret_tos_offset())); call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), R0); jump(R0); bind(L); } } // Sets reg. Blows Rtemp. 
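// The two bytecode-stream bytes are assembled big-endian, one byte at a time to
// avoid an unaligned access: reg = (bcp[bcp_offset] << 8) | bcp[bcp_offset + 1].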
void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset) { assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); assert(reg != Rtemp, "should be different registers"); ldrb(Rtemp, Address(Rbcp, bcp_offset)); ldrb(reg, Address(Rbcp, bcp_offset+1)); orr(reg, reg, AsmOperand(Rtemp, lsl, BitsPerByte)); } void InterpreterMacroAssembler::get_index_at_bcp(Register index, int bcp_offset, Register tmp_reg, size_t index_size) { assert_different_registers(index, tmp_reg); if (index_size == sizeof(u2)) { // load bytes of index separately to avoid unaligned access ldrb(index, Address(Rbcp, bcp_offset+1)); ldrb(tmp_reg, Address(Rbcp, bcp_offset)); orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); } else if (index_size == sizeof(u4)) { // TODO-AARCH64: consider using unaligned access here ldrb(index, Address(Rbcp, bcp_offset+3)); ldrb(tmp_reg, Address(Rbcp, bcp_offset+2)); orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); ldrb(tmp_reg, Address(Rbcp, bcp_offset+1)); orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); ldrb(tmp_reg, Address(Rbcp, bcp_offset)); orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte)); // Check if the secondary index definition is still ~x, otherwise // we have to change the following assembler code to calculate the // plain index. assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); mvn_32(index, index); // convert to plain index } else if (index_size == sizeof(u1)) { ldrb(index, Address(Rbcp, bcp_offset)); } else { ShouldNotReachHere(); } } // Sets cache, index. void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size) { assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); assert_different_registers(cache, index); get_index_at_bcp(index, bcp_offset, cache, index_size); // load constant pool cache pointer ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize)); // convert from field index to ConstantPoolCacheEntry index assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below"); // TODO-AARCH64 merge this shift with shift "add(..., Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord))" after this method is called logical_shift_left(index, index, 2); } // Sets cache, index, bytecode. void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size) { get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); // caution index and bytecode can be the same add(bytecode, cache, AsmOperand(index, lsl, LogBytesPerWord)); #ifdef AARCH64 add(bytecode, bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ldarb(bytecode, bytecode); #else ldrb(bytecode, Address(bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()))); TemplateTable::volatile_barrier(MacroAssembler::LoadLoad, noreg, true); #endif // AARCH64 } // Sets cache. Blows reg_tmp. 
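// Computes, roughly:
//   cache = frame's constant pool cache pointer
//           + ConstantPoolCache::base_offset()
//           + index * sizeof(ConstantPoolCacheEntry)
// where 'index' is the field index read from the bytecode stream.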
void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
                                                               Register reg_tmp,
                                                               int bcp_offset,
                                                               size_t index_size) {
  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
  assert_different_registers(cache, reg_tmp);

  get_index_at_bcp(reg_tmp, bcp_offset, cache, index_size);

  // load constant pool cache pointer
  ldr(cache, Address(FP, frame::interpreter_frame_cache_offset * wordSize));

  // skip past the header
  add(cache, cache, in_bytes(ConstantPoolCache::base_offset()));

  // convert from field index to ConstantPoolCacheEntry index
  // and from word offset to byte offset
  assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
  add(cache, cache, AsmOperand(reg_tmp, lsl, 2 + LogBytesPerWord));
}

// Load object from cpool->resolved_references(index)
void InterpreterMacroAssembler::load_resolved_reference_at_index(
                                           Register result, Register index) {
  assert_different_registers(result, index);
  get_constant_pool(result);

  Register cache = result;
  // load pointer for resolved_references[] objArray
  ldr(cache, Address(result, ConstantPool::cache_offset_in_bytes()));
  ldr(cache, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes()));
  resolve_oop_handle(cache);

  // Add in the index
  // convert from field index to resolved_references() index and from
  // word index to byte offset. Since this is a java object, it can be compressed
  add(cache, cache, AsmOperand(index, lsl, LogBytesPerHeapOop));
  load_heap_oop(result, Address(cache, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
}

void InterpreterMacroAssembler::load_resolved_klass_at_offset(
                                           Register Rcpool, Register Rindex, Register Rklass) {
  add(Rtemp, Rcpool, AsmOperand(Rindex, lsl, LogBytesPerWord));
  ldrh(Rtemp, Address(Rtemp, sizeof(ConstantPool))); // Rtemp = resolved_klass_index
  ldr(Rklass, Address(Rcpool, ConstantPool::resolved_klasses_offset_in_bytes())); // Rklass = cpool->_resolved_klasses
  add(Rklass, Rklass, AsmOperand(Rtemp, lsl, LogBytesPerWord));
  ldr(Rklass, Address(Rklass, Array<Klass*>::base_offset_in_bytes()));
}

// Generate a subtype check: branch to not_subtype if sub_klass is
// not a subtype of super_klass.
// Profiling code for the subtype check failure (profile_typecheck_failed)
// should be explicitly generated by the caller in the not_subtype case.
// Blows Rtemp, tmp1, tmp2.
void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
                                                  Register Rsuper_klass,
                                                  Label &not_subtype,
                                                  Register tmp1,
                                                  Register tmp2) {
  assert_different_registers(Rsub_klass, Rsuper_klass, tmp1, tmp2, Rtemp);
  Label ok_is_subtype, loop, update_cache;

  const Register super_check_offset = tmp1;
  const Register cached_super = tmp2;

  // Profile the not-null value's klass.
  profile_typecheck(tmp1, Rsub_klass);

  // Load the super-klass's check offset into super_check_offset
  ldr_u32(super_check_offset, Address(Rsuper_klass, Klass::super_check_offset_offset()));

  // Check for self
  cmp(Rsub_klass, Rsuper_klass);

  // Load from the sub-klass's super-class display list, or a 1-word cache of
  // the secondary superclass list, or a failing value with a sentinel offset
  // if the super-klass is an interface or exceptionally deep in the Java
  // hierarchy and we have to scan the secondary superclass list the hard way.
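  // In pseudo-code, the fast path emitted below is roughly:
  //   if (sub == super) goto ok_is_subtype;
  //   if (*(Klass**)((address)sub + super->super_check_offset()) == super) goto ok_is_subtype;
  //   if (super->super_check_offset() != secondary_super_cache_offset) goto not_subtype;
  //   // otherwise fall through to a linear scan of sub->secondary_supers()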
  // See if we get an immediate positive hit
  ldr(cached_super, Address(Rsub_klass, super_check_offset));

  cond_cmp(Rsuper_klass, cached_super, ne);
  b(ok_is_subtype, eq);

  // Check for immediate negative hit
  cmp(super_check_offset, in_bytes(Klass::secondary_super_cache_offset()));
  b(not_subtype, ne);

  // Now do a linear scan of the secondary super-klass chain.
  const Register supers_arr = tmp1;
  const Register supers_cnt = tmp2;
  const Register cur_super  = Rtemp;

  // Load objArrayOop of secondary supers.
  ldr(supers_arr, Address(Rsub_klass, Klass::secondary_supers_offset()));

  ldr_u32(supers_cnt, Address(supers_arr, Array<Klass*>::length_offset_in_bytes())); // Load the array length
#ifdef AARCH64
  cbz(supers_cnt, not_subtype);
  add(supers_arr, supers_arr, Array<Klass*>::base_offset_in_bytes());
#else
  cmp(supers_cnt, 0);

  // Skip to the start of array elements and prefetch the first super-klass.
  ldr(cur_super, Address(supers_arr, Array<Klass*>::base_offset_in_bytes(), pre_indexed), ne);
  b(not_subtype, eq);
#endif // AARCH64

  bind(loop);

#ifdef AARCH64
  ldr(cur_super, Address(supers_arr, wordSize, post_indexed));
#endif // AARCH64

  cmp(cur_super, Rsuper_klass);
  b(update_cache, eq);

  subs(supers_cnt, supers_cnt, 1);

#ifndef AARCH64
  ldr(cur_super, Address(supers_arr, wordSize, pre_indexed), ne);
#endif // !AARCH64

  b(loop, ne);

  b(not_subtype);

  bind(update_cache);
  // Must be equal but missed in cache. Update cache.
  str(Rsuper_klass, Address(Rsub_klass, Klass::secondary_super_cache_offset()));

  bind(ok_is_subtype);
}


// The 1st part of the store check.
// Sets card_table_base register.
void InterpreterMacroAssembler::store_check_part1(Register card_table_base) {
  // Check barrier set type (should be card table) and element size
  BarrierSet* bs = Universe::heap()->barrier_set();
  assert(bs->kind() == BarrierSet::CardTableForRS ||
         bs->kind() == BarrierSet::CardTableExtension,
         "Wrong barrier set kind");

  CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "Adjust store check code");

  // Load card table base address.

  /* Performance note.

     There is an alternative way of loading card table base address
     from thread descriptor, which may look more efficient:

       ldr(card_table_base, Address(Rthread, JavaThread::card_table_base_offset()));

     However, performance measurements of micro benchmarks and specJVM98
     showed that loading of card table base from thread descriptor is
     7-18% slower compared to loading of literal embedded into the code.
     Possible cause is a cache miss (card table base address resides in a
     rarely accessed area of thread descriptor).
  */
  // TODO-AARCH64 Investigate if mov_slow is faster than ldr from Rthread on AArch64
  mov_address(card_table_base, (address)ct->byte_map_base, symbolic_Relocation::card_table_reference);
}

// The 2nd part of the store check.
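// Conceptually, parts 1 and 2 of the store check together perform
// (plus the memory barriers needed for CMS):
//   jbyte* card = card_table_base + ((uintptr_t)obj >> card_shift);
//   if (!UseCondCardMark || *card != dirty_card_val())
//     *card = dirty_card_val();   // dirty_card_val() == 0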
void InterpreterMacroAssembler::store_check_part2(Register obj, Register card_table_base, Register tmp) {
  assert_different_registers(obj, card_table_base, tmp);

  assert(CardTableModRefBS::dirty_card_val() == 0, "Dirty card value must be 0 due to optimizations.");
#ifdef AARCH64
  add(card_table_base, card_table_base, AsmOperand(obj, lsr, CardTableModRefBS::card_shift));
  Address card_table_addr(card_table_base);
#else
  Address card_table_addr(card_table_base, obj, lsr, CardTableModRefBS::card_shift);
#endif

  if (UseCondCardMark) {
    if (UseConcMarkSweepGC) {
      membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), noreg);
    }
    Label already_dirty;

    ldrb(tmp, card_table_addr);
    cbz(tmp, already_dirty);

    set_card(card_table_base, card_table_addr, tmp);
    bind(already_dirty);

  } else {
    if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
      membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore), noreg);
    }
    set_card(card_table_base, card_table_addr, tmp);
  }
}

void InterpreterMacroAssembler::set_card(Register card_table_base, Address card_table_addr, Register tmp) {
#ifdef AARCH64
  strb(ZR, card_table_addr);
#else
  CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(Universe::heap()->barrier_set());
  if ((((uintptr_t)ct->byte_map_base & 0xff) == 0)) {
    // Card table is aligned so the lowest byte of the table address base is zero.
    // This works only if the code is not saved for later use, possibly
    // in a context where the base would no longer be aligned.
    strb(card_table_base, card_table_addr);
  } else {
    mov(tmp, 0);
    strb(tmp, card_table_addr);
  }
#endif // AARCH64
}

//////////////////////////////////////////////////////////////////////////////////


// Java Expression Stack

void InterpreterMacroAssembler::pop_ptr(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  ldr(r, Address(Rstack_top, wordSize, post_indexed));
}

void InterpreterMacroAssembler::pop_i(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  ldr_s32(r, Address(Rstack_top, wordSize, post_indexed));
  zap_high_non_significant_bits(r);
}

#ifdef AARCH64
void InterpreterMacroAssembler::pop_l(Register r) {
  assert(r != Rstack_top, "unpredictable instruction");
  ldr(r, Address(Rstack_top, 2*wordSize, post_indexed));
}
#else
void InterpreterMacroAssembler::pop_l(Register lo, Register hi) {
  assert_different_registers(lo, hi);
  assert(lo < hi, "lo must be < hi");
  pop(RegisterSet(lo) | RegisterSet(hi));
}
#endif // AARCH64

void InterpreterMacroAssembler::pop_f(FloatRegister fd) {
#ifdef AARCH64
  ldr_s(fd, Address(Rstack_top, wordSize, post_indexed));
#else
  fpops(fd);
#endif // AARCH64
}

void InterpreterMacroAssembler::pop_d(FloatRegister fd) {
#ifdef AARCH64
  ldr_d(fd, Address(Rstack_top, 2*wordSize, post_indexed));
#else
  fpopd(fd);
#endif // AARCH64
}

// Transition vtos -> state. Blows R0, R1. Sets TOS cached value.
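// Pops the value described by 'state' from the Java expression stack into the
// TOS cache registers and, for atos, verifies the popped oop.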
void InterpreterMacroAssembler::pop(TosState state) { switch (state) { case atos: pop_ptr(R0_tos); break; case btos: // fall through case ztos: // fall through case ctos: // fall through case stos: // fall through case itos: pop_i(R0_tos); break; #ifdef AARCH64 case ltos: pop_l(R0_tos); break; #else case ltos: pop_l(R0_tos_lo, R1_tos_hi); break; #endif // AARCH64 #ifdef __SOFTFP__ case ftos: pop_i(R0_tos); break; case dtos: pop_l(R0_tos_lo, R1_tos_hi); break; #else case ftos: pop_f(S0_tos); break; case dtos: pop_d(D0_tos); break; #endif // __SOFTFP__ case vtos: /* nothing to do */ break; default : ShouldNotReachHere(); } interp_verify_oop(R0_tos, state, __FILE__, __LINE__); } void InterpreterMacroAssembler::push_ptr(Register r) { assert(r != Rstack_top, "unpredictable instruction"); str(r, Address(Rstack_top, -wordSize, pre_indexed)); check_stack_top_on_expansion(); } void InterpreterMacroAssembler::push_i(Register r) { assert(r != Rstack_top, "unpredictable instruction"); str_32(r, Address(Rstack_top, -wordSize, pre_indexed)); check_stack_top_on_expansion(); } #ifdef AARCH64 void InterpreterMacroAssembler::push_l(Register r) { assert(r != Rstack_top, "unpredictable instruction"); stp(r, ZR, Address(Rstack_top, -2*wordSize, pre_indexed)); check_stack_top_on_expansion(); } #else void InterpreterMacroAssembler::push_l(Register lo, Register hi) { assert_different_registers(lo, hi); assert(lo < hi, "lo must be < hi"); push(RegisterSet(lo) | RegisterSet(hi)); } #endif // AARCH64 void InterpreterMacroAssembler::push_f() { #ifdef AARCH64 str_s(S0_tos, Address(Rstack_top, -wordSize, pre_indexed)); check_stack_top_on_expansion(); #else fpushs(S0_tos); #endif // AARCH64 } void InterpreterMacroAssembler::push_d() { #ifdef AARCH64 str_d(D0_tos, Address(Rstack_top, -2*wordSize, pre_indexed)); check_stack_top_on_expansion(); #else fpushd(D0_tos); #endif // AARCH64 } // Transition state -> vtos. Blows Rtemp. void InterpreterMacroAssembler::push(TosState state) { interp_verify_oop(R0_tos, state, __FILE__, __LINE__); switch (state) { case atos: push_ptr(R0_tos); break; case btos: // fall through case ztos: // fall through case ctos: // fall through case stos: // fall through case itos: push_i(R0_tos); break; #ifdef AARCH64 case ltos: push_l(R0_tos); break; #else case ltos: push_l(R0_tos_lo, R1_tos_hi); break; #endif // AARCH64 #ifdef __SOFTFP__ case ftos: push_i(R0_tos); break; case dtos: push_l(R0_tos_lo, R1_tos_hi); break; #else case ftos: push_f(); break; case dtos: push_d(); break; #endif // __SOFTFP__ case vtos: /* nothing to do */ break; default : ShouldNotReachHere(); } } #ifndef AARCH64 // Converts return value in R0/R1 (interpreter calling conventions) to TOS cached value. void InterpreterMacroAssembler::convert_retval_to_tos(TosState state) { #if (!defined __SOFTFP__ && !defined __ABI_HARD__) // According to interpreter calling conventions, result is returned in R0/R1, // but templates expect ftos in S0, and dtos in D0. if (state == ftos) { fmsr(S0_tos, R0); } else if (state == dtos) { fmdrr(D0_tos, R0, R1); } #endif // !__SOFTFP__ && !__ABI_HARD__ } // Converts TOS cached value to return value in R0/R1 (according to interpreter calling conventions). void InterpreterMacroAssembler::convert_tos_to_retval(TosState state) { #if (!defined __SOFTFP__ && !defined __ABI_HARD__) // According to interpreter calling conventions, result is returned in R0/R1, // so ftos (S0) and dtos (D0) are moved to R0/R1. 
if (state == ftos) { fmrs(R0, S0_tos); } else if (state == dtos) { fmrrd(R0, R1, D0_tos); } #endif // !__SOFTFP__ && !__ABI_HARD__ } #endif // !AARCH64 // Helpers for swap and dup void InterpreterMacroAssembler::load_ptr(int n, Register val) { ldr(val, Address(Rstack_top, Interpreter::expr_offset_in_bytes(n))); } void InterpreterMacroAssembler::store_ptr(int n, Register val) { str(val, Address(Rstack_top, Interpreter::expr_offset_in_bytes(n))); } void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() { #ifdef AARCH64 check_no_cached_stack_top(Rtemp); save_stack_top(); cut_sp_before_call(); mov(Rparams, Rstack_top); #endif // AARCH64 // set sender sp mov(Rsender_sp, SP); #ifndef AARCH64 // record last_sp str(Rsender_sp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); #endif // !AARCH64 } // Jump to from_interpreted entry of a call unless single stepping is possible // in this thread in which case we must call the i2i entry void InterpreterMacroAssembler::jump_from_interpreted(Register method) { assert_different_registers(method, Rtemp); prepare_to_jump_from_interpreted(); if (can_post_interpreter_events()) { // JVMTI events, such as single-stepping, are implemented partly by avoiding running // compiled code in threads for which the event is enabled. Check here for // interp_only_mode if these events CAN be enabled. ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset())); #ifdef AARCH64 { Label not_interp_only_mode; cbz(Rtemp, not_interp_only_mode); indirect_jump(Address(method, Method::interpreter_entry_offset()), Rtemp); bind(not_interp_only_mode); } #else cmp(Rtemp, 0); ldr(PC, Address(method, Method::interpreter_entry_offset()), ne); #endif // AARCH64 } indirect_jump(Address(method, Method::from_interpreted_offset()), Rtemp); } void InterpreterMacroAssembler::restore_dispatch() { mov_slow(RdispatchTable, (address)Interpreter::dispatch_table(vtos)); } // The following two routines provide a hook so that an implementation // can schedule the dispatch in two parts. void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { // Nothing ARM-specific to be done here. } void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { dispatch_next(state, step); } void InterpreterMacroAssembler::dispatch_base(TosState state, DispatchTableMode table_mode, bool verifyoop) { if (VerifyActivationFrameSize) { Label L; #ifdef AARCH64 mov(Rtemp, SP); sub(Rtemp, FP, Rtemp); #else sub(Rtemp, FP, SP); #endif // AARCH64 int min_frame_size = (frame::link_offset - frame::interpreter_frame_initial_sp_offset) * wordSize; cmp(Rtemp, min_frame_size); b(L, ge); stop("broken stack frame"); bind(L); } if (verifyoop) { interp_verify_oop(R0_tos, state, __FILE__, __LINE__); } if((state == itos) || (state == btos) || (state == ztos) || (state == ctos) || (state == stos)) { zap_high_non_significant_bits(R0_tos); } #ifdef ASSERT Label L; mov_slow(Rtemp, (address)Interpreter::dispatch_table(vtos)); cmp(Rtemp, RdispatchTable); b(L, eq); stop("invalid RdispatchTable"); bind(L); #endif if (table_mode == DispatchDefault) { if (state == vtos) { indirect_jump(Address::indexed_ptr(RdispatchTable, R3_bytecode), Rtemp); } else { #ifdef AARCH64 sub(Rtemp, R3_bytecode, (Interpreter::distance_from_dispatch_table(vtos) - Interpreter::distance_from_dispatch_table(state))); indirect_jump(Address::indexed_ptr(RdispatchTable, Rtemp), Rtemp); #else // on 32-bit ARM this method is faster than the one above. 
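// Bias the table base by the distance between the vtos table and the 'state'
// table instead of adjusting the bytecode index in R3_bytecode.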
sub(Rtemp, RdispatchTable, (Interpreter::distance_from_dispatch_table(vtos) - Interpreter::distance_from_dispatch_table(state)) * wordSize); indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp); #endif } } else { assert(table_mode == DispatchNormal, "invalid dispatch table mode"); address table = (address) Interpreter::normal_table(state); mov_slow(Rtemp, table); indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp); } nop(); // to avoid filling CPU pipeline with invalid instructions nop(); } void InterpreterMacroAssembler::dispatch_only(TosState state) { dispatch_base(state, DispatchDefault); } void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { dispatch_base(state, DispatchNormal); } void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { dispatch_base(state, DispatchNormal, false); } void InterpreterMacroAssembler::dispatch_next(TosState state, int step) { // load next bytecode and advance Rbcp ldrb(R3_bytecode, Address(Rbcp, step, pre_indexed)); dispatch_base(state, DispatchDefault); } void InterpreterMacroAssembler::narrow(Register result) { // mask integer result to narrower return type. const Register Rtmp = R2; // get method type ldr(Rtmp, Address(Rmethod, Method::const_offset())); ldrb(Rtmp, Address(Rtmp, ConstMethod::result_type_offset())); Label notBool, notByte, notChar, done; cmp(Rtmp, T_INT); b(done, eq); cmp(Rtmp, T_BOOLEAN); b(notBool, ne); and_32(result, result, 1); b(done); bind(notBool); cmp(Rtmp, T_BYTE); b(notByte, ne); sign_extend(result, result, 8); b(done); bind(notByte); cmp(Rtmp, T_CHAR); b(notChar, ne); zero_extend(result, result, 16); b(done); bind(notChar); // cmp(Rtmp, T_SHORT); // b(done, ne); sign_extend(result, result, 16); // Nothing to do bind(done); } // remove activation // // Unlock the receiver if this is a synchronized method. // Unlock any Java monitors from syncronized blocks. // Remove the activation from the stack. // // If there are locked Java monitors // If throw_monitor_exception // throws IllegalMonitorStateException // Else if install_monitor_exception // installs IllegalMonitorStateException // Else // no error processing void InterpreterMacroAssembler::remove_activation(TosState state, Register ret_addr, bool throw_monitor_exception, bool install_monitor_exception, bool notify_jvmdi) { Label unlock, unlocked, no_unlock; // Note: Registers R0, R1, S0 and D0 (TOS cached value) may be in use for the result. const Address do_not_unlock_if_synchronized(Rthread, JavaThread::do_not_unlock_if_synchronized_offset()); const Register Rflag = R2; const Register Raccess_flags = R3; restore_method(); ldrb(Rflag, do_not_unlock_if_synchronized); // get method access flags ldr_u32(Raccess_flags, Address(Rmethod, Method::access_flags_offset())); strb(zero_register(Rtemp), do_not_unlock_if_synchronized); // reset the flag // check if method is synchronized tbz(Raccess_flags, JVM_ACC_SYNCHRONIZED_BIT, unlocked); // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. cbnz(Rflag, no_unlock); // unlock monitor push(state); // save result // BasicObjectLock will be first in list, since this is a synchronized method. However, need // to check that the object has not been unlocked by an explicit monitorexit bytecode. 
const Register Rmonitor = R1; // fixed in unlock_object() const Register Robj = R2; // address of first monitor sub(Rmonitor, FP, - frame::interpreter_frame_monitor_block_bottom_offset * wordSize + (int)sizeof(BasicObjectLock)); ldr(Robj, Address(Rmonitor, BasicObjectLock::obj_offset_in_bytes())); cbnz(Robj, unlock); pop(state); if (throw_monitor_exception) { // Entry already unlocked, need to throw exception call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception)); should_not_reach_here(); } else { // Monitor already unlocked during a stack unroll. // If requested, install an illegal_monitor_state_exception. // Continue with stack unrolling. if (install_monitor_exception) { call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception)); } b(unlocked); } // Exception case for the check that all monitors are unlocked. const Register Rcur = R2; Label restart_check_monitors_unlocked, exception_monitor_is_still_locked; bind(exception_monitor_is_still_locked); // Monitor entry is still locked, need to throw exception. // Rcur: monitor entry. if (throw_monitor_exception) { // Throw exception call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception)); should_not_reach_here(); } else { // Stack unrolling. Unlock object and install illegal_monitor_exception // Unlock does not block, so don't have to worry about the frame push(state); mov(R1, Rcur); unlock_object(R1); if (install_monitor_exception) { call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception)); } pop(state); b(restart_check_monitors_unlocked); } bind(unlock); unlock_object(Rmonitor); pop(state); // Check that for block-structured locking (i.e., that all locked objects has been unlocked) bind(unlocked); // Check that all monitors are unlocked { Label loop; const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; const Register Rbottom = R3; const Register Rcur_obj = Rtemp; bind(restart_check_monitors_unlocked); ldr(Rcur, Address(FP, frame::interpreter_frame_monitor_block_top_offset * wordSize)); // points to current entry, starting with top-most entry sub(Rbottom, FP, -frame::interpreter_frame_monitor_block_bottom_offset * wordSize); // points to word before bottom of monitor block cmp(Rcur, Rbottom); // check if there are no monitors #ifndef AARCH64 ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); // prefetch monitor's object #endif // !AARCH64 b(no_unlock, eq); bind(loop); #ifdef AARCH64 ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes())); #endif // AARCH64 // check if current entry is used cbnz(Rcur_obj, exception_monitor_is_still_locked); add(Rcur, Rcur, entry_size); // otherwise advance to next entry cmp(Rcur, Rbottom); // check if bottom reached #ifndef AARCH64 ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne); // prefetch monitor's object #endif // !AARCH64 b(loop, ne); // if not at bottom then check this entry } bind(no_unlock); // jvmti support if (notify_jvmdi) { notify_method_exit(state, NotifyJVMTI); // preserve TOSCA } else { notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA } // remove activation #ifdef AARCH64 ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ldp(FP, LR, Address(FP)); mov(SP, Rtemp); #else mov(Rtemp, FP); ldmia(FP, RegisterSet(FP) | RegisterSet(LR)); ldr(SP, Address(Rtemp, frame::interpreter_frame_sender_sp_offset * 
wordSize)); #endif if (ret_addr != LR) { mov(ret_addr, LR); } } // At certain points in the method invocation the monitor of // synchronized methods hasn't been entered yet. // To correctly handle exceptions at these points, we set the thread local // variable _do_not_unlock_if_synchronized to true. The remove_activation will // check this flag. void InterpreterMacroAssembler::set_do_not_unlock_if_synchronized(bool flag, Register tmp) { const Address do_not_unlock_if_synchronized(Rthread, JavaThread::do_not_unlock_if_synchronized_offset()); if (flag) { mov(tmp, 1); strb(tmp, do_not_unlock_if_synchronized); } else { strb(zero_register(tmp), do_not_unlock_if_synchronized); } } // Lock object // // Argument: R1 : Points to BasicObjectLock to be used for locking. // Must be initialized with object to lock. // Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM. void InterpreterMacroAssembler::lock_object(Register Rlock) { assert(Rlock == R1, "the second argument"); if (UseHeavyMonitors) { call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock); } else { Label done; const Register Robj = R2; const Register Rmark = R3; assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp); const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); Label already_locked, slow_case; // Load object pointer ldr(Robj, Address(Rlock, obj_offset)); if (UseBiasedLocking) { biased_locking_enter(Robj, Rmark/*scratched*/, R0, false, Rtemp, done, slow_case); } #ifdef AARCH64 assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); ldr(Rmark, Robj); // Test if object is already locked assert(markOopDesc::unlocked_value == 1, "adjust this code"); tbz(Rmark, exact_log2(markOopDesc::unlocked_value), already_locked); #else // AARCH64 // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. // That would be acceptable as ether CAS or slow case path is taken in that case. // Exception to that is if the object is locked by the calling thread, then the recursive test will pass (guaranteed as // loads are satisfied from a store queue if performed on the same processor). assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); ldr(Rmark, Address(Robj, oopDesc::mark_offset_in_bytes())); // Test if object is already locked tst(Rmark, markOopDesc::unlocked_value); b(already_locked, eq); #endif // !AARCH64 // Save old object->mark() into BasicLock's displaced header str(Rmark, Address(Rlock, mark_offset)); cas_for_lock_acquire(Rmark, Rlock, Robj, Rtemp, slow_case); #ifndef PRODUCT if (PrintBiasedLockingStatistics) { cond_atomic_inc32(al, BiasedLocking::fast_path_entry_count_addr()); } #endif //!PRODUCT b(done); // If we got here that means the object is locked by ether calling thread or another thread. bind(already_locked); // Handling of locked objects: recursive locks and slow case. // Fast check for recursive lock. // // Can apply the optimization only if this is a stack lock // allocated in this thread. For efficiency, we can focus on // recently allocated stack locks (instead of reading the stack // base and checking whether 'mark' points inside the current // thread stack): // 1) (mark & 3) == 0 // 2) SP <= mark < SP + os::pagesize() // // Warning: SP + os::pagesize can overflow the stack base. 
We must // neither apply the optimization for an inflated lock allocated // just above the thread stack (this is why condition 1 matters) // nor apply the optimization if the stack lock is inside the stack // of another thread. The latter is avoided even in case of overflow // because we have guard pages at the end of all stacks. Hence, if // we go over the stack base and hit the stack of another thread, // this should not be in a writeable area that could contain a // stack lock allocated by that thread. As a consequence, a stack // lock less than page size away from SP is guaranteed to be // owned by the current thread. // // Note: assuming SP is aligned, we can check the low bits of // (mark-SP) instead of the low bits of mark. In that case, // assuming page size is a power of 2, we can merge the two // conditions into a single test: // => ((mark - SP) & (3 - os::pagesize())) == 0 #ifdef AARCH64 // Use the single check since the immediate is OK for AARCH64 sub(R0, Rmark, Rstack_top); intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size()); Assembler::LogicalImmediate imm(mask, false); ands(R0, R0, imm); // For recursive case store 0 into lock record. // It is harmless to store it unconditionally as lock record contains some garbage // value in its _displaced_header field by this moment. str(ZR, Address(Rlock, mark_offset)); #else // AARCH64 // (3 - os::pagesize()) cannot be encoded as an ARM immediate operand. // Check independently the low bits and the distance to SP. // -1- test low 2 bits movs(R0, AsmOperand(Rmark, lsl, 30)); // -2- test (mark - SP) if the low two bits are 0 sub(R0, Rmark, SP, eq); movs(R0, AsmOperand(R0, lsr, exact_log2(os::vm_page_size())), eq); // If still 'eq' then recursive locking OK: store 0 into lock record str(R0, Address(Rlock, mark_offset), eq); #endif // AARCH64 #ifndef PRODUCT if (PrintBiasedLockingStatistics) { cond_atomic_inc32(eq, BiasedLocking::fast_path_entry_count_addr()); } #endif // !PRODUCT b(done, eq); bind(slow_case); // Call the runtime routine for slow case call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock); bind(done); } } // Unlocks an object. Used in monitorexit bytecode and remove_activation. // // Argument: R1: Points to BasicObjectLock structure for lock // Throw an IllegalMonitorException if object is not locked by current thread // Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM. 
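// The fast path below is, roughly (after the optional biased-locking exit):
//   obj = lock->obj(); lock->set_obj(NULL);        // free the monitor entry
//   header = displaced header saved in the BasicLock;
//   if (header == NULL) goto done;                 // recursive unlock, nothing to restore
//   try to CAS 'header' back into obj->mark(); if that fails, goto slow_case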
void InterpreterMacroAssembler::unlock_object(Register Rlock) { assert(Rlock == R1, "the second argument"); if (UseHeavyMonitors) { call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), Rlock); } else { Label done, slow_case; const Register Robj = R2; const Register Rmark = R3; const Register Rresult = R0; assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp); const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); const Register Rzero = zero_register(Rtemp); // Load oop into Robj ldr(Robj, Address(Rlock, obj_offset)); // Free entry str(Rzero, Address(Rlock, obj_offset)); if (UseBiasedLocking) { biased_locking_exit(Robj, Rmark, done); } // Load the old header from BasicLock structure ldr(Rmark, Address(Rlock, mark_offset)); // Test for recursion (zero mark in BasicLock) cbz(Rmark, done); bool allow_fallthrough_on_failure = true; cas_for_lock_release(Rlock, Rmark, Robj, Rtemp, slow_case, allow_fallthrough_on_failure); b(done, eq); bind(slow_case); // Call the runtime routine for slow case. str(Robj, Address(Rlock, obj_offset)); // restore obj call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), Rlock); bind(done); } } // Test ImethodDataPtr. If it is null, continue at the specified label void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, Label& zero_continue) { assert(ProfileInterpreter, "must be profiling interpreter"); ldr(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); cbz(mdp, zero_continue); } // Set the method data pointer for the current bcp. // Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { assert(ProfileInterpreter, "must be profiling interpreter"); Label set_mdp; // Test MDO to avoid the call if it is NULL. ldr(Rtemp, Address(Rmethod, Method::method_data_offset())); cbz(Rtemp, set_mdp); mov(R0, Rmethod); mov(R1, Rbcp); call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), R0, R1); // R0/W0: mdi // mdo is guaranteed to be non-zero here, we checked for it before the call. ldr(Rtemp, Address(Rmethod, Method::method_data_offset())); add(Rtemp, Rtemp, in_bytes(MethodData::data_offset())); add_ptr_scaled_int32(Rtemp, Rtemp, R0, 0); bind(set_mdp); str(Rtemp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); } void InterpreterMacroAssembler::verify_method_data_pointer() { assert(ProfileInterpreter, "must be profiling interpreter"); #ifdef ASSERT Label verify_continue; save_caller_save_registers(); const Register Rmdp = R2; test_method_data_pointer(Rmdp, verify_continue); // If mdp is zero, continue // If the mdp is valid, it will point to a DataLayout header which is // consistent with the bcp. The converse is highly probable also. 
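  // I.e., check that
  //   method->constMethod() + ConstMethod::codes_offset() + data->bci() == bcp
  // and let InterpreterRuntime::verify_mdp() decide the questionable cases.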
ldrh(R3, Address(Rmdp, DataLayout::bci_offset())); ldr(Rtemp, Address(Rmethod, Method::const_offset())); add(R3, R3, Rtemp); add(R3, R3, in_bytes(ConstMethod::codes_offset())); cmp(R3, Rbcp); b(verify_continue, eq); mov(R0, Rmethod); mov(R1, Rbcp); call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), R0, R1, Rmdp); bind(verify_continue); restore_caller_save_registers(); #endif // ASSERT } void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, int offset, Register value) { assert(ProfileInterpreter, "must be profiling interpreter"); assert_different_registers(mdp_in, value); str(value, Address(mdp_in, offset)); } // Increments mdp data. Sets bumped_count register to adjusted counter. void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, int offset, Register bumped_count, bool decrement) { assert(ProfileInterpreter, "must be profiling interpreter"); // Counter address Address data(mdp_in, offset); assert_different_registers(mdp_in, bumped_count); increment_mdp_data_at(data, bumped_count, decrement); } void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, int flag_byte_constant) { assert_different_registers(mdp_in, Rtemp); assert(ProfileInterpreter, "must be profiling interpreter"); assert((0 < flag_byte_constant) && (flag_byte_constant < (1 << BitsPerByte)), "flag mask is out of range"); // Set the flag ldrb(Rtemp, Address(mdp_in, in_bytes(DataLayout::flags_offset()))); orr(Rtemp, Rtemp, (unsigned)flag_byte_constant); strb(Rtemp, Address(mdp_in, in_bytes(DataLayout::flags_offset()))); } // Increments mdp data. Sets bumped_count register to adjusted counter. void InterpreterMacroAssembler::increment_mdp_data_at(Address data, Register bumped_count, bool decrement) { assert(ProfileInterpreter, "must be profiling interpreter"); ldr(bumped_count, data); if (decrement) { // Decrement the register. Set condition codes. subs(bumped_count, bumped_count, DataLayout::counter_increment); // Avoid overflow. #ifdef AARCH64 assert(DataLayout::counter_increment == 1, "required for cinc"); cinc(bumped_count, bumped_count, pl); #else add(bumped_count, bumped_count, DataLayout::counter_increment, pl); #endif // AARCH64 } else { // Increment the register. Set condition codes. adds(bumped_count, bumped_count, DataLayout::counter_increment); // Avoid overflow. 
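    // If the add overflowed into the sign bit, the conditional correction below
    // (sub on 'mi' for ARM, cinv on AArch64) makes the counter saturate instead of wrapping.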
#ifdef AARCH64 assert(DataLayout::counter_increment == 1, "required for cinv"); cinv(bumped_count, bumped_count, mi); // inverts 0x80..00 back to 0x7f..ff #else sub(bumped_count, bumped_count, DataLayout::counter_increment, mi); #endif // AARCH64 } str(bumped_count, data); } void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, int offset, Register value, Register test_value_out, Label& not_equal_continue) { assert(ProfileInterpreter, "must be profiling interpreter"); assert_different_registers(mdp_in, test_value_out, value); ldr(test_value_out, Address(mdp_in, offset)); cmp(test_value_out, value); b(not_equal_continue, ne); } void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, int offset_of_disp, Register reg_temp) { assert(ProfileInterpreter, "must be profiling interpreter"); assert_different_registers(mdp_in, reg_temp); ldr(reg_temp, Address(mdp_in, offset_of_disp)); add(mdp_in, mdp_in, reg_temp); str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); } void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, Register reg_offset, Register reg_tmp) { assert(ProfileInterpreter, "must be profiling interpreter"); assert_different_registers(mdp_in, reg_offset, reg_tmp); ldr(reg_tmp, Address(mdp_in, reg_offset)); add(mdp_in, mdp_in, reg_tmp); str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); } void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, int constant) { assert(ProfileInterpreter, "must be profiling interpreter"); add(mdp_in, mdp_in, constant); str(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); } // Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { assert(ProfileInterpreter, "must be profiling interpreter"); assert_different_registers(return_bci, R0, R1, R2, R3, Rtemp); mov(R1, return_bci); call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), R1); } // Sets mdp, bumped_count registers, blows Rtemp. void InterpreterMacroAssembler::profile_taken_branch(Register mdp, Register bumped_count) { assert_different_registers(mdp, bumped_count); if (ProfileInterpreter) { Label profile_continue; // If no method data exists, go to profile_continue. // Otherwise, assign to mdp test_method_data_pointer(mdp, profile_continue); // We are taking a branch. Increment the taken count. increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()), bumped_count); // The method data pointer needs to be updated to reflect the new target. update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()), Rtemp); bind (profile_continue); } } // Sets mdp, blows Rtemp. void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { assert_different_registers(mdp, Rtemp); if (ProfileInterpreter) { Label profile_continue; // If no method data exists, go to profile_continue. test_method_data_pointer(mdp, profile_continue); // We are taking a branch. Increment the not taken count. increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()), Rtemp); // The method data pointer needs to be updated to correspond to the next bytecode update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); bind (profile_continue); } } // Sets mdp, blows Rtemp. 
void InterpreterMacroAssembler::profile_call(Register mdp) { assert_different_registers(mdp, Rtemp); if (ProfileInterpreter) { Label profile_continue; // If no method data exists, go to profile_continue. test_method_data_pointer(mdp, profile_continue); // We are making a call. Increment the count. increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp); // The method data pointer needs to be updated to reflect the new target. update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); bind (profile_continue); } } // Sets mdp, blows Rtemp. void InterpreterMacroAssembler::profile_final_call(Register mdp) { if (ProfileInterpreter) { Label profile_continue; // If no method data exists, go to profile_continue. test_method_data_pointer(mdp, profile_continue); // We are making a call. Increment the count. increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp); // The method data pointer needs to be updated to reflect the new target. update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); bind (profile_continue); } } // Sets mdp, blows Rtemp. void InterpreterMacroAssembler::profile_virtual_call(Register mdp, Register receiver, bool receiver_can_be_null) { assert_different_registers(mdp, receiver, Rtemp); if (ProfileInterpreter) { Label profile_continue; // If no method data exists, go to profile_continue. test_method_data_pointer(mdp, profile_continue); Label skip_receiver_profile; if (receiver_can_be_null) { Label not_null; cbnz(receiver, not_null); // We are making a call. Increment the count for null receiver. increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp); b(skip_receiver_profile); bind(not_null); } // Record the receiver type. record_klass_in_profile(receiver, mdp, Rtemp, true); bind(skip_receiver_profile); // The method data pointer needs to be updated to reflect the new target. update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); bind(profile_continue); } } void InterpreterMacroAssembler::record_klass_in_profile_helper( Register receiver, Register mdp, Register reg_tmp, int start_row, Label& done, bool is_virtual_call) { if (TypeProfileWidth == 0) return; assert_different_registers(receiver, mdp, reg_tmp); int last_row = VirtualCallData::row_limit() - 1; assert(start_row <= last_row, "must be work left to do"); // Test this row for both the receiver and for null. // Take any of three different outcomes: // 1. found receiver => increment count and goto done // 2. found null => keep looking for case 1, maybe allocate this cell // 3. found something else => keep looking for cases 1 and 2 // Case 3 is handled by a recursive call. for (int row = start_row; row <= last_row; row++) { Label next_test; // See if the receiver is receiver[n]. int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); test_mdp_data_at(mdp, recvr_offset, receiver, reg_tmp, next_test); // The receiver is receiver[n]. Increment count[n]. int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); increment_mdp_data_at(mdp, count_offset, reg_tmp); b(done); bind(next_test); // reg_tmp now contains the receiver from the CallData. if (row == start_row) { Label found_null; // Failed the equality check on receiver[n]... Test for null. if (start_row == last_row) { // The only thing left to do is handle the null case. if (is_virtual_call) { cbz(reg_tmp, found_null); // Receiver did not match any saved receiver and there is no empty row for it. 
// Increment total counter to indicate polymorphic case. increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), reg_tmp); b(done); bind(found_null); } else { cbnz(reg_tmp, done); } break; } // Since null is rare, make it be the branch-taken case. cbz(reg_tmp, found_null); // Put all the "Case 3" tests here. record_klass_in_profile_helper(receiver, mdp, reg_tmp, start_row + 1, done, is_virtual_call); // Found a null. Keep searching for a matching receiver, // but remember that this is an empty (unused) slot. bind(found_null); } } // In the fall-through case, we found no matching receiver, but we // observed the receiver[start_row] is NULL. // Fill in the receiver field and increment the count. int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); set_mdp_data_at(mdp, recvr_offset, receiver); int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); mov(reg_tmp, DataLayout::counter_increment); set_mdp_data_at(mdp, count_offset, reg_tmp); if (start_row > 0) { b(done); } } void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, Register mdp, Register reg_tmp, bool is_virtual_call) { assert(ProfileInterpreter, "must be profiling"); assert_different_registers(receiver, mdp, reg_tmp); Label done; record_klass_in_profile_helper(receiver, mdp, reg_tmp, 0, done, is_virtual_call); bind (done); } // Sets mdp, blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). void InterpreterMacroAssembler::profile_ret(Register mdp, Register return_bci) { assert_different_registers(mdp, return_bci, Rtemp, R0, R1, R2, R3); if (ProfileInterpreter) { Label profile_continue; uint row; // If no method data exists, go to profile_continue. test_method_data_pointer(mdp, profile_continue); // Update the total ret count. increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()), Rtemp); for (row = 0; row < RetData::row_limit(); row++) { Label next_test; // See if return_bci is equal to bci[n]: test_mdp_data_at(mdp, in_bytes(RetData::bci_offset(row)), return_bci, Rtemp, next_test); // return_bci is equal to bci[n]. Increment the count. increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)), Rtemp); // The method data pointer needs to be updated to reflect the new target. update_mdp_by_offset(mdp, in_bytes(RetData::bci_displacement_offset(row)), Rtemp); b(profile_continue); bind(next_test); } update_mdp_for_ret(return_bci); bind(profile_continue); } } // Sets mdp. void InterpreterMacroAssembler::profile_null_seen(Register mdp) { if (ProfileInterpreter) { Label profile_continue; // If no method data exists, go to profile_continue. test_method_data_pointer(mdp, profile_continue); set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); // The method data pointer needs to be updated. int mdp_delta = in_bytes(BitData::bit_data_size()); if (TypeProfileCasts) { mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); } update_mdp_by_constant(mdp, mdp_delta); bind (profile_continue); } } // Sets mdp, blows Rtemp. void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { assert_different_registers(mdp, Rtemp); if (ProfileInterpreter && TypeProfileCasts) { Label profile_continue; // If no method data exists, go to profile_continue. test_method_data_pointer(mdp, profile_continue); int count_offset = in_bytes(CounterData::count_offset()); // Back up the address, since we have already bumped the mdp. count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); // *Decrement* the counter. 
We expect to see zero or small negatives. increment_mdp_data_at(mdp, count_offset, Rtemp, true); bind (profile_continue); } } // Sets mdp, blows Rtemp. void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass) { assert_different_registers(mdp, klass, Rtemp); if (ProfileInterpreter) { Label profile_continue; // If no method data exists, go to profile_continue. test_method_data_pointer(mdp, profile_continue); // The method data pointer needs to be updated. int mdp_delta = in_bytes(BitData::bit_data_size()); if (TypeProfileCasts) { mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); // Record the object type. record_klass_in_profile(klass, mdp, Rtemp, false); } update_mdp_by_constant(mdp, mdp_delta); bind(profile_continue); } } // Sets mdp, blows Rtemp. void InterpreterMacroAssembler::profile_switch_default(Register mdp) { assert_different_registers(mdp, Rtemp); if (ProfileInterpreter) { Label profile_continue; // If no method data exists, go to profile_continue. test_method_data_pointer(mdp, profile_continue); // Update the default case count increment_mdp_data_at(mdp, in_bytes(MultiBranchData::default_count_offset()), Rtemp); // The method data pointer needs to be updated. update_mdp_by_offset(mdp, in_bytes(MultiBranchData::default_displacement_offset()), Rtemp); bind(profile_continue); } } // Sets mdp. Blows reg_tmp1, reg_tmp2. Index could be the same as reg_tmp2. void InterpreterMacroAssembler::profile_switch_case(Register mdp, Register index, Register reg_tmp1, Register reg_tmp2) { assert_different_registers(mdp, reg_tmp1, reg_tmp2); assert_different_registers(mdp, reg_tmp1, index); if (ProfileInterpreter) { Label profile_continue; const int count_offset = in_bytes(MultiBranchData::case_array_offset()) + in_bytes(MultiBranchData::relative_count_offset()); const int displacement_offset = in_bytes(MultiBranchData::case_array_offset()) + in_bytes(MultiBranchData::relative_displacement_offset()); // If no method data exists, go to profile_continue. test_method_data_pointer(mdp, profile_continue); // Build the base (index * per_case_size_in_bytes()) logical_shift_left(reg_tmp1, index, exact_log2(in_bytes(MultiBranchData::per_case_size()))); // Update the case count add(reg_tmp1, reg_tmp1, count_offset); increment_mdp_data_at(Address(mdp, reg_tmp1), reg_tmp2); // The method data pointer needs to be updated. 
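    // reg_tmp1 still holds the byte offset of this case's count slot; turn it into
    // the offset of the matching displacement slot.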
add(reg_tmp1, reg_tmp1, displacement_offset - count_offset); update_mdp_by_offset(mdp, reg_tmp1, reg_tmp2); bind (profile_continue); } } void InterpreterMacroAssembler::byteswap_u32(Register r, Register rtmp1, Register rtmp2) { #ifdef AARCH64 rev_w(r, r); #else if (VM_Version::supports_rev()) { rev(r, r); } else { eor(rtmp1, r, AsmOperand(r, ror, 16)); mvn(rtmp2, 0x0000ff00); andr(rtmp1, rtmp2, AsmOperand(rtmp1, lsr, 8)); eor(r, rtmp1, AsmOperand(r, ror, 8)); } #endif // AARCH64 } void InterpreterMacroAssembler::inc_global_counter(address address_of_counter, int offset, Register tmp1, Register tmp2, bool avoid_overflow) { const intx addr = (intx) (address_of_counter + offset); assert ((addr & 0x3) == 0, "address of counter should be aligned"); const intx offset_mask = right_n_bits(AARCH64_ONLY(12 + 2) NOT_AARCH64(12)); const address base = (address) (addr & ~offset_mask); const int offs = (int) (addr & offset_mask); const Register addr_base = tmp1; const Register val = tmp2; mov_slow(addr_base, base); ldr_s32(val, Address(addr_base, offs)); if (avoid_overflow) { adds_32(val, val, 1); #ifdef AARCH64 Label L; b(L, mi); str_32(val, Address(addr_base, offs)); bind(L); #else str(val, Address(addr_base, offs), pl); #endif // AARCH64 } else { add_32(val, val, 1); str_32(val, Address(addr_base, offs)); } } void InterpreterMacroAssembler::interp_verify_oop(Register reg, TosState state, const char *file, int line) { if (state == atos) { MacroAssembler::_verify_oop(reg, "broken oop", file, line); } } // Inline assembly for: // // if (thread is in interp_only_mode) { // InterpreterRuntime::post_method_entry(); // } // if (DTraceMethodProbes) { // SharedRuntime::dtrace_method_entry(method, receiver); // } // if (RC_TRACE_IN_RANGE(0x00001000, 0x00002000)) { // SharedRuntime::rc_trace_method_entry(method, receiver); // } void InterpreterMacroAssembler::notify_method_entry() { // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to // track stack depth. If it is possible to enter interp_only_mode we add // the code to check if the event should be sent. if (can_post_interpreter_events()) { Label L; ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset())); cbz(Rtemp, L); call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry)); bind(L); } // Note: Disable DTrace runtime check for now to eliminate overhead on each method entry if (DTraceMethodProbes) { Label Lcontinue; ldrb_global(Rtemp, (address)&DTraceMethodProbes); cbz(Rtemp, Lcontinue); mov(R0, Rthread); mov(R1, Rmethod); call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), R0, R1); bind(Lcontinue); } // RedefineClasses() tracing support for obsolete method entry if (log_is_enabled(Trace, redefine, class, obsolete)) { mov(R0, Rthread); mov(R1, Rmethod); call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1); } } void InterpreterMacroAssembler::notify_method_exit( TosState state, NotifyMethodExitMode mode, bool native, Register result_lo, Register result_hi, FloatRegister result_fp) { // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to // track stack depth. If it is possible to enter interp_only_mode we add // the code to check if the event should be sent. if (mode == NotifyJVMTI && can_post_interpreter_events()) { Label L; // Note: frame::interpreter_frame_result has a dependency on how the // method result is saved across the call to post_method_exit. 
    // If this is changed then the interpreter_frame_result implementation
    // will need to be updated too.

    ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
    cbz(Rtemp, L);

    if (native) {
      // For the C++ and template interpreters, push both result registers on
      // the stack: in a native method we don't know the tos state.
      // On AArch64 result registers are stored into the frame at known locations.
      // See frame::interpreter_frame_result for code that gets the result values from here.
      assert(result_lo != noreg, "result registers should be defined");

#ifdef AARCH64
      assert(result_hi == noreg, "result_hi is not used on AArch64");
      assert(result_fp != fnoreg, "FP result register must be defined");

      str_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize));
      str(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize));
#else
      assert(result_hi != noreg, "result registers should be defined");

#ifdef __ABI_HARD__
      assert(result_fp != fnoreg, "FP result register must be defined");
      sub(SP, SP, 2 * wordSize);
      fstd(result_fp, Address(SP));
#endif // __ABI_HARD__

      push(RegisterSet(result_lo) | RegisterSet(result_hi));
#endif // AARCH64

      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));

#ifdef AARCH64
      ldr_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize));
      ldr(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize));
#else
      pop(RegisterSet(result_lo) | RegisterSet(result_hi));

#ifdef __ABI_HARD__
      fldd(result_fp, Address(SP));
      add(SP, SP, 2 * wordSize);
#endif // __ABI_HARD__

#endif // AARCH64

    } else {
      // For the template interpreter, the value on tos is the size of the
      // state. (The C++ interpreter calls JVMTI somewhere else.)
      push(state);
      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
      pop(state);
    }

    bind(L);
  }

  // Note: Disable DTrace runtime check for now to eliminate overhead on each method exit
  if (DTraceMethodProbes) {
    Label Lcontinue;

    ldrb_global(Rtemp, (address)&DTraceMethodProbes);
    cbz(Rtemp, Lcontinue);

    push(state);

    mov(R0, Rthread);
    mov(R1, Rmethod);
    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), R0, R1);

    pop(state);

    bind(Lcontinue);
  }
}

#ifndef PRODUCT
// Prints the given message together with the current FP and SP; all
// caller-saved registers are preserved around the printf call.
void InterpreterMacroAssembler::trace_state(const char* msg) {
  int push_size = save_caller_save_registers();

  Label Lcontinue;
  InlinedString Lmsg0("%s: FP=" INTPTR_FORMAT ", SP=" INTPTR_FORMAT "\n");
  InlinedString Lmsg(msg);
  InlinedAddress Lprintf((address)printf);

  ldr_literal(R0, Lmsg0);
  ldr_literal(R1, Lmsg);
  mov(R2, FP);
  add(R3, SP, push_size);  // original SP (without saved registers)
  ldr_literal(Rtemp, Lprintf);
  call(Rtemp);

  b(Lcontinue);

  bind_literal(Lmsg0);
  bind_literal(Lmsg);
  bind_literal(Lprintf);

  bind(Lcontinue);

  restore_caller_save_registers();
}
#endif

// Jump if ((*counter_addr += increment) & mask) satisfies the condition.
void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr,
                                                        int increment, Address mask_addr,
                                                        Register scratch, Register scratch2,
                                                        AsmCondition cond, Label* where) {
  // caution: scratch2 and base address of counter_addr can be the same
  assert_different_registers(scratch, scratch2);
  ldr_u32(scratch, counter_addr);
  add(scratch, scratch, increment);
  str_32(scratch, counter_addr);

#ifdef AARCH64
  ldr_u32(scratch2, mask_addr);
  ands_w(ZR, scratch, scratch2);      // set flags from (counter & mask), discard the result
#else
  ldr(scratch2, mask_addr);
  andrs(scratch, scratch, scratch2);  // set flags and leave (counter & mask) in scratch
#endif // AARCH64

  b(*where, cond);
}

void InterpreterMacroAssembler::get_method_counters(Register method,
                                                    Register Rcounters,
                                                    Label& skip,
                                                    bool saveRegs,
                                                    Register reg1,
                                                    Register reg2,
                                                    Register reg3) {
  const Address method_counters(method, Method::method_counters_offset());
  Label has_counters;

  ldr(Rcounters, method_counters);
  cbnz(Rcounters, has_counters);

  if (saveRegs) {
    // Save and restore in-use caller-saved registers since they will be trashed by call_VM.
    assert(reg1 != noreg, "must specify reg1");
    assert(reg2 != noreg, "must specify reg2");
#ifdef AARCH64
    assert(reg3 != noreg, "must specify reg3");
    stp(reg1, reg2, Address(Rstack_top, -2*wordSize, pre_indexed));
    stp(reg3, ZR, Address(Rstack_top, -2*wordSize, pre_indexed));
#else
    assert(reg3 == noreg, "must not specify reg3");
    push(RegisterSet(reg1) | RegisterSet(reg2));
#endif
  }

  mov(R1, method);
  call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), R1);

  if (saveRegs) {
#ifdef AARCH64
    ldp(reg3, ZR, Address(Rstack_top, 2*wordSize, post_indexed));
    ldp(reg1, reg2, Address(Rstack_top, 2*wordSize, post_indexed));
#else
    pop(RegisterSet(reg1) | RegisterSet(reg2));
#endif
  }

  ldr(Rcounters, method_counters);
  cbz(Rcounters, skip);  // No MethodCounters created, OutOfMemory
  bind(has_counters);
}
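// Illustrative sketch (not code from this file) of how the two helpers above
// are meant to be combined by the interpreter generator when bumping the
// invocation counter; the counter/mask addresses and the R0_tmp scratch
// register below are placeholders chosen for the sketch:
//
//   Label overflow, skip;
//   get_method_counters(Rmethod, Rcounters, skip, false, noreg, noreg, noreg);
//   Address counter(Rcounters, in_bytes(MethodCounters::invocation_counter_offset()) +
//                              in_bytes(InvocationCounter::counter_offset()));
//   Address mask(Rcounters, in_bytes(MethodCounters::invoke_mask_offset()));
//   // branch to the overflow handler once ((*counter += increment) & *mask) == 0
//   increment_mask_and_jump(counter, InvocationCounter::count_increment,
//                           mask, Rtemp, R0_tmp, eq, &overflow);
//   bind(skip);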