--- old/src/hotspot/cpu/arm/interp_masm_arm.cpp	2018-09-17 10:30:10.021196957 -0400
+++ new/src/hotspot/cpu/arm/interp_masm_arm.cpp	2018-09-17 10:30:09.174147483 -0400
@@ -54,7 +54,7 @@
 }
 
 void InterpreterMacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
-#if defined(ASSERT) && !defined(AARCH64)
+#if defined(ASSERT)
   // Ensure that last_sp is not filled.
   { Label L;
     ldr(Rtemp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
@@ -62,27 +62,15 @@
     stop("InterpreterMacroAssembler::call_VM_helper: last_sp != NULL");
     bind(L);
   }
-#endif // ASSERT && !AARCH64
+#endif // ASSERT
 
   // Rbcp must be saved/restored since it may change due to GC.
   save_bcp();
 
-#ifdef AARCH64
-  check_no_cached_stack_top(Rtemp);
-  save_stack_top();
-  check_extended_sp(Rtemp);
-  cut_sp_before_call();
-#endif // AARCH64
 
   // super call
   MacroAssembler::call_VM_helper(oop_result, entry_point, number_of_arguments, check_exceptions);
 
-#ifdef AARCH64
-  // Restore SP to extended SP
-  restore_sp_after_call(Rtemp);
-  check_stack_top();
-  clear_cached_stack_top();
-#endif // AARCH64
 
   // Restore interpreter specific registers.
   restore_bcp();
@@ -128,10 +116,8 @@
   const Address tos_addr(thread_state, JvmtiThreadState::earlyret_tos_offset());
   const Address oop_addr(thread_state, JvmtiThreadState::earlyret_oop_offset());
   const Address val_addr(thread_state, JvmtiThreadState::earlyret_value_offset());
-#ifndef AARCH64
   const Address val_addr_hi(thread_state, JvmtiThreadState::earlyret_value_offset()
                             + in_ByteSize(wordSize));
-#endif // !AARCH64
 
   Register zero = zero_register(Rtemp);
 
@@ -141,11 +127,7 @@
                interp_verify_oop(R0_tos, state, __FILE__, __LINE__);
                break;
 
-#ifdef AARCH64
-    case ltos: ldr(R0_tos, val_addr);              break;
-#else
     case ltos: ldr(R1_tos_hi, val_addr_hi);        // fall through
-#endif // AARCH64
     case btos:                                     // fall through
     case ztos:                                     // fall through
     case ctos:                                     // fall through
@@ -163,9 +145,7 @@
   }
   // Clean up tos value in the thread object
   str(zero, val_addr);
-#ifndef AARCH64
   str(zero, val_addr_hi);
-#endif // !AARCH64
 
   mov(Rtemp, (int) ilgl);
   str_32(Rtemp, tos_addr);
@@ -220,7 +200,6 @@
     ldrb(tmp_reg, Address(Rbcp, bcp_offset));
     orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
   } else if (index_size == sizeof(u4)) {
-    // TODO-AARCH64: consider using unaligned access here
     ldrb(index, Address(Rbcp, bcp_offset+3));
     ldrb(tmp_reg, Address(Rbcp, bcp_offset+2));
     orr(index, tmp_reg, AsmOperand(index, lsl, BitsPerByte));
@@ -252,7 +231,6 @@
 
   // convert from field index to ConstantPoolCacheEntry index
   assert(sizeof(ConstantPoolCacheEntry) == 4*wordSize, "adjust code below");
-  // TODO-AARCH64 merge this shift with shift "add(..., Rcache, AsmOperand(Rindex, lsl, LogBytesPerWord))" after this method is called
   logical_shift_left(index, index, 2);
 }
 
@@ -261,13 +239,8 @@
   get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
   // caution index and bytecode can be the same
   add(bytecode, cache, AsmOperand(index, lsl, LogBytesPerWord));
-#ifdef AARCH64
-  add(bytecode, bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()));
-  ldarb(bytecode, bytecode);
-#else
   ldrb(bytecode, Address(bytecode, (1 + byte_no) + in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())));
   TemplateTable::volatile_barrier(MacroAssembler::LoadLoad, noreg, true);
-#endif // AARCH64
 }
 
 // Sets cache. Blows reg_tmp.
@@ -365,31 +338,21 @@
   ldr(supers_arr, Address(Rsub_klass, Klass::secondary_supers_offset()));
 
   ldr_u32(supers_cnt, Address(supers_arr, Array<Klass*>::length_offset_in_bytes())); // Load the array length
-#ifdef AARCH64
-  cbz(supers_cnt, not_subtype);
-  add(supers_arr, supers_arr, Array<Klass*>::base_offset_in_bytes());
-#else
   cmp(supers_cnt, 0);
 
   // Skip to the start of array elements and prefetch the first super-klass.
   ldr(cur_super, Address(supers_arr, Array<Klass*>::base_offset_in_bytes(), pre_indexed), ne);
   b(not_subtype, eq);
-#endif // AARCH64
 
   bind(loop);
 
-#ifdef AARCH64
-  ldr(cur_super, Address(supers_arr, wordSize, post_indexed));
-#endif // AARCH64
 
   cmp(cur_super, Rsuper_klass);
   b(update_cache, eq);
 
   subs(supers_cnt, supers_cnt, 1);
-#ifndef AARCH64
   ldr(cur_super, Address(supers_arr, wordSize, pre_indexed), ne);
-#endif // !AARCH64
   b(loop, ne);
 
 
@@ -419,33 +382,18 @@
   zap_high_non_significant_bits(r);
 }
 
-#ifdef AARCH64
-void InterpreterMacroAssembler::pop_l(Register r) {
-  assert(r != Rstack_top, "unpredictable instruction");
-  ldr(r, Address(Rstack_top, 2*wordSize, post_indexed));
-}
-#else
 void InterpreterMacroAssembler::pop_l(Register lo, Register hi) {
   assert_different_registers(lo, hi);
   assert(lo < hi, "lo must be < hi");
   pop(RegisterSet(lo) | RegisterSet(hi));
 }
-#endif // AARCH64
 
 void InterpreterMacroAssembler::pop_f(FloatRegister fd) {
-#ifdef AARCH64
-  ldr_s(fd, Address(Rstack_top, wordSize, post_indexed));
-#else
   fpops(fd);
-#endif // AARCH64
 }
 
 void InterpreterMacroAssembler::pop_d(FloatRegister fd) {
-#ifdef AARCH64
-  ldr_d(fd, Address(Rstack_top, 2*wordSize, post_indexed));
-#else
   fpopd(fd);
-#endif // AARCH64
 }
 
@@ -458,11 +406,7 @@
     case ctos:                                     // fall through
     case stos:                                     // fall through
     case itos: pop_i(R0_tos);                      break;
-#ifdef AARCH64
-    case ltos: pop_l(R0_tos);                      break;
-#else
     case ltos: pop_l(R0_tos_lo, R1_tos_hi);        break;
-#endif // AARCH64
 #ifdef __SOFTFP__
     case ftos: pop_i(R0_tos);                      break;
     case dtos: pop_l(R0_tos_lo, R1_tos_hi);        break;
@@ -488,36 +432,18 @@
   check_stack_top_on_expansion();
 }
 
-#ifdef AARCH64
-void InterpreterMacroAssembler::push_l(Register r) {
-  assert(r != Rstack_top, "unpredictable instruction");
-  stp(r, ZR, Address(Rstack_top, -2*wordSize, pre_indexed));
-  check_stack_top_on_expansion();
-}
-#else
 void InterpreterMacroAssembler::push_l(Register lo, Register hi) {
   assert_different_registers(lo, hi);
   assert(lo < hi, "lo must be < hi");
   push(RegisterSet(lo) | RegisterSet(hi));
 }
-#endif // AARCH64
 
 void InterpreterMacroAssembler::push_f() {
-#ifdef AARCH64
-  str_s(S0_tos, Address(Rstack_top, -wordSize, pre_indexed));
-  check_stack_top_on_expansion();
-#else
   fpushs(S0_tos);
-#endif // AARCH64
 }
 
 void InterpreterMacroAssembler::push_d() {
-#ifdef AARCH64
-  str_d(D0_tos, Address(Rstack_top, -2*wordSize, pre_indexed));
-  check_stack_top_on_expansion();
-#else
   fpushd(D0_tos);
-#endif // AARCH64
 }
 
 // Transition state -> vtos. Blows Rtemp.
@@ -530,11 +456,7 @@
     case ctos:                                     // fall through
     case stos:                                     // fall through
     case itos: push_i(R0_tos);                     break;
-#ifdef AARCH64
-    case ltos: push_l(R0_tos);                     break;
-#else
     case ltos: push_l(R0_tos_lo, R1_tos_hi);       break;
-#endif // AARCH64
 #ifdef __SOFTFP__
     case ftos: push_i(R0_tos);                     break;
     case dtos: push_l(R0_tos_lo, R1_tos_hi);       break;
@@ -548,7 +470,6 @@
 }
 
 
-#ifndef AARCH64
 
 // Converts return value in R0/R1 (interpreter calling conventions) to TOS cached value.
 void InterpreterMacroAssembler::convert_retval_to_tos(TosState state) {
@@ -576,7 +497,6 @@
 #endif // !__SOFTFP__ && !__ABI_HARD__
 }
 
-#endif // !AARCH64
 
 
 // Helpers for swap and dup
@@ -590,20 +510,12 @@
 
 
 void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
-#ifdef AARCH64
-  check_no_cached_stack_top(Rtemp);
-  save_stack_top();
-  cut_sp_before_call();
-  mov(Rparams, Rstack_top);
-#endif // AARCH64
 
   // set sender sp
   mov(Rsender_sp, SP);
 
-#ifndef AARCH64
   // record last_sp
   str(Rsender_sp, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
-#endif // !AARCH64
 }
 
@@ -619,19 +531,8 @@
     // interp_only_mode if these events CAN be enabled.
 
     ldr_s32(Rtemp, Address(Rthread, JavaThread::interp_only_mode_offset()));
-#ifdef AARCH64
-    {
-      Label not_interp_only_mode;
-
-      cbz(Rtemp, not_interp_only_mode);
-      indirect_jump(Address(method, Method::interpreter_entry_offset()), Rtemp);
-
-      bind(not_interp_only_mode);
-    }
-#else
     cmp(Rtemp, 0);
     ldr(PC, Address(method, Method::interpreter_entry_offset()), ne);
-#endif // AARCH64
   }
 
   indirect_jump(Address(method, Method::from_interpreted_offset()), Rtemp);
@@ -658,12 +559,7 @@
                                                  bool verifyoop) {
   if (VerifyActivationFrameSize) {
     Label L;
-#ifdef AARCH64
-    mov(Rtemp, SP);
-    sub(Rtemp, FP, Rtemp);
-#else
     sub(Rtemp, FP, SP);
-#endif // AARCH64
     int min_frame_size = (frame::link_offset - frame::interpreter_frame_initial_sp_offset) * wordSize;
     cmp(Rtemp, min_frame_size);
     b(L, ge);
@@ -692,16 +588,10 @@
     if (state == vtos) {
       indirect_jump(Address::indexed_ptr(RdispatchTable, R3_bytecode), Rtemp);
     } else {
-#ifdef AARCH64
-      sub(Rtemp, R3_bytecode, (Interpreter::distance_from_dispatch_table(vtos) -
-                               Interpreter::distance_from_dispatch_table(state)));
-      indirect_jump(Address::indexed_ptr(RdispatchTable, Rtemp), Rtemp);
-#else
       // on 32-bit ARM this method is faster than the one above.
       sub(Rtemp, RdispatchTable, (Interpreter::distance_from_dispatch_table(vtos) -
                                   Interpreter::distance_from_dispatch_table(state)) * wordSize);
       indirect_jump(Address::indexed_ptr(Rtemp, R3_bytecode), Rtemp);
-#endif
     }
   } else {
     assert(table_mode == DispatchNormal, "invalid dispatch table mode");
@@ -897,25 +787,18 @@
                                                   // points to word before bottom of monitor block
 
     cmp(Rcur, Rbottom);                           // check if there are no monitors
-#ifndef AARCH64
     ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
                                                   // prefetch monitor's object
-#endif // !AARCH64
     b(no_unlock, eq);
 
     bind(loop);
-#ifdef AARCH64
-    ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()));
-#endif // AARCH64
     // check if current entry is used
     cbnz(Rcur_obj, exception_monitor_is_still_locked);
 
     add(Rcur, Rcur, entry_size);                  // otherwise advance to next entry
     cmp(Rcur, Rbottom);                           // check if bottom reached
-#ifndef AARCH64
     ldr(Rcur_obj, Address(Rcur, BasicObjectLock::obj_offset_in_bytes()), ne);
                                                   // prefetch monitor's object
-#endif // !AARCH64
     b(loop, ne);                                  // if not at bottom then check this entry
   }
 
@@ -929,15 +812,9 @@
   }
 
   // remove activation
-#ifdef AARCH64
-  ldr(Rtemp, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
-  ldp(FP, LR, Address(FP));
-  mov(SP, Rtemp);
-#else
   mov(Rtemp, FP);
   ldmia(FP, RegisterSet(FP) | RegisterSet(LR));
   ldr(SP, Address(Rtemp, frame::interpreter_frame_sender_sp_offset * wordSize));
-#endif
 
   if (ret_addr != LR) {
     mov(ret_addr, LR);
   }
@@ -965,7 +842,7 @@
 //
 // Argument: R1 : Points to BasicObjectLock to be used for locking.
 // Must be initialized with object to lock.
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM.
+// Blows volatile registers R0-R3, Rtemp, LR. Calls VM.
 void InterpreterMacroAssembler::lock_object(Register Rlock) {
   assert(Rlock == R1, "the second argument");
 
@@ -991,15 +868,6 @@
       biased_locking_enter(Robj, Rmark/*scratched*/, R0, false, Rtemp, done, slow_case);
     }
 
-#ifdef AARCH64
-    assert(oopDesc::mark_offset_in_bytes() == 0, "must be");
-    ldr(Rmark, Robj);
-
-    // Test if object is already locked
-    assert(markOopDesc::unlocked_value == 1, "adjust this code");
-    tbz(Rmark, exact_log2(markOopDesc::unlocked_value), already_locked);
-
-#else // AARCH64
 
     // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
     // That would be acceptable as ether CAS or slow case path is taken in that case.
@@ -1013,7 +881,6 @@
     tst(Rmark, markOopDesc::unlocked_value);
     b(already_locked, eq);
 
-#endif // !AARCH64
 
     // Save old object->mark() into BasicLock's displaced header
     str(Rmark, Address(Rlock, mark_offset));
@@ -1059,19 +926,6 @@
       // conditions into a single test:
      // => ((mark - SP) & (3 - os::pagesize())) == 0
 
-#ifdef AARCH64
-      // Use the single check since the immediate is OK for AARCH64
-      sub(R0, Rmark, Rstack_top);
-      intptr_t mask = ((intptr_t)3) - ((intptr_t)os::vm_page_size());
-      Assembler::LogicalImmediate imm(mask, false);
-      ands(R0, R0, imm);
-
-      // For recursive case store 0 into lock record.
-      // It is harmless to store it unconditionally as lock record contains some garbage
-      // value in its _displaced_header field by this moment.
-      str(ZR, Address(Rlock, mark_offset));
-
-#else // AARCH64
      // (3 - os::pagesize()) cannot be encoded as an ARM immediate operand.
      // Check independently the low bits and the distance to SP.
      // -1- test low 2 bits
@@ -1082,7 +936,6 @@
      // If still 'eq' then recursive locking OK: store 0 into lock record
      str(R0, Address(Rlock, mark_offset), eq);
 
-#endif // AARCH64
 
 #ifndef PRODUCT
      if (PrintBiasedLockingStatistics) {
@@ -1106,7 +959,7 @@
 //
 // Argument: R1: Points to BasicObjectLock structure for lock
 // Throw an IllegalMonitorException if object is not locked by current thread
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR. Calls VM.
+// Blows volatile registers R0-R3, Rtemp, LR. Calls VM.
 void InterpreterMacroAssembler::unlock_object(Register Rlock) {
   assert(Rlock == R1, "the second argument");
 
@@ -1168,7 +1021,7 @@
 
 
 // Set the method data pointer for the current bcp.
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64), Rtemp, LR.
+// Blows volatile registers R0-R3, Rtemp, LR.
 void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
   assert(ProfileInterpreter, "must be profiling interpreter");
   Label set_mdp;
@@ -1265,22 +1118,12 @@
     // Decrement the register. Set condition codes.
     subs(bumped_count, bumped_count, DataLayout::counter_increment);
     // Avoid overflow.
-#ifdef AARCH64
-    assert(DataLayout::counter_increment == 1, "required for cinc");
-    cinc(bumped_count, bumped_count, pl);
-#else
     add(bumped_count, bumped_count, DataLayout::counter_increment, pl);
-#endif // AARCH64
   } else {
     // Increment the register. Set condition codes.
    adds(bumped_count, bumped_count, DataLayout::counter_increment);
    // Avoid overflow.
-#ifdef AARCH64
-    assert(DataLayout::counter_increment == 1, "required for cinv");
-    cinv(bumped_count, bumped_count, mi); // inverts 0x80..00 back to 0x7f..ff
-#else
     sub(bumped_count, bumped_count, DataLayout::counter_increment, mi);
-#endif // AARCH64
   }
   str(bumped_count, data);
 }
@@ -1328,7 +1171,7 @@
 }
 
 
-// Blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+// Blows volatile registers R0-R3, Rtemp, LR.
 void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
   assert(ProfileInterpreter, "must be profiling interpreter");
   assert_different_registers(return_bci, R0, R1, R2, R3, Rtemp);
@@ -1542,7 +1385,7 @@
   bind (done);
 }
 
-// Sets mdp, blows volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+// Sets mdp, blows volatile registers R0-R3, Rtemp, LR.
 void InterpreterMacroAssembler::profile_ret(Register mdp, Register return_bci) {
   assert_different_registers(mdp, return_bci, Rtemp, R0, R1, R2, R3);
 
@@ -1704,9 +1547,6 @@
 
 
 void InterpreterMacroAssembler::byteswap_u32(Register r, Register rtmp1, Register rtmp2) {
-#ifdef AARCH64
-  rev_w(r, r);
-#else
   if (VM_Version::supports_rev()) {
     rev(r, r);
   } else {
@@ -1715,7 +1555,6 @@
     andr(rtmp1, rtmp2, AsmOperand(rtmp1, lsr, 8));
     eor(r, rtmp1, AsmOperand(r, ror, 8));
   }
-#endif // AARCH64
 }
 
 
@@ -1723,7 +1562,7 @@
   const intx addr = (intx) (address_of_counter + offset);
 
   assert ((addr & 0x3) == 0, "address of counter should be aligned");
-  const intx offset_mask = right_n_bits(AARCH64_ONLY(12 + 2) NOT_AARCH64(12));
+  const intx offset_mask = right_n_bits(12);
 
   const address base = (address) (addr & ~offset_mask);
   const int offs = (int) (addr & offset_mask);
@@ -1736,14 +1575,7 @@
 
   if (avoid_overflow) {
     adds_32(val, val, 1);
-#ifdef AARCH64
-    Label L;
-    b(L, mi);
-    str_32(val, Address(addr_base, offs));
-    bind(L);
-#else
     str(val, Address(addr_base, offs), pl);
-#endif // AARCH64
   } else {
     add_32(val, val, 1);
     str_32(val, Address(addr_base, offs));
@@ -1823,17 +1655,9 @@
   if (native) {
     // For c++ and template interpreter push both result registers on the
     // stack in native, we don't know the state.
-    // On AArch64 result registers are stored into the frame at known locations.
     // See frame::interpreter_frame_result for code that gets the result values from here.
     assert(result_lo != noreg, "result registers should be defined");
 
-#ifdef AARCH64
-    assert(result_hi == noreg, "result_hi is not used on AArch64");
-    assert(result_fp != fnoreg, "FP result register must be defined");
-
-    str_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize));
-    str(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize));
-#else
     assert(result_hi != noreg, "result registers should be defined");
 
 #ifdef __ABI_HARD__
@@ -1843,20 +1667,14 @@
 #endif // __ABI_HARD__
 
     push(RegisterSet(result_lo) | RegisterSet(result_hi));
-#endif // AARCH64
 
     call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit));
 
-#ifdef AARCH64
-    ldr_d(result_fp, Address(FP, frame::interpreter_frame_fp_saved_result_offset * wordSize));
-    ldr(result_lo, Address(FP, frame::interpreter_frame_gp_saved_result_offset * wordSize));
-#else
     pop(RegisterSet(result_lo) | RegisterSet(result_hi));
 #ifdef __ABI_HARD__
     fldd(result_fp, Address(SP));
     add(SP, SP, 2 * wordSize);
 #endif // __ABI_HARD__
-#endif // AARCH64
 
   } else {
     // For the template interpreter, the value on tos is the size of the
@@ -1932,13 +1750,8 @@
 
     add(scratch, scratch, increment);
     str_32(scratch, counter_addr);
-#ifdef AARCH64
-    ldr_u32(scratch2, mask_addr);
-    ands_w(ZR, scratch, scratch2);
-#else
     ldr(scratch2, mask_addr);
     andrs(scratch, scratch, scratch2);
-#endif // AARCH64
     b(*where, cond);
   }
 
@@ -1959,26 +1772,15 @@
     // Save and restore in use caller-saved registers since they will be trashed by call_VM
     assert(reg1 != noreg, "must specify reg1");
     assert(reg2 != noreg, "must specify reg2");
-#ifdef AARCH64
-    assert(reg3 != noreg, "must specify reg3");
-    stp(reg1, reg2, Address(Rstack_top, -2*wordSize, pre_indexed));
-    stp(reg3, ZR, Address(Rstack_top, -2*wordSize, pre_indexed));
-#else
     assert(reg3 == noreg, "must not specify reg3");
     push(RegisterSet(reg1) | RegisterSet(reg2));
-#endif
   }
 
   mov(R1, method);
   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), R1);
 
   if (saveRegs) {
-#ifdef AARCH64
-    ldp(reg3, ZR, Address(Rstack_top, 2*wordSize, post_indexed));
-    ldp(reg1, reg2, Address(Rstack_top, 2*wordSize, post_indexed));
-#else
     pop(RegisterSet(reg1) | RegisterSet(reg2));
-#endif
   }
 
   ldr(Rcounters, method_counters);