--- old/src/cpu/aarch64/vm/aarch64.ad 2014-12-09 13:49:39.593765486 -0500 +++ new/src/cpu/aarch64/vm/aarch64.ad 2014-12-09 13:49:39.223798253 -0500 @@ -785,13 +785,12 @@ static int emit_deopt_handler(CodeBuffer& cbuf); static uint size_exception_handler() { - // count up to 4 movz/n/k instructions and one branch instruction - return 5 * NativeInstruction::instruction_size; + return MacroAssembler::far_branch_size(); } static uint size_deopt_handler() { - // count one adr and one branch instruction - return 2 * NativeInstruction::instruction_size; + // count one adr and one far branch instruction + return 4 * NativeInstruction::instruction_size; } }; @@ -859,7 +858,7 @@ int MachCallRuntimeNode::ret_addr_offset() { // for generated stubs the call will be - // bl(addr) + // far_call(addr) // for real runtime callouts it will be six instructions // see aarch64_enc_java_to_runtime // adr(rscratch2, retaddr) @@ -868,7 +867,7 @@ // blrt rscratch1 CodeBlob *cb = CodeCache::find_blob(_entry_point); if (cb) { - return NativeInstruction::instruction_size; + return MacroAssembler::far_branch_size(); } else { return 6 * NativeInstruction::instruction_size; } @@ -1468,13 +1467,12 @@ // This is the unverified entry point. MacroAssembler _masm(&cbuf); - // no need to worry about 4-byte of br alignment on AArch64 __ cmp_klass(j_rarg0, rscratch2, rscratch1); Label skip; // TODO // can we avoid this skip and still use a reloc? __ br(Assembler::EQ, skip); - __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); __ bind(skip); } @@ -1499,7 +1497,7 @@ __ start_a_stub(size_exception_handler()); if (base == NULL) return 0; // CodeBuffer::expand failed int offset = __ offset(); - __ b(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); + __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); __ end_a_stub(); return offset; @@ -1517,8 +1515,7 @@ int offset = __ offset(); __ adr(lr, __ pc()); - // should we load this into rscratch1 and use a br? - __ b(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); + __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); __ end_a_stub(); @@ -2746,15 +2743,14 @@ enc_class aarch64_enc_java_static_call(method meth) %{ MacroAssembler _masm(&cbuf); - cbuf.set_insts_mark(); address addr = (address)$meth$$method; if (!_method) { // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. - __ bl(Address(addr, relocInfo::runtime_call_type)); + __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); } else if (_optimized_virtual) { - __ bl(Address(addr, relocInfo::opt_virtual_call_type)); + __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf); } else { - __ bl(Address(addr, relocInfo::static_call_type)); + __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf); } if (_method) { @@ -2771,15 +2767,15 @@ // Use it to preserve SP. __ mov(rfp, sp); - cbuf.set_insts_mark(); + const int start_offset = __ offset(); address addr = (address)$meth$$method; if (!_method) { // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. 
- __ bl(Address(addr, relocInfo::runtime_call_type)); + __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); } else if (_optimized_virtual) { - __ bl(Address(addr, relocInfo::opt_virtual_call_type)); + __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf); } else { - __ bl(Address(addr, relocInfo::static_call_type)); + __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf); } if (_method) { @@ -2814,7 +2810,7 @@ address entry = (address)$meth$$method; CodeBlob *cb = CodeCache::find_blob(entry); if (cb) { - __ bl(Address(entry)); + __ trampoline_call(Address(entry, relocInfo::runtime_call_type)); } else { int gpcnt; int fpcnt; @@ -2833,7 +2829,7 @@ enc_class aarch64_enc_rethrow() %{ MacroAssembler _masm(&cbuf); - __ b(RuntimeAddress(OptoRuntime::rethrow_stub())); + __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub())); %} enc_class aarch64_enc_ret() %{ --- old/src/cpu/aarch64/vm/assembler_aarch64.cpp 2014-12-09 13:49:41.743575083 -0500 +++ new/src/cpu/aarch64/vm/assembler_aarch64.cpp 2014-12-09 13:49:41.383606964 -0500 @@ -63,7 +63,6 @@ // #include "gc_implementation/g1/heapRegion.hpp" // #endif - extern "C" void entry(CodeBuffer *cb); #define __ _masm. @@ -1362,7 +1361,6 @@ if (L.is_bound()) { br(cc, target(L)); } else { - InstructionMark im(this); L.add_patch_at(code(), locator()); br(cc, pc()); } @@ -1373,7 +1371,6 @@ if (L.is_bound()) { (this->*insn)(target(L)); } else { - InstructionMark im(this); L.add_patch_at(code(), locator()); (this->*insn)(pc()); } @@ -1384,7 +1381,6 @@ if (L.is_bound()) { (this->*insn)(r, target(L)); } else { - InstructionMark im(this); L.add_patch_at(code(), locator()); (this->*insn)(r, pc()); } @@ -1395,7 +1391,6 @@ if (L.is_bound()) { (this->*insn)(r, bitpos, target(L)); } else { - InstructionMark im(this); L.add_patch_at(code(), locator()); (this->*insn)(r, bitpos, pc()); } @@ -1405,7 +1400,6 @@ if (L.is_bound()) { (this->*insn)(target(L), op); } else { - InstructionMark im(this); L.add_patch_at(code(), locator()); (this->*insn)(pc(), op); } --- old/src/cpu/aarch64/vm/assembler_aarch64.hpp 2014-12-09 13:49:43.263440472 -0500 +++ new/src/cpu/aarch64/vm/assembler_aarch64.hpp 2014-12-09 13:49:42.933469697 -0500 @@ -839,16 +839,27 @@ #undef INSN + // The maximum range of a branch is fixed for the AArch64 + // architecture. In debug mode we shrink it in order to test + // trampolines, but not so small that branches in the interpreter + // are out of range. 
+ static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); + + static bool reachable_from_branch_at(address branch, address target) { + return uabs(target - branch) < branch_range; + } + // Unconditional branch (immediate) -#define INSN(NAME, opcode) \ - void NAME(address dest) { \ - starti; \ - long offset = (dest - pc()) >> 2; \ - f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0); \ - } \ - void NAME(Label &L) { \ - wrap_label(L, &Assembler::NAME); \ - } \ +#define INSN(NAME, opcode) \ + void NAME(address dest) { \ + starti; \ + long offset = (dest - pc()) >> 2; \ + DEBUG_ONLY(assert(reachable_from_branch_at(pc(), dest), "debug only")); \ + f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0); \ + } \ + void NAME(Label &L) { \ + wrap_label(L, &Assembler::NAME); \ + } \ void NAME(const Address &dest); INSN(b, 0); --- old/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp 2014-12-09 13:49:44.813303204 -0500 +++ new/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp 2014-12-09 13:49:44.433336857 -0500 @@ -44,7 +44,7 @@ __ bind(_entry); ce->store_parameter(_method->as_register(), 1); ce->store_parameter(_bci, 0); - __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id))); ce->add_call_info_here(_info); ce->verify_oop_map(_info); __ b(_continuation); @@ -63,7 +63,7 @@ __ bind(_entry); if (_info->deoptimize_on_exception()) { address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); - __ call(RuntimeAddress(a)); + __ far_call(RuntimeAddress(a)); ce->add_call_info_here(_info); ce->verify_oop_map(_info); debug_only(__ should_not_reach_here()); @@ -81,7 +81,7 @@ } else { stub_id = Runtime1::throw_range_check_failed_id; } - __ call(RuntimeAddress(Runtime1::entry_for(stub_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(stub_id))); ce->add_call_info_here(_info); ce->verify_oop_map(_info); debug_only(__ should_not_reach_here()); @@ -94,7 +94,7 @@ void PredicateFailedStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); - __ call(RuntimeAddress(a)); + __ far_call(RuntimeAddress(a)); ce->add_call_info_here(_info); ce->verify_oop_map(_info); debug_only(__ should_not_reach_here()); @@ -105,7 +105,7 @@ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); } __ bind(_entry); - __ bl(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type)); + __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type)); ce->add_call_info_here(_info); ce->verify_oop_map(_info); #ifdef ASSERT @@ -135,7 +135,7 @@ assert(__ rsp_offset() == 0, "frame size should be fixed"); __ bind(_entry); __ mov(r3, _klass_reg->as_register()); - __ bl(RuntimeAddress(Runtime1::entry_for(_stub_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id))); ce->add_call_info_here(_info); ce->verify_oop_map(_info); assert(_result->as_register() == r0, "result must in r0,"); @@ -160,7 +160,7 @@ __ bind(_entry); assert(_length->as_register() == r19, "length must in r19,"); assert(_klass_reg->as_register() == r3, "klass_reg must in r3"); - __ bl(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id))); ce->add_call_info_here(_info); ce->verify_oop_map(_info); assert(_result->as_register() == r0, "result must in r0"); @@ -183,7 +183,7 @@ __ bind(_entry); 
assert(_length->as_register() == r19, "length must in r19,"); assert(_klass_reg->as_register() == r3, "klass_reg must in r3"); - __ bl(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); ce->add_call_info_here(_info); ce->verify_oop_map(_info); assert(_result->as_register() == r0, "result must in r0"); @@ -209,7 +209,7 @@ } else { enter_id = Runtime1::monitorenter_nofpu_id; } - __ bl(RuntimeAddress(Runtime1::entry_for(enter_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id))); ce->add_call_info_here(_info); ce->verify_oop_map(_info); __ b(_continuation); @@ -231,7 +231,7 @@ exit_id = Runtime1::monitorexit_nofpu_id; } __ adr(lr, _continuation); - __ b(RuntimeAddress(Runtime1::entry_for(exit_id))); + __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); } @@ -255,7 +255,7 @@ void DeoptimizeStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); - __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); ce->add_call_info_here(_info); DEBUG_ONLY(__ should_not_reach_here()); } @@ -272,7 +272,7 @@ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); __ bind(_entry); - __ call(RuntimeAddress(a)); + __ far_call(RuntimeAddress(a)); ce->add_call_info_here(_info); ce->verify_oop_map(_info); debug_only(__ should_not_reach_here()); @@ -288,7 +288,7 @@ if (_obj->is_cpu_register()) { __ mov(rscratch1, _obj->as_register()); } - __ call(RuntimeAddress(Runtime1::entry_for(_stub))); + __ far_call(RuntimeAddress(Runtime1::entry_for(_stub))); ce->add_call_info_here(_info); debug_only(__ should_not_reach_here()); } @@ -330,7 +330,7 @@ ce->emit_static_call_stub(); Address resolve(SharedRuntime::get_resolve_static_call_stub(), relocInfo::static_call_type); - __ bl(resolve); + __ trampoline_call(resolve); ce->add_call_info_here(info()); #ifndef PRODUCT @@ -361,7 +361,7 @@ } __ cbz(pre_val_reg, _continuation); ce->store_parameter(pre_val()->as_register(), 0); - __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id))); __ b(_continuation); } @@ -382,7 +382,7 @@ Register new_val_reg = new_val()->as_register(); __ cbz(new_val_reg, _continuation); ce->store_parameter(addr()->as_pointer_register(), 0); - __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id))); __ b(_continuation); } --- old/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp 2014-12-09 13:49:46.403162394 -0500 +++ new/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp 2014-12-09 13:49:46.053193390 -0500 @@ -297,7 +297,7 @@ // Note: RECEIVER must still contain the receiver! 
Label dont; __ br(Assembler::EQ, dont); - __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); // We align the verified entry point unless the method body // (including its inline cache check) will fit in a single 64-byte @@ -344,7 +344,7 @@ default: ShouldNotReachHere(); } - __ bl(RuntimeAddress(target)); + __ far_call(RuntimeAddress(target)); add_call_info_here(info); } @@ -390,8 +390,7 @@ __ verify_not_null_oop(r0); // search an exception handler (r0: exception oop, r3: throwing pc) - __ bl(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id))); - __ should_not_reach_here(); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id))); __ should_not_reach_here(); guarantee(code_offset() - offset <= exception_handler_size, "overflow"); __ end_a_stub(); @@ -446,7 +445,7 @@ // remove the activation and dispatch to the unwind handler __ block_comment("remove_frame and dispatch to the unwind handler"); __ remove_frame(initial_frame_size_in_bytes()); - __ b(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id))); + __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id))); // Emit the slow path assembly if (stub != NULL) { @@ -476,7 +475,7 @@ int offset = code_offset(); __ adr(lr, pc()); - __ b(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); + __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); guarantee(code_offset() - offset <= deopt_handler_size, "overflow"); __ end_a_stub(); @@ -954,7 +953,7 @@ default: ShouldNotReachHere(); } - __ bl(RuntimeAddress(target)); + __ far_call(RuntimeAddress(target)); add_call_info_here(info); } @@ -1425,7 +1424,7 @@ __ br(Assembler::EQ, *success_target); __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize))); - __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); __ ldr(klass_RInfo, Address(__ post(sp, 2 * wordSize))); // result is a boolean __ cbzw(klass_RInfo, *failure_target); @@ -1436,7 +1435,7 @@ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); // call out-of-line instance of __ check_klass_subtype_slow_path(...): __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize))); - __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); __ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize))); // result is a boolean __ cbz(k_RInfo, *failure_target); @@ -1526,7 +1525,7 @@ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); // call out-of-line instance of __ check_klass_subtype_slow_path(...): __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize))); - __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); __ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize))); // result is a boolean __ cbzw(k_RInfo, *failure_target); @@ -2017,7 +2016,7 @@ void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { - __ bl(Address(op->addr(), rtype)); + __ trampoline_call(Address(op->addr(), rtype)); add_call_info(code_offset(), op->info()); } @@ -2046,7 +2045,8 @@ __ relocate(static_stub_Relocation::spec(call_pc)); __ 
mov_metadata(rmethod, (Metadata*)NULL); - __ b(__ pc()); + __ movptr(rscratch1, 0); + __ br(rscratch1); assert(__ offset() - start <= call_stub_size, "stub too big"); __ end_a_stub(); @@ -2076,7 +2076,7 @@ } else { unwind_id = Runtime1::handle_exception_nofpu_id; } - __ bl(RuntimeAddress(Runtime1::entry_for(unwind_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id))); // FIXME: enough room for two byte trap ???? __ nop(); @@ -2239,7 +2239,7 @@ __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); } #endif - __ bl(RuntimeAddress(copyfunc_addr)); + __ far_call(RuntimeAddress(copyfunc_addr)); } __ cbz(r0, *stub->continuation()); @@ -2352,7 +2352,7 @@ __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); __ PUSH(src, dst); - __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); + __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); __ POP(src, dst); __ cbnz(src, cont); @@ -2402,7 +2402,7 @@ __ load_klass(c_rarg4, dst); __ ldr(c_rarg4, Address(c_rarg4, ObjArrayKlass::element_klass_offset())); __ ldrw(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset())); - __ call(RuntimeAddress(copyfunc_addr)); + __ far_call(RuntimeAddress(copyfunc_addr)); #ifndef PRODUCT if (PrintC1Statistics) { @@ -2517,7 +2517,7 @@ CodeBlob *cb = CodeCache::find_blob(entry); if (cb) { - __ bl(RuntimeAddress(entry)); + __ far_call(RuntimeAddress(entry)); } else { __ call_VM_leaf(entry, 3); } @@ -2855,7 +2855,7 @@ CodeBlob *cb = CodeCache::find_blob(dest); if (cb) { - __ bl(RuntimeAddress(dest)); + __ far_call(RuntimeAddress(dest)); } else { __ mov(rscratch1, RuntimeAddress(dest)); int len = args->length(); --- old/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp 2014-12-09 13:49:47.943026012 -0500 +++ new/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp 2014-12-09 13:49:47.633053466 -0500 @@ -72,9 +72,8 @@ void store_parameter(jint c, int offset_from_esp_in_words); void store_parameter(jobject c, int offset_from_esp_in_words); - enum { call_stub_size = NOT_LP64(15) LP64_ONLY(28), - exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), - deopt_handler_size = NOT_LP64(10) LP64_ONLY(17) - }; +enum { call_stub_size = 12 * NativeInstruction::instruction_size, + exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), + deopt_handler_size = 7 * NativeInstruction::instruction_size }; #endif // CPU_X86_VM_C1_LIRASSEMBLER_X86_HPP --- old/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp 2014-12-09 13:49:49.452892287 -0500 +++ new/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp 2014-12-09 13:49:49.122921512 -0500 @@ -351,7 +351,7 @@ if (CURRENT_ENV->dtrace_alloc_probes()) { assert(obj == r0, "must be"); - call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); + far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); } verify_oop(obj); @@ -385,7 +385,7 @@ if (CURRENT_ENV->dtrace_alloc_probes()) { assert(obj == r0, "must be"); - bl(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); + far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); } verify_oop(obj); --- old/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp 2014-12-09 13:49:50.862767418 -0500 +++ new/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp 2014-12-09 13:49:50.542795757 -0500 @@ -97,11 +97,11 @@ } if (frame_size() == no_frame_size) { leave(); - b(RuntimeAddress(StubRoutines::forward_exception_entry())); + far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); } 
else if (_stub_id == Runtime1::forward_exception_id) { should_not_reach_here(); } else { - b(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); + far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); } bind(L); } @@ -580,7 +580,7 @@ { Label L1; __ cbnz(r0, L1); // have we deoptimized? - __ b(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); + __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); __ bind(L1); } @@ -624,7 +624,7 @@ // registers and must leave throwing pc on the stack. A patch may // have values live in registers so the entry point with the // exception in tls. - __ b(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); __ bind(L); } @@ -641,7 +641,7 @@ // registers, pop all of our frame but the return address and jump to the deopt blob restore_live_registers(sasm); __ leave(); - __ b(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); __ bind(cont); restore_live_registers(sasm); @@ -1095,7 +1095,7 @@ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); assert(deopt_blob != NULL, "deoptimization blob must have been created"); __ leave(); - __ b(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); } break; @@ -1304,7 +1304,7 @@ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); assert(deopt_blob != NULL, "deoptimization blob must have been created"); - __ b(RuntimeAddress(deopt_blob->unpack_with_reexecution())); + __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); } break; --- old/src/cpu/aarch64/vm/compiledIC_aarch64.cpp 2014-12-09 13:49:52.342636350 -0500 +++ new/src/cpu/aarch64/vm/compiledIC_aarch64.cpp 2014-12-09 13:49:52.012665574 -0500 @@ -70,7 +70,8 @@ __ relocate(static_stub_Relocation::spec(mark)); // static stub relocation also tags the Method* in the code-stream. __ mov_metadata(rmethod, (Metadata*)NULL); - __ b(__ pc()); + __ movptr(rscratch1, 0); + __ br(rscratch1); assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big"); __ end_a_stub(); @@ -78,8 +79,7 @@ #undef __ int CompiledStaticCall::to_interp_stub_size() { - // count a mov mem --> to 3 movz/k and a branch - return 4 * NativeInstruction::instruction_size; + return 7 * NativeInstruction::instruction_size; } // Relocation entries for call stub, compiled java to interpreter. --- old/src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp 2014-12-09 13:49:53.832504396 -0500 +++ new/src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp 2014-12-09 13:49:53.482535391 -0500 @@ -37,9 +37,9 @@ #define SUPPORTS_NATIVE_CX8 -// The maximum B/BL offset range on AArch64 is 128MB -#undef CODE_CACHE_SIZE_LIMIT -#define CODE_CACHE_SIZE_LIMIT (128*M) +// The maximum B/BL offset range on AArch64 is 128MB. 
+#undef CODE_CACHE_DEFAULT_LIMIT +#define CODE_CACHE_DEFAULT_LIMIT (128*M) // According to the ARMv8 ARM, "Concurrent modification and execution // of instructions can lead to the resulting instruction performing --- old/src/cpu/aarch64/vm/globals_aarch64.hpp 2014-12-09 13:49:55.292375098 -0500 +++ new/src/cpu/aarch64/vm/globals_aarch64.hpp 2014-12-09 13:49:54.952405209 -0500 @@ -121,8 +121,10 @@ product(bool, UseNeon, false, \ "Use Neon for CRC32 computation") \ product(bool, UseCRC32, false, \ - "Use CRC32 instructions for CRC32 computation") + "Use CRC32 instructions for CRC32 computation") \ + product(bool, TraceTraps, false, "Trace all traps the signal handler knows about") #endif + + #endif // CPU_AARCH64_VM_GLOBALS_AARCH64_HPP --- old/src/cpu/aarch64/vm/icBuffer_aarch64.cpp 2014-12-09 13:49:56.782243144 -0500 +++ new/src/cpu/aarch64/vm/icBuffer_aarch64.cpp 2014-12-09 13:49:56.422275026 -0500 @@ -36,9 +36,10 @@ #include "oops/oop.inline2.hpp" int InlineCacheBuffer::ic_stub_code_size() { - return NativeInstruction::instruction_size * 5; + return (MacroAssembler::far_branches() ? 6 : 4) * NativeInstruction::instruction_size; } +#define __ masm-> void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { ResourceMark rm; @@ -50,13 +51,16 @@ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear // assert(cached_value == NULL || cached_oop->is_perm(), "must be perm oop"); + address start = __ pc(); Label l; - masm->ldr(rscratch2, l); - masm->b(ExternalAddress(entry_point)); - masm->bind(l); - masm->emit_int64((int64_t)cached_value); + __ ldr(rscratch2, l); + __ far_jump(ExternalAddress(entry_point)); + __ align(wordSize); + __ bind(l); + __ emit_int64((int64_t)cached_value); // Only need to invalidate the 1st two instructions - not the whole ic stub - ICache::invalidate_range(code_begin, NativeInstruction::instruction_size * 2); + ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size()); + assert(__ pc() - start == ic_stub_code_size(), "must be"); } address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { @@ -67,8 +71,8 @@ void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { - // creation also verifies the object - uintptr_t *p = (uintptr_t *)(code_begin + 8); + // The word containing the cached value is at the end of this IC buffer + uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize); void* o = (void*)*p; return o; } --- old/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp 2014-12-09 13:49:58.362103220 -0500 +++ new/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp 2014-12-09 13:49:57.992135987 -0500 @@ -152,7 +152,7 @@ Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff); Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff); Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff); - assert(pd_call_destination(branch) == target, "should be"); + assert(target_addr_for_insn(branch) == target, "should be"); instructions = 3; } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 && Instruction_aarch64::extract(insn, 4, 0) == 0b11111) { @@ -220,21 +220,17 @@ // Return the target address for the following sequences // 1 - adrp Rx, target_page // ldr/str Ry, [Rx, #offset_in_page] - // [ 2 - adrp Rx, target_page ] Not handled - // [ add Ry, Rx, #offset_in_page ] + // 2 - adrp Rx, target_page + // add Ry, Rx, #offset_in_page // 3 - adrp Rx, target_page (page aligned reloc, offset == 0) // - // In the case of type 1 we 
check that the register is the same and + // In the first two cases we check that the register is the same and // return the target_page + the offset within the page. - // // Otherwise we assume it is a page aligned relocation and return // the target page only. The only cases this is generated is for // the safepoint polling page or for the card table byte map base so // we assert as much. // - // Note: Strangely, we do not handle 'type 2' relocation (adrp followed - // by add) which is handled in pd_patch_instruction above. - // unsigned insn2 = ((unsigned*)insn_addr)[1]; if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 && Instruction_aarch64::extract(insn, 4, 0) == @@ -243,6 +239,12 @@ unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10); unsigned int size = Instruction_aarch64::extract(insn2, 31, 30); return address(target_page + (byte_offset << size)); + } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 && + Instruction_aarch64::extract(insn, 4, 0) == + Instruction_aarch64::extract(insn2, 4, 0)) { + // add (immediate) + unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10); + return address(target_page + byte_offset); } else { assert((jbyte *)target_page == ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base || @@ -355,6 +357,42 @@ } } +void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); + if (far_branches()) { + unsigned long offset; + // We can use ADRP here because we know that the total size of + // the code cache cannot exceed 2Gb. + adrp(tmp, entry, offset); + add(tmp, tmp, offset); + if (cbuf) cbuf->set_insts_mark(); + blr(tmp); + } else { + if (cbuf) cbuf->set_insts_mark(); + bl(entry); + } +} + +void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { + assert(ReservedCodeCacheSize < 4*G, "branch out of range"); + assert(CodeCache::find_blob(entry.target()) != NULL, + "destination of far call not found in code cache"); + if (far_branches()) { + unsigned long offset; + // We can use ADRP here because we know that the total size of + // the code cache cannot exceed 2Gb. + adrp(tmp, entry, offset); + add(tmp, tmp, offset); + if (cbuf) cbuf->set_insts_mark(); + br(tmp); + } else { + if (cbuf) cbuf->set_insts_mark(); + b(entry); + } +} + int MacroAssembler::biased_locking_enter(Register lock_reg, Register obj_reg, Register swap_reg, @@ -632,14 +670,74 @@ call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); } -void MacroAssembler::call(Address entry) { - if (true // reachable(entry) - ) { - bl(entry); +// Maybe emit a call via a trampoline. If the code cache is small +// trampolines won't be emitted. 
+ +void MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) { + assert(entry.rspec().type() == relocInfo::runtime_call_type + || entry.rspec().type() == relocInfo::opt_virtual_call_type + || entry.rspec().type() == relocInfo::static_call_type + || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); + + unsigned int start_offset = offset(); + if (far_branches() && !Compile::current()->in_scratch_emit_size()) { + emit_trampoline_stub(offset(), entry.target()); + } + + if (cbuf) cbuf->set_insts_mark(); + relocate(entry.rspec()); + if (Assembler::reachable_from_branch_at(pc(), entry.target())) { + bl(entry.target()); } else { - lea(rscratch1, entry); - blr(rscratch1); + bl(pc()); + } +} + + +// Emit a trampoline stub for a call to a target which is too far away. +// +// code sequences: +// +// call-site: +// branch-and-link to <destination> or <trampoline stub> +// +// Related trampoline stub for this call site in the stub section: +// load the call target from the constant pool +// branch (LR still points to the call site above) +// +void MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, + address dest) { + address stub = start_a_stub(Compile::MAX_stubs_size/2); + if (stub == NULL) { + start_a_stub(Compile::MAX_stubs_size/2); + Compile::current()->env()->record_out_of_memory_failure(); + return; } + + // Create a trampoline stub relocation which relates this trampoline stub + // with the call instruction at insts_call_instruction_offset in the + // instructions code-section. + align(wordSize); + relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + + insts_call_instruction_offset)); + const int stub_start_offset = offset(); + + // Now, create the trampoline stub's code: + // - load the call + // - call + Label target; + ldr(rscratch1, target); + br(rscratch1); + bind(target); + assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset, + "should be"); + emit_int64((int64_t)dest); + + const address stub_start_addr = addr_at(stub_start_offset); + + assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline"); + + end_a_stub(); } void MacroAssembler::ic_call(address entry) { @@ -648,7 +746,7 @@ // unsigned long offset; // ldr_constant(rscratch2, const_ptr); movptr(rscratch2, (uintptr_t)Universe::non_oop_word()); - call(Address(entry, rh)); + trampoline_call(Address(entry, rh)); } // Implementation of call_VM versions @@ -1296,8 +1394,7 @@ // public methods void MacroAssembler::mov(Register r, Address dest) { - InstructionMark im(this); - code_section()->relocate(inst_mark(), dest.rspec()); + code_section()->relocate(pc(), dest.rspec()); u_int64_t imm64 = (u_int64_t)dest.target(); movptr(r, imm64); } @@ -3413,6 +3510,7 @@ } } + // Search for str1 in str2 and return index or -1 void MacroAssembler::string_indexof(Register str2, Register str1, Register cnt2, Register cnt1, --- old/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp 2014-12-09 13:50:00.091950012 -0500 +++ new/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp 2014-12-09 13:49:59.711983664 -0500 @@ -494,6 +494,10 @@ static bool needs_explicit_null_check(intptr_t offset); static address target_addr_for_insn(address insn_addr, unsigned insn); + static address target_addr_for_insn(address insn_addr) { + unsigned insn = *(unsigned*)insn_addr; + return target_addr_for_insn(insn_addr, insn); + } // Required platform-specific helpers for Label::patch_instructions. // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
@@ -502,8 +506,7 @@ pd_patch_instruction_size(branch, target); } static address pd_call_destination(address branch) { - unsigned insn = *(unsigned*)branch; - return target_addr_for_insn(branch, insn); + return target_addr_for_insn(branch); } #ifndef PRODUCT static void pd_print_patched_instruction(address branch); @@ -511,6 +514,8 @@ static int patch_oop(address insn_addr, address o); + void emit_trampoline_stub(int insts_call_instruction_offset, address target); + // The following 4 methods return the offset of the appropriate move instruction // Support for fast byte/short loading with zero extension (depending on particular CPU) @@ -916,12 +921,24 @@ // Calls - // void call(Label& L, relocInfo::relocType rtype); + void trampoline_call(Address entry, CodeBuffer *cbuf = NULL); - // NOTE: this call tranfers to the effective address of entry NOT - // the address contained by entry. This is because this is more natural - // for jumps/calls. - void call(Address entry); + static bool far_branches() { + return ReservedCodeCacheSize > branch_range; + } + + // Jumps that can reach anywhere in the code cache. + // Trashes tmp. + void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1); + void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1); + + static int far_branch_size() { + if (far_branches()) { + return 3 * 4; // adrp, add, br + } else { + return 4; + } + } // Emit the CompiledIC call idiom void ic_call(address entry); --- old/src/cpu/aarch64/vm/methodHandles_aarch64.cpp 2014-12-09 13:50:01.711806545 -0500 +++ new/src/cpu/aarch64/vm/methodHandles_aarch64.cpp 2014-12-09 13:50:01.371836655 -0500 @@ -115,7 +115,7 @@ __ ldr(rscratch1,Address(method, entry_offset)); __ br(rscratch1); __ bind(L_no_such_method); - __ b(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); + __ far_jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); } void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, @@ -418,7 +418,7 @@ jump_from_method_handle(_masm, rmethod, temp1, for_compiler_entry); if (iid == vmIntrinsics::_linkToInterface) { __ bind(L_incompatible_class_change_error); - __ b(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); + __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); } } } --- old/src/cpu/aarch64/vm/nativeInst_aarch64.cpp 2014-12-09 13:50:03.231671934 -0500 +++ new/src/cpu/aarch64/vm/nativeInst_aarch64.cpp 2014-12-09 13:50:02.931698502 -0500 @@ -40,7 +40,73 @@ void NativeCall::verify() { ; } address NativeCall::destination() const { - return instruction_address() + displacement(); + address addr = (address)this; + address destination = instruction_address() + displacement(); + + // Do we use a trampoline stub for this call? + CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. + assert(cb && cb->is_nmethod(), "sanity"); + nmethod *nm = (nmethod *)cb; + if (nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) { + // Yes we do, so get the destination from the trampoline stub. + const address trampoline_stub_addr = destination; + destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); + } + + return destination; +} + +// Similar to replace_mt_safe, but just changes the destination. The +// important thing is that free-running threads are able to execute this +// call instruction at all times. +// +// Used in the runtime linkage of calls; see class CompiledIC. 
+// +// Add parameter assert_lock to switch off assertion +// during code generation, where no patching lock is needed. +void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { + assert(!assert_lock || + (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), + "concurrent code patching"); + + ResourceMark rm; + int code_size = NativeInstruction::instruction_size; + address addr_call = addr_at(0); + assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); + + // Patch the constant in the call's trampoline stub. + address trampoline_stub_addr = get_trampoline(); + if (trampoline_stub_addr != NULL) { + assert (! is_NativeCallTrampolineStub_at(dest), "chained trampolines"); + nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); + } + + // Patch the call. + if (Assembler::reachable_from_branch_at(addr_call, dest)) { + set_destination(dest); + } else { + assert (trampoline_stub_addr != NULL, "we need a trampoline"); + set_destination(trampoline_stub_addr); + } + + ICache::invalidate_range(addr_call, instruction_size); +} + +address NativeCall::get_trampoline() { + address call_addr = addr_at(0); + + CodeBlob *code = CodeCache::find_blob(call_addr); + assert(code != NULL, "Could not find the containing code blob"); + + address bl_destination + = MacroAssembler::pd_call_destination(call_addr); + if (code->content_contains(bl_destination) && + is_NativeCallTrampolineStub_at(bl_destination)) + return bl_destination; + + // If the codeBlob is not a nmethod, this is because we get here from the + // CodeBlob constructor, which is called within the nmethod constructor. + return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); } // Inserts a native call instruction at a given pc @@ -55,7 +121,7 @@ intptr_t NativeMovConstReg::data() const { // das(uint64_t(instruction_address()),2); - address addr = MacroAssembler::pd_call_destination(instruction_address()); + address addr = MacroAssembler::target_addr_for_insn(instruction_address()); if (maybe_cpool_ref(instruction_address())) { return *(intptr_t*)addr; } else { @@ -65,7 +131,7 @@ void NativeMovConstReg::set_data(intptr_t x) { if (maybe_cpool_ref(instruction_address())) { - address addr = MacroAssembler::pd_call_destination(instruction_address()); + address addr = MacroAssembler::target_addr_for_insn(instruction_address()); *(intptr_t*)addr = x; } else { MacroAssembler::pd_patch_instruction(instruction_address(), (address)x); @@ -86,10 +152,10 @@ address pc = instruction_address(); unsigned insn = *(unsigned*)pc; if (Instruction_aarch64::extract(insn, 28, 24) == 0b10000) { - address addr = MacroAssembler::pd_call_destination(pc); + address addr = MacroAssembler::target_addr_for_insn(pc); return *addr; } else { - return (int)(intptr_t)MacroAssembler::pd_call_destination(instruction_address()); + return (int)(intptr_t)MacroAssembler::target_addr_for_insn(instruction_address()); } } @@ -97,7 +163,7 @@ address pc = instruction_address(); unsigned insn = *(unsigned*)pc; if (maybe_cpool_ref(pc)) { - address addr = MacroAssembler::pd_call_destination(pc); + address addr = MacroAssembler::target_addr_for_insn(pc); *(long*)addr = x; } else { MacroAssembler::pd_patch_instruction(pc, (address)intptr_t(x)); @@ -107,7 +173,7 @@ void NativeMovRegMem::verify() { #ifdef ASSERT - address dest = MacroAssembler::pd_call_destination(instruction_address()); + address dest = MacroAssembler::target_addr_for_insn(instruction_address()); #endif } @@ -121,7 +187,7 @@ address 
NativeJump::jump_destination() const { - address dest = MacroAssembler::pd_call_destination(instruction_address()); + address dest = MacroAssembler::target_addr_for_insn(instruction_address()); // We use jump to self as the unresolved address which the inline // cache code (and relocs) know about @@ -192,19 +258,39 @@ return Instruction_aarch64::extract(int_at(0), 30, 23) == 0b11100101; } +bool NativeInstruction::is_sigill_zombie_not_entrant() { + return uint_at(0) == 0xd4bbd5a1; // dcps1 #0xdead +} + +void NativeIllegalInstruction::insert(address code_pos) { + *(juint*)code_pos = 0xd4bbd5a1; // dcps1 #0xdead +} + //------------------------------------------------------------------- -// MT safe inserting of a jump over a jump or a nop (used by nmethod::makeZombie) +// MT-safe inserting of a jump over a jump or a nop (used by +// nmethod::make_not_entrant_or_zombie) void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { - ptrdiff_t disp = dest - verified_entry; - guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow"); - - unsigned int insn = (0b000101 << 26) | ((disp >> 2) & 0x3ffffff); - assert(nativeInstruction_at(verified_entry)->is_jump_or_nop(), + assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); + assert(nativeInstruction_at(verified_entry)->is_jump_or_nop() + || nativeInstruction_at(verified_entry)->is_sigill_zombie_not_entrant(), "Aarch64 cannot replace non-jump with jump"); - *(unsigned int*)verified_entry = insn; + + // Patch this nmethod atomically. + if (Assembler::reachable_from_branch_at(verified_entry, dest)) { + ptrdiff_t disp = dest - verified_entry; + guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow"); + + unsigned int insn = (0b000101 << 26) | ((disp >> 2) & 0x3ffffff); + *(unsigned int*)verified_entry = insn; + } else { + // We use an illegal instruction for marking a method as + // not_entrant or zombie. + NativeIllegalInstruction::insert(verified_entry); + } + ICache::invalidate_range(verified_entry, instruction_size); } @@ -212,23 +298,28 @@ void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { NativeGeneralJump* n_jump = (NativeGeneralJump*)code_pos; - ptrdiff_t disp = entry - code_pos; - guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow"); - unsigned int insn = (0b000101 << 26) | ((disp >> 2) & 0x3ffffff); - *(unsigned int*)code_pos = insn; + CodeBuffer cb(code_pos, instruction_size); + MacroAssembler a(&cb); + + a.mov(rscratch1, entry); + a.br(rscratch1); + ICache::invalidate_range(code_pos, instruction_size); } // MT-safe patching of a long jump instruction. 
void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { - NativeGeneralJump* n_jump = (NativeGeneralJump*)instr_addr; - assert(n_jump->is_jump_or_nop(), - "Aarch64 cannot replace non-jump with jump"); - uint32_t instr = *(uint32_t*)code_buffer; - *(uint32_t*)instr_addr = instr; - ICache::invalidate_range(instr_addr, instruction_size); + ShouldNotCallThis(); } bool NativeInstruction::is_dtrace_trap() { return false; } +address NativeCallTrampolineStub::destination(nmethod *nm) const { + return ptr_at(data_offset); +} + +void NativeCallTrampolineStub::set_destination(address new_destination) { + set_ptr_at(data_offset, new_destination); + OrderAccess::release(); +} --- old/src/cpu/aarch64/vm/nativeInst_aarch64.hpp 2014-12-09 13:50:04.871526696 -0500 +++ new/src/cpu/aarch64/vm/nativeInst_aarch64.hpp 2014-12-09 13:50:04.511558578 -0500 @@ -53,6 +53,7 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC { friend class Relocation; + friend bool is_NativeCallTrampolineStub_at(address); public: enum { instruction_size = 4 }; inline bool is_nop(); @@ -66,6 +67,7 @@ inline bool is_mov_literal64(); bool is_movz(); bool is_movk(); + bool is_sigill_zombie_not_entrant(); protected: address addr_at(int offset) const { return address(this) + offset; } @@ -73,16 +75,18 @@ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } - jint int_at(int offset) const { return *(jint*) addr_at(offset); } + jint int_at(int offset) const { return *(jint*) addr_at(offset); } + juint uint_at(int offset) const { return *(juint*) addr_at(offset); } - intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } + address ptr_at(int offset) const { return *(address*) addr_at(offset); } oop oop_at (int offset) const { return *(oop*) addr_at(offset); } void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; } void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; } - void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; } + void set_uint_at(int offset, jint i) { *(juint*)addr_at(offset) = i; } + void set_ptr_at (int offset, address ptr) { *(address*) addr_at(offset) = ptr; } void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; } public: @@ -130,6 +134,7 @@ address displacement_address() const { return addr_at(displacement_offset); } address return_address() const { return addr_at(return_address_offset); } address destination() const; + void set_destination(address dest) { int offset = dest - instruction_address(); unsigned int insn = 0b100101 << 26; @@ -138,22 +143,8 @@ offset &= (1 << 26) - 1; // mask off insn part insn |= offset; set_int_at(displacement_offset, insn); - ICache::invalidate_range(instruction_address(), instruction_size); } - // Similar to replace_mt_safe, but just changes the destination. The - // important thing is that free-running threads are able to execute - // this call instruction at all times. If the call is an immediate BL - // instruction we can simply rely on atomicity of 32-bit writes to - // make sure other threads will see no intermediate states. - - // We cannot rely on locks here, since the free-running threads must run at - // full speed. - // - // Used in the runtime linkage of calls; see class CompiledIC. - // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) 
- void set_destination_mt_safe(address dest) { set_destination(dest); } - void verify_alignment() { ; } void verify(); void print(); @@ -175,6 +166,23 @@ static void insert(address code_pos, address entry); static void replace_mt_safe(address instr_addr, address code_buffer); + + // Similar to replace_mt_safe, but just changes the destination. The + // important thing is that free-running threads are able to execute + // this call instruction at all times. If the call is an immediate BL + // instruction we can simply rely on atomicity of 32-bit writes to + // make sure other threads will see no intermediate states. + + // We cannot rely on locks here, since the free-running threads must run at + // full speed. + // + // Used in the runtime linkage of calls; see class CompiledIC. + // (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) + + // The parameter assert_lock disables the assertion during code generation. + void set_destination_mt_safe(address dest, bool assert_lock = true); + + address get_trampoline(); }; inline NativeCall* nativeCall_at(address address) { @@ -378,10 +386,10 @@ class NativeGeneralJump: public NativeJump { public: enum AArch64_specific_constants { - instruction_size = 4, + instruction_size = 4 * 4, instruction_offset = 0, data_offset = 0, - next_instruction_offset = 4 + next_instruction_offset = 4 * 4 }; static void insert_unconditional(address code_pos, address entry); static void replace_mt_safe(address instr_addr, address code_buffer); @@ -450,4 +458,34 @@ return is_nop() || is_jump(); } +// Call trampoline stubs. +class NativeCallTrampolineStub : public NativeInstruction { + public: + + enum AArch64_specific_constants { + instruction_size = 4 * 4, + instruction_offset = 0, + data_offset = 2 * 4, + next_instruction_offset = 4 * 4 + }; + + address destination(nmethod *nm = NULL) const; + void set_destination(address new_destination); + ptrdiff_t destination_offset() const; +}; + +inline bool is_NativeCallTrampolineStub_at(address addr) { + // Ensure that the stub is exactly + // ldr xscratch1, L + // br xscratch1 + // L: + uint32_t *i = (uint32_t *)addr; + return i[0] == 0x58000048 && i[1] == 0xd61f0100; +} + +inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { + assert(is_NativeCallTrampolineStub_at(addr), "no call trampoline found"); + return (NativeCallTrampolineStub*)addr; +} + #endif // CPU_AARCH64_VM_NATIVEINST_AARCH64_HPP --- old/src/cpu/aarch64/vm/relocInfo_aarch64.cpp 2014-12-09 13:50:06.321398284 -0500 +++ new/src/cpu/aarch64/vm/relocInfo_aarch64.cpp 2014-12-09 13:50:05.971429280 -0500 @@ -59,6 +59,13 @@ } address Relocation::pd_call_destination(address orig_addr) { + assert(is_call(), "should be a call here"); + if (is_call()) { + address trampoline = nativeCall_at(addr())->get_trampoline(); + if (trampoline) { + return nativeCallTrampolineStub_at(trampoline)->destination(); + } + } if (orig_addr != NULL) { return MacroAssembler::pd_call_destination(orig_addr); } @@ -67,7 +74,17 @@ void Relocation::pd_set_call_destination(address x) { + assert(is_call(), "should be a call here"); + if (NativeCall::is_call_at(addr())) { + address trampoline = nativeCall_at(addr())->get_trampoline(); + if (trampoline) { + nativeCall_at(addr())->set_destination_mt_safe(x, /* assert_lock */false); + return; + } + } + assert(addr() != x, "call instruction in an infinite loop"); MacroAssembler::pd_patch_instruction(addr(), x); + assert(pd_call_destination(addr()) == x, "fail in reloc"); } address* 
Relocation::pd_address_in_code() { @@ -80,17 +97,16 @@ } void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { - // fprintf(stderr, "Try to fix poll reloc at %p to %p\n", addr(), dest); if (NativeInstruction::maybe_cpool_ref(addr())) { address old_addr = old_addr_for(addr(), src, dest); - MacroAssembler::pd_patch_instruction(addr(), pd_call_destination(old_addr)); + MacroAssembler::pd_patch_instruction(addr(), MacroAssembler::target_addr_for_insn(old_addr)); } } void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { if (NativeInstruction::maybe_cpool_ref(addr())) { address old_addr = old_addr_for(addr(), src, dest); - MacroAssembler::pd_patch_instruction(addr(), pd_call_destination(old_addr)); + MacroAssembler::pd_patch_instruction(addr(), MacroAssembler::target_addr_for_insn(old_addr)); } } --- old/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp 2014-12-09 13:50:08.041245962 -0500 +++ new/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp 2014-12-09 13:50:07.701276072 -0500 @@ -741,7 +741,7 @@ __ cmp(rscratch1, tmp); __ ldr(rmethod, Address(holder, CompiledICHolder::holder_method_offset())); __ br(Assembler::EQ, ok); - __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); __ bind(ok); // Method might have been compiled since the call site was patched to @@ -749,7 +749,7 @@ // the call site corrected. __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset()))); __ cbz(rscratch1, skip_fixup); - __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); __ block_comment("} c2i_unverified_entry"); } @@ -1168,7 +1168,7 @@ static void rt_call(MacroAssembler* masm, address dest, int gpargs, int fpargs, int type) { CodeBlob *cb = CodeCache::find_blob(dest); if (cb) { - __ bl(RuntimeAddress(dest)); + __ far_call(RuntimeAddress(dest)); } else { assert((unsigned)gpargs < 256, "eek!"); assert((unsigned)fpargs < 32, "eek!"); @@ -1539,7 +1539,7 @@ __ cmp_klass(receiver, ic_reg, rscratch1); __ br(Assembler::EQ, hit); - __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); // Verified entry point must be aligned __ align(8); @@ -2099,7 +2099,7 @@ __ bind(exception_pending); // and forward the exception - __ b(RuntimeAddress(StubRoutines::forward_exception_entry())); + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); } // Slow path locking & unlocking @@ -2835,7 +2835,7 @@ RegisterSaver::restore_live_registers(masm); - __ b(RuntimeAddress(StubRoutines::forward_exception_entry())); + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); // No exception case __ bind(noException); @@ -2931,7 +2931,7 @@ __ str(zr, Address(rthread, JavaThread::vm_result_offset())); __ ldr(r0, Address(rthread, Thread::pending_exception_offset())); - __ b(RuntimeAddress(StubRoutines::forward_exception_entry())); + __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); // ------------- // make sure all code is generated --- old/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp 2014-12-09 13:50:09.731096296 -0500 +++ new/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp 2014-12-09 13:50:09.381127292 -0500 @@ -2450,7 +2450,7 @@ __ should_not_reach_here(); __ bind(L); #endif // ASSERT - __ b(RuntimeAddress(StubRoutines::forward_exception_entry())); + __ 
far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); // codeBlob framesize is in words (not VMRegImpl::slot_size) --- old/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp 2014-12-09 13:50:11.290958143 -0500 +++ new/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp 2014-12-09 13:50:10.960987367 -0500 @@ -524,7 +524,7 @@ // Note: the restored frame is not necessarily interpreted. // Use the shared runtime version of the StackOverflowError. assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); - __ b(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); + __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); // all done with frame size check __ bind(after_frame_check); --- old/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp 2014-12-09 13:50:12.780826189 -0500 +++ new/src/cpu/aarch64/vm/vtableStubs_aarch64.cpp 2014-12-09 13:50:12.430857184 -0500 @@ -180,7 +180,7 @@ __ br(rscratch1); __ bind(throw_icce); - __ b(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); + __ far_jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); __ flush(); --- old/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp 2014-12-09 13:50:14.250696006 -0500 +++ new/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp 2014-12-09 13:50:13.930724345 -0500 @@ -376,7 +376,14 @@ // Java thread running in Java code => find exception handler if any // a fault inside compiled code, the interpreter, or a stub - if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { + // Handle signal from NativeJump::patch_verified_entry(). + if ((sig == SIGILL || sig == SIGTRAP) + && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { + if (TraceTraps) { + tty->print_cr("trap: zombie_not_entrant (%s)", (sig == SIGTRAP) ? "SIGTRAP" : "SIGILL"); + } + stub = SharedRuntime::get_handle_wrong_method_stub(); + } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { stub = SharedRuntime::get_poll_stub(pc); } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { // BugId 4454115: A read from a MappedByteBuffer can fault --- old/src/share/vm/runtime/arguments.cpp 2014-12-09 13:50:15.750563166 -0500 +++ new/src/share/vm/runtime/arguments.cpp 2014-12-09 13:50:15.350598590 -0500 @@ -1162,7 +1162,7 @@ // Increase the code cache size - tiered compiles a lot more. if (FLAG_IS_DEFAULT(ReservedCodeCacheSize)) { FLAG_SET_ERGO(uintx, ReservedCodeCacheSize, - MIN2(CODE_CACHE_SIZE_LIMIT, ReservedCodeCacheSize * 5)); + MIN2(CODE_CACHE_DEFAULT_LIMIT, ReservedCodeCacheSize * 5)); } // Enable SegmentedCodeCache if TieredCompilation is enabled and ReservedCodeCacheSize >= 240M if (FLAG_IS_DEFAULT(SegmentedCodeCache) && ReservedCodeCacheSize >= 240*M) { --- old/src/share/vm/utilities/globalDefinitions.hpp 2014-12-09 13:50:17.480409958 -0500 +++ new/src/share/vm/utilities/globalDefinitions.hpp 2014-12-09 13:50:17.140440068 -0500 @@ -419,6 +419,8 @@ // The maximum size of the code cache. Can be overridden by targets. #define CODE_CACHE_SIZE_LIMIT (2*G) +// Allow targets to reduce the default size of the code cache. +#define CODE_CACHE_DEFAULT_LIMIT CODE_CACHE_SIZE_LIMIT #ifdef TARGET_ARCH_x86 # include "globalDefinitions_x86.hpp"
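
The heart of this change is a simple size test: an AArch64 B/BL immediate reaches +/-128 MB, so once ReservedCodeCacheSize exceeds Assembler::branch_range every cross-blob jump has to be emitted as the three-instruction adrp/add/br (or blr) form, and every call gets a trampoline stub it can later be repointed to. The standalone C++ sketch below is not HotSpot code; it only mirrors the arithmetic of reachable_from_branch_at, far_branches and far_branch_size from the patch, assuming the product-build branch_range of 128 MB and using plain integers in place of code addresses.

#include <cstdint>
#include <cstdio>

namespace sketch {

const uint64_t M = 1024 * 1024;

// Product-build value from the patch; debug builds shrink it to 2 * M
// so that trampolines actually get exercised.
const uint64_t branch_range = 128 * M;
const int instruction_size = 4;   // every AArch64 instruction is 4 bytes

// A B/BL immediate can reach +/-128 MB of the branch site.
bool reachable_from_branch_at(uint64_t branch, uint64_t target) {
  uint64_t distance = branch > target ? branch - target : target - branch;
  return distance < branch_range;
}

// If the reserved code cache is larger than the branch range, two blobs
// may sit further apart than a single B/BL can span.
bool far_branches(uint64_t reserved_code_cache_size) {
  return reserved_code_cache_size > branch_range;
}

// Space reserved for a far jump: adrp, add, br when far branches are
// needed, otherwise a single b instruction.
int far_branch_size(uint64_t reserved_code_cache_size) {
  return far_branches(reserved_code_cache_size) ? 3 * instruction_size
                                                : 1 * instruction_size;
}

} // namespace sketch

int main() {
  // 240 MB exceeds the 128 MB branch range; 64 MB does not.
  std::printf("far_branch_size(240M) = %d bytes\n",
              sketch::far_branch_size(240 * sketch::M));
  std::printf("far_branch_size(64M)  = %d bytes\n",
              sketch::far_branch_size(64 * sketch::M));
  return 0;
}

With this split, a 240 MB reserved code cache forces the 12-byte far form while a 64 MB cache still fits a single 4-byte B/BL, which is also where the 4 * NativeInstruction::instruction_size figure in size_deopt_handler() above comes from: one adr plus one three-instruction far branch.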