# HG changeset patch # User lucy # Date 1532686789 -7200 # Node ID 18a4d41a8f9133db585c26243f30c820fff78ce2 # Parent 979e349059eb9f7d0c0cffceb30bc4b3433302cf [mq]: 8207343.patch diff --git a/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp --- a/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vtableStubs_aarch64.cpp @@ -44,24 +44,30 @@ #define __ masm-> #ifndef PRODUCT -extern "C" void bad_compiled_vtable_index(JavaThread* thread, - oop receiver, - int index); +extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); #endif VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { - const int aarch64_code_length = VtableStub::pd_code_size_limit(true); - VtableStub* s = new(aarch64_code_length) VtableStub(true, vtable_index); + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = VtableStub::code_size_limit(true); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; } - ResourceMark rm; - CodeBuffer cb(s->entry_point(), aarch64_code_length); + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. 
+ address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); -#ifndef PRODUCT +#if (!defined(PRODUCT) && defined(COMPILER2)) if (CountCompiledCalls) { __ lea(r16, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); __ incrementw(Address(r16)); @@ -84,15 +90,26 @@ __ br(Assembler::GT, L); __ enter(); __ mov(r2, vtable_index); - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2); + + // TODO: find upper bound for call_VM length. + start_pc = __ pc(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, r2); + slop_delta = 470 - (__ pc() - start_pc); // call_VM varies in length, depending on data + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + __ leave(); __ bind(L); } #endif // PRODUCT + start_pc = __ pc(); __ lookup_virtual_method(r16, vtable_index, rmethod); + slop_delta = 8 - (int)(__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); +#ifndef PRODUCT if (DebugVtables) { Label L; __ cbz(rmethod, L); @@ -101,6 +118,8 @@ __ stop("Vtable entry is NULL"); __ bind(L); } +#endif // PRODUCT + // r0: receiver klass // rmethod: Method* // r2: receiver @@ -108,43 +127,46 @@ __ ldr(rscratch1, Address(rmethod, Method::from_compiled_offset())); __ br(rscratch1); - __ flush(); + masm->flush(); + bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); - if (PrintMiscellaneous && (WizardMode || Verbose)) { - tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d", - vtable_index, p2i(s->entry_point()), - (int)(s->code_end() - s->entry_point()), - (int)(s->code_end() - __ pc())); - } - guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - - 
s->set_exception_points(npe_addr, ame_addr); return s; } VtableStub* VtableStubs::create_itable_stub(int itable_index) { - // Note well: pd_code_size_limit is the absolute minimum we can get - // away with. If you add code here, bump the code stub size - // returned by pd_code_size_limit! - const int code_length = VtableStub::pd_code_size_limit(false); - VtableStub* s = new(code_length) VtableStub(false, itable_index); - ResourceMark rm; - CodeBuffer cb(s->entry_point(), code_length); + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = VtableStub::code_size_limit(false); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); + // Can be NULL if there is no free space in the code cache. + if (s == NULL) { + return NULL; + } + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. 
+ address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); -#ifndef PRODUCT +#if (!defined(PRODUCT) && defined(COMPILER2)) if (CountCompiledCalls) { __ lea(r10, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); __ incrementw(Address(r10)); } #endif + // get receiver (need to skip return address on top of stack) + assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + // Entry arguments: // rscratch2: CompiledICHolder // j_rarg0: Receiver - // Most registers are in use; we'll use r16, rmethod, r10, r11 const Register recv_klass_reg = r10; const Register holder_klass_reg = r16; // declaring interface klass (DECC) @@ -157,8 +179,8 @@ __ ldr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); __ ldr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); - // get receiver (need to skip return address on top of stack) - assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); + start_pc = __ pc(); + // get receiver klass (also an implicit null-check) address npe_addr = __ pc(); __ load_klass(recv_klass_reg, j_rarg0); @@ -172,16 +194,25 @@ L_no_such_interface, /*return_method=*/false); + const ptrdiff_t typecheckSize = __ pc() - start_pc; + start_pc = __ pc(); + // Get selected method from declaring class and itable index __ load_klass(recv_klass_reg, j_rarg0); // restore recv_klass_reg __ lookup_interface_method(// inputs: rec. class, interface, itable index - recv_klass_reg, holder_klass_reg, itable_index, - // outputs: method, scan temp. reg - rmethod, temp_reg, - L_no_such_interface); + recv_klass_reg, holder_klass_reg, itable_index, + // outputs: method, scan temp. 
reg + rmethod, temp_reg, + L_no_such_interface); - // method (rmethod): Method* - // j_rarg0: receiver + const ptrdiff_t lookupSize = __ pc() - start_pc; + + // Reduce "estimate" such that "padding" does not drop below 8. + const ptrdiff_t estimate = 130; + const ptrdiff_t codesize = typecheckSize + lookupSize; + slop_delta = (int)(estimate - codesize); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); #ifdef ASSERT if (DebugVtables) { @@ -206,92 +237,17 @@ // We force resolving of the call site by jumping to the "handle // wrong method" stub, and so let the interpreter runtime do all the // dirty work. + assert(SharedRuntime::get_handle_wrong_method_stub() != NULL, "check initialization order"); __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); - __ flush(); + masm->flush(); + bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); - if (PrintMiscellaneous && (WizardMode || Verbose)) { - tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d", - itable_index, p2i(s->entry_point()), - (int)(s->code_end() - s->entry_point()), - (int)(s->code_end() - __ pc())); - } - guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - - s->set_exception_points(npe_addr, ame_addr); return s; } - -int VtableStub::pd_code_size_limit(bool is_vtable_stub) { - int size = DebugVtables ? 216 : 0; - if (CountCompiledCalls) - size += 6 * 4; - // FIXME: vtable stubs only need 36 bytes - if (is_vtable_stub) - size += 52; - else - size += 176; - return size; - - // In order to tune these parameters, run the JVM with VM options - // +PrintMiscellaneous and +WizardMode to see information about - // actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops. - // - // If Universe::narrow_klass_base is nonzero, decoding a compressed - // class can take zeveral instructions. 
- // - // The JVM98 app. _202_jess has a megamorphic interface call. - // The itable code looks like this: - - // ldr xmethod, [xscratch2,#CompiledICHolder::holder_klass_offset] - // ldr x0, [xscratch2] - // ldr w10, [x1,#oopDesc::klass_offset_in_bytes] - // mov xheapbase, #0x3c000000 // #narrow_klass_base - // movk xheapbase, #0x3f7, lsl #32 - // add x10, xheapbase, x10 - // mov xheapbase, #0xe7ff0000 // #heapbase - // movk xheapbase, #0x3f7, lsl #32 - // ldr w11, [x10,#vtable_length_offset] - // add x11, x10, x11, uxtx #3 - // add x11, x11, #itableMethodEntry::method_offset_in_bytes - // ldr x10, [x11] - // cmp xmethod, x10 - // b.eq found_method - // search: - // cbz x10, no_such_interface - // add x11, x11, #0x10 - // ldr x10, [x11] - // cmp xmethod, x10 - // b.ne search - // found_method: - // ldr w10, [x1,#oopDesc::klass_offset_in_bytes] - // mov xheapbase, #0x3c000000 // #narrow_klass_base - // movk xheapbase, #0x3f7, lsl #32 - // add x10, xheapbase, x10 - // mov xheapbase, #0xe7ff0000 // #heapbase - // movk xheapbase, #0x3f7, lsl #32 - // ldr w11, [x10,#vtable_length_offset] - // add x11, x10, x11, uxtx #3 - // add x11, x11, #itableMethodEntry::method_offset_in_bytes - // add x10, x10, #itentry_off - // ldr xmethod, [x11] - // cmp x0, xmethod - // b.eq found_method2 - // search2: - // cbz xmethod, 0x000003ffa872e6cc - // add x11, x11, #0x10 - // ldr xmethod, [x11] - // cmp x0, xmethod - // b.ne search2 - // found_method2: - // ldr w11, [x11,#itableOffsetEntry::offset_offset_in_bytes] - // ldr xmethod, [x10,w11,uxtw] - // ldr xscratch1, [xmethod,#Method::from_compiled_offset] - // br xscratch1 - // no_such_interface: - // b throw_ICCE_entry - +int VtableStub::pd_code_alignment() { + // aarch cache line size is 64 bytes, but we just align on 4 bytes. 
+ const unsigned int icache_line_size = 4; + return icache_line_size; } - -int VtableStub::pd_code_alignment() { return 4; } diff --git a/src/hotspot/cpu/arm/vtableStubs_arm.cpp b/src/hotspot/cpu/arm/vtableStubs_arm.cpp --- a/src/hotspot/cpu/arm/vtableStubs_arm.cpp +++ b/src/hotspot/cpu/arm/vtableStubs_arm.cpp @@ -48,17 +48,31 @@ #endif VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { - const int code_length = VtableStub::pd_code_size_limit(true); - VtableStub* s = new(code_length) VtableStub(true, vtable_index); + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = VtableStub::code_size_limit(true); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; } - ResourceMark rm; - CodeBuffer cb(s->entry_point(), code_length); + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + // Implementation required? + } +#endif + assert(VtableStub::receiver_location() == R0->as_VMReg(), "receiver expected in R0"); const Register tmp = Rtemp; // Rtemp OK, should be free at call sites @@ -66,17 +80,33 @@ address npe_addr = __ pc(); __ load_klass(tmp, R0); - { - int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes(); - int method_offset = vtableEntry::method_offset_in_bytes() + entry_offset; +#ifndef PRODUCT + if (DebugVtables) { + // Implementation required? 
+ } +#endif - assert ((method_offset & (wordSize - 1)) == 0, "offset should be aligned"); - int offset_mask = AARCH64_ONLY(0xfff << LogBytesPerWord) NOT_AARCH64(0xfff); - if (method_offset & ~offset_mask) { - __ add(tmp, tmp, method_offset & ~offset_mask); + start_pc = __ pc(); + { // lookup virtual method + int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index * vtableEntry::size_in_bytes(); + int method_offset = vtableEntry::method_offset_in_bytes() + entry_offset; + + assert ((method_offset & (wordSize - 1)) == 0, "offset should be aligned"); + int offset_mask = AARCH64_ONLY(0xfff << LogBytesPerWord) NOT_AARCH64(0xfff); + if (method_offset & ~offset_mask) { + __ add(tmp, tmp, method_offset & ~offset_mask); + } + __ ldr(Rmethod, Address(tmp, method_offset & offset_mask)); } - __ ldr(Rmethod, Address(tmp, method_offset & offset_mask)); + slop_delta = 8 - (int)(__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + +#ifndef PRODUCT + if (DebugVtables) { + // Implementation required? 
} +#endif address ame_addr = __ pc(); #ifdef AARCH64 @@ -87,35 +117,36 @@ #endif // AARCH64 masm->flush(); + bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); - if (PrintMiscellaneous && (WizardMode || Verbose)) { - tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d", - vtable_index, p2i(s->entry_point()), - (int)(s->code_end() - s->entry_point()), - (int)(s->code_end() - __ pc())); - } - guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - // FIXME ARM: need correct 'slop' - below is x86 code - // shut the door on sizing bugs - //int slop = 8; // 32-bit offset is this much larger than a 13-bit one - //assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); - - s->set_exception_points(npe_addr, ame_addr); return s; } VtableStub* VtableStubs::create_itable_stub(int itable_index) { - const int code_length = VtableStub::pd_code_size_limit(false); - VtableStub* s = new(code_length) VtableStub(false, itable_index); + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = VtableStub::code_size_limit(false); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; } + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; - ResourceMark rm; - CodeBuffer cb(s->entry_point(), code_length); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + // Implementation required? 
+ } +#endif + assert(VtableStub::receiver_location() == R0->as_VMReg(), "receiver expected in R0"); // R0-R3 / R0-R7 registers hold the arguments and cannot be spoiled @@ -132,6 +163,8 @@ Label L_no_such_interface; + start_pc = __ pc(); + // Receiver subtype check against REFC. __ ldr(Rintf, Address(Ricklass, CompiledICHolder::holder_klass_offset())); __ lookup_interface_method(// inputs: rec. class, interface, itable index @@ -140,6 +173,9 @@ noreg, Rscan, Rtemp, L_no_such_interface); + const ptrdiff_t typecheckSize = __ pc() - start_pc; + start_pc = __ pc(); + // Get Method* and entry point for compiler __ ldr(Rintf, Address(Ricklass, CompiledICHolder::holder_metadata_offset())); __ lookup_interface_method(// inputs: rec. class, interface, itable index @@ -148,6 +184,21 @@ Rmethod, Rscan, Rtemp, L_no_such_interface); + const ptrdiff_t lookupSize = __ pc() - start_pc; + + // Reduce "estimate" such that "padding" does not drop below 8. + const ptrdiff_t estimate = 130; + const ptrdiff_t codesize = typecheckSize + lookupSize; + slop_delta = (int)(estimate - codesize); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); + +#ifndef PRODUCT + if (DebugVtables) { + // Implementation required? + } +#endif + address ame_addr = __ pc(); #ifdef AARCH64 @@ -158,7 +209,6 @@ #endif // AARCH64 __ bind(L_no_such_interface); - // Handle IncompatibleClassChangeError in itable stubs. // More detailed error message. 
// We force resolving of the call site by jumping to the "handle @@ -168,43 +218,13 @@ __ jump(SharedRuntime::get_handle_wrong_method_stub(), relocInfo::runtime_call_type, Rtemp); masm->flush(); + bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); - if (PrintMiscellaneous && (WizardMode || Verbose)) { - tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d", - itable_index, p2i(s->entry_point()), - (int)(s->code_end() - s->entry_point()), - (int)(s->code_end() - __ pc())); - } - guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - // FIXME ARM: need correct 'slop' - below is x86 code - // shut the door on sizing bugs - //int slop = 8; // 32-bit offset is this much larger than a 13-bit one - //assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); - - s->set_exception_points(npe_addr, ame_addr); return s; } -int VtableStub::pd_code_size_limit(bool is_vtable_stub) { - int instr_count; - - if (is_vtable_stub) { - // vtable stub size - instr_count = NOT_AARCH64(4) AARCH64_ONLY(5); - } else { - // itable stub size - instr_count = NOT_AARCH64(31) AARCH64_ONLY(31); - } - -#ifdef AARCH64 - if (UseCompressedClassPointers) { - instr_count += MacroAssembler::instr_count_for_decode_klass_not_null(); - } -#endif // AARCH64 - - return instr_count * Assembler::InstructionSize; +int VtableStub::pd_code_alignment() { + // arm cache line size is 64 bytes, but we just align on word size. 
+ const unsigned int icache_line_size = wordSize; + return icache_line_size; } - -int VtableStub::pd_code_alignment() { - return 8; -} diff --git a/src/hotspot/cpu/ppc/vtableStubs_ppc_64.cpp b/src/hotspot/cpu/ppc/vtableStubs_ppc_64.cpp --- a/src/hotspot/cpu/ppc/vtableStubs_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/vtableStubs_ppc_64.cpp @@ -39,36 +39,39 @@ #define __ masm-> -#ifdef PRODUCT -#define BLOCK_COMMENT(str) // nothing -#else -#define BLOCK_COMMENT(str) __ block_comment(str) -#endif -#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") - #ifndef PRODUCT extern "C" void bad_compiled_vtable_index(JavaThread* thread, oopDesc* receiver, int index); #endif -// Used by compiler only; may use only caller saved, non-argument -// registers. +// Used by compiler only; may use only caller saved, non-argument registers. VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { - // PPC port: use fixed size. - const int code_length = VtableStub::pd_code_size_limit(true); - VtableStub* s = new (code_length) VtableStub(true, vtable_index); - + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = VtableStub::code_size_limit(true); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; } - ResourceMark rm; - CodeBuffer cb(s->entry_point(), code_length); + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. 
+ address start_pc; + int slop_bytes = 8; // just a two-instruction safety net + int slop_delta = 0; + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); -#ifndef PRODUCT +#if (!defined(PRODUCT) && defined(COMPILER2)) if (CountCompiledCalls) { + start_pc = __ pc(); + int load_const_maxLen = 5*BytesPerInstWord; // load_const generates 5 instructions. Assume that as max size for load_const_optimized int offs = __ load_const_optimized(R11_scratch1, SharedRuntime::nof_megamorphic_calls_addr(), R12_scratch2, true); + slop_delta = load_const_maxLen - (__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); __ lwz(R12_scratch2, offs, R11_scratch1); __ addi(R12_scratch2, R12_scratch2, 1); __ stw(R12_scratch2, offs, R11_scratch1); @@ -77,17 +80,13 @@ assert(VtableStub::receiver_location() == R3_ARG1->as_VMReg(), "receiver expected in R3_ARG1"); + const Register rcvr_klass = R11_scratch1; + address npe_addr = __ pc(); // npe = null pointer exception + // check if we must do an explicit check (implicit checks disabled, offset too large). + __ null_check(R3, oopDesc::klass_offset_in_bytes(), /*implicit only*/NULL); // Get receiver klass. - const Register rcvr_klass = R11_scratch1; - - // We might implicit NULL fault here. - address npe_addr = __ pc(); // npe = null pointer exception - __ null_check(R3, oopDesc::klass_offset_in_bytes(), /*implicit only*/NULL); __ load_klass(rcvr_klass, R3); - // Set method (in case of interpreted method), and destination address. 
- int entry_offset = in_bytes(Klass::vtable_start_offset()) + vtable_index*vtableEntry::size_in_bytes(); - #ifndef PRODUCT if (DebugVtables) { Label L; @@ -102,7 +101,9 @@ } #endif - int v_off = entry_offset + vtableEntry::method_offset_in_bytes(); + int entry_offset = in_bytes(Klass::vtable_start_offset()) + + vtable_index*vtableEntry::size_in_bytes(); + int v_off = entry_offset + vtableEntry::method_offset_in_bytes(); __ ld(R19_method, (RegisterOrConstant)v_off, rcvr_klass); @@ -116,40 +117,48 @@ } #endif - // If the vtable entry is null, the method is abstract. address ame_addr = __ pc(); // ame = abstract method error + // if the vtable entry is null, the method is abstract + // NOTE: for vtable dispatches, the vtable entry will never be null. + __ null_check(R19_method, in_bytes(Method::from_compiled_offset()), /*implicit only*/NULL); __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method); __ mtctr(R12_scratch2); __ bctr(); masm->flush(); - - guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - - s->set_exception_points(npe_addr, ame_addr); + bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); return s; } VtableStub* VtableStubs::create_itable_stub(int itable_index) { - // PPC port: use fixed size. - const int code_length = VtableStub::pd_code_size_limit(false); - VtableStub* s = new (code_length) VtableStub(false, itable_index); - + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = VtableStub::code_size_limit(false); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; } + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. 
+ address start_pc; + int slop_bytes = 8; // just a two-instruction safety net + int slop_delta = 0; - ResourceMark rm; - CodeBuffer cb(s->entry_point(), code_length); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); - address start_pc; + int load_const_maxLen = 5*BytesPerInstWord; // load_const generates 5 instructions. Assume that as max size for load_const_optimized -#ifndef PRODUCT +#if (!defined(PRODUCT) && defined(COMPILER2)) if (CountCompiledCalls) { + start_pc = __ pc(); int offs = __ load_const_optimized(R11_scratch1, SharedRuntime::nof_megamorphic_calls_addr(), R12_scratch2, true); + slop_delta = load_const_maxLen - (__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); __ lwz(R12_scratch2, offs, R11_scratch1); __ addi(R12_scratch2, R12_scratch2, 1); __ stw(R12_scratch2, offs, R11_scratch1); @@ -209,33 +218,22 @@ // wrong method" stub, and so let the interpreter runtime do all the // dirty work. __ bind(L_no_such_interface); + start_pc = __ pc(); __ load_const_optimized(R11_scratch1, SharedRuntime::get_handle_wrong_method_stub(), R12_scratch2); + slop_delta = load_const_maxLen - (__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); __ mtctr(R11_scratch1); __ bctr(); masm->flush(); + bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); - guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - - s->set_exception_points(npe_addr, ame_addr); return s; } -int VtableStub::pd_code_size_limit(bool is_vtable_stub) { - if (DebugVtables || CountCompiledCalls || VerifyOops) { - return 1000; - } - int size = is_vtable_stub ? 
20 + 8 : 164 + 20; // Plain + safety - if (UseCompressedClassPointers) { - size += MacroAssembler::instr_size_for_decode_klass_not_null(); - } - if (!ImplicitNullChecks || !os::zero_page_read_protected()) { - size += is_vtable_stub ? 8 : 12; - } - return size; -} - int VtableStub::pd_code_alignment() { + // Power cache line size is 128 bytes, but we want to limit alignment loss. const unsigned int icache_line_size = 32; return icache_line_size; } diff --git a/src/hotspot/cpu/s390/vtableStubs_s390.cpp b/src/hotspot/cpu/s390/vtableStubs_s390.cpp --- a/src/hotspot/cpu/s390/vtableStubs_s390.cpp +++ b/src/hotspot/cpu/s390/vtableStubs_s390.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016, 2017 SAP SE. All rights reserved. + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2018 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -37,9 +37,6 @@ #include "opto/runtime.hpp" #endif -// Machine-dependent part of VtableStubs: create vtableStub of correct -// size and initialize its code. - #define __ masm-> #ifndef PRODUCT @@ -48,123 +45,140 @@ // Used by compiler only; may use only caller saved, non-argument registers. VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { - - const int code_length = VtableStub::pd_code_size_limit(true); - VtableStub *s = new(code_length) VtableStub(true, vtable_index); - if (s == NULL) { // Indicates OOM In the code cache. + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = VtableStub::code_size_limit(true); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); + // Can be NULL if there is no free space in the code cache. 
+ if (s == NULL) { return NULL; } + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + ResourceMark rm; - CodeBuffer cb(s->entry_point(), code_length); - MacroAssembler *masm = new MacroAssembler(&cb); - int padding_bytes = 0; + CodeBuffer cb(s->entry_point(), stub_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); #if (!defined(PRODUCT) && defined(COMPILER2)) if (CountCompiledCalls) { - // Count unused bytes - // worst case actual size - padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true); - + // worst case actual size + slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); // Use generic emitter for direct memory increment. // Abuse Z_method as scratch register for generic emitter. // It is loaded further down anyway before it is first used. + // No dynamic code size variance here, increment is 1, always. __ add2mem_32(Address(Z_R1_scratch), 1, Z_method); } #endif assert(VtableStub::receiver_location() == Z_R2->as_VMReg(), "receiver expected in Z_ARG1"); + const Register rcvr_klass = Z_R1_scratch; + address npe_addr = __ pc(); // npe == NULL ptr exception + // check if we must do an explicit check (implicit checks disabled, offset too large). + __ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes()); // Get receiver klass. - // Must do an explicit check if implicit checks are disabled. 
- address npe_addr = __ pc(); // npe == NULL ptr exception - __ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes()); - const Register rcvr_klass = Z_R1_scratch; __ load_klass(rcvr_klass, Z_ARG1); - // Set method (in case of interpreted method), and destination address. - int entry_offset = in_bytes(Klass::vtable_start_offset()) + - vtable_index * vtableEntry::size_in_bytes(); - #ifndef PRODUCT if (DebugVtables) { - Label L; + NearLabel L; // Check offset vs vtable length. const Register vtable_idx = Z_R0_scratch; - // Count unused bytes. - // worst case actual size - padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(vtable_idx, vtable_index*vtableEntry::size_in_bytes(), true); + // worst case actual size + slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(vtable_idx, vtable_index*vtableEntry::size(), true); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); - assert(Immediate::is_uimm12(in_bytes(Klass::vtable_length_offset())), "disp to large"); + assert(Displacement::is_shortDisp(in_bytes(Klass::vtable_length_offset())), "disp to large"); __ z_cl(vtable_idx, in_bytes(Klass::vtable_length_offset()), rcvr_klass); __ z_brl(L); __ z_lghi(Z_ARG3, vtable_index); // Debug code, don't optimize. __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), Z_ARG1, Z_ARG3, false); // Count unused bytes (assume worst case here). - padding_bytes += 12; + slop_bytes += 12; __ bind(L); } #endif - int v_off = entry_offset + vtableEntry::method_offset_in_bytes(); + int entry_offset = in_bytes(Klass::vtable_start_offset()) + + vtable_index * vtableEntry::size_in_bytes(); + int v_off = entry_offset + vtableEntry::method_offset_in_bytes(); + // Set method (in case of interpreted method), and destination address. // Duplicate safety code from enc_class Java_Dynamic_Call_dynTOC. 
if (Displacement::is_validDisp(v_off)) { __ z_lg(Z_method/*method oop*/, v_off, rcvr_klass/*class oop*/); // Account for the load_const in the else path. - padding_bytes += __ load_const_size(); + slop_delta = __ load_const_size(); } else { // Worse case, offset does not fit in displacement field. - __ load_const(Z_method, v_off); // Z_method temporarily holds the offset value. + // worst case actual size + slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(Z_method, v_off, true); __ z_lg(Z_method/*method oop*/, 0, Z_method/*method offset*/, rcvr_klass/*class oop*/); } + slop_bytes += slop_delta; #ifndef PRODUCT if (DebugVtables) { - Label L; + NearLabel L; __ z_ltgr(Z_method, Z_method); __ z_brne(L); - __ stop("Vtable entry is ZERO",102); + __ stop("Vtable entry is ZERO", 102); __ bind(L); } #endif - address ame_addr = __ pc(); // ame = abstract method error - - // Must do an explicit check if implicit checks are disabled. + // Must do an explicit check if offset too large or implicit checks are disabled. + address ame_addr = __ pc(); __ null_check(Z_method, Z_R1_scratch, in_bytes(Method::from_compiled_offset())); __ z_lg(Z_R1_scratch, in_bytes(Method::from_compiled_offset()), Z_method); __ z_br(Z_R1_scratch); masm->flush(); - - s->set_exception_points(npe_addr, ame_addr); + bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0); return s; } VtableStub* VtableStubs::create_itable_stub(int itable_index) { - const int code_length = VtableStub::pd_code_size_limit(false); - VtableStub *s = new(code_length) VtableStub(false, itable_index); - if (s == NULL) { // Indicates OOM in the code cache. + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = VtableStub::code_size_limit(false); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); + // Can be NULL if there is no free space in the code cache. 
+ if (s == NULL) { return NULL; } + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; ResourceMark rm; - CodeBuffer cb(s->entry_point(), code_length); - MacroAssembler *masm = new MacroAssembler(&cb); - int padding_bytes = 0; + CodeBuffer cb(s->entry_point(), stub_code_length); + MacroAssembler* masm = new MacroAssembler(&cb); #if (!defined(PRODUCT) && defined(COMPILER2)) if (CountCompiledCalls) { - // Count unused bytes - // worst case actual size - padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true); - + // worst case actual size + slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::nof_megamorphic_calls_addr(), true); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); // Use generic emitter for direct memory increment. - // Use Z_tmp_1 as scratch register for generic emitter. - __ add2mem_32((Z_R1_scratch), 1, Z_tmp_1); + // Abuse Z_method as scratch register for generic emitter. + // It is loaded further down anyway before it is first used. + // No dynamic code size variance here, increment is 1, always. + __ add2mem_32(Address(Z_R1_scratch), 1, Z_method); } #endif @@ -178,7 +192,7 @@ interface = Z_tmp_2; // Get receiver klass. - // Must do an explicit check if implicit checks are disabled. + // Must do an explicit check if offset too large or implicit checks are disabled. 
address npe_addr = __ pc(); // npe == NULL ptr exception __ null_check(Z_ARG1, Z_R1_scratch, oopDesc::klass_offset_in_bytes()); __ load_klass(rcvr_klass, Z_ARG1); @@ -195,10 +209,10 @@ #ifndef PRODUCT if (DebugVtables) { - Label ok1; + NearLabel ok1; __ z_ltgr(Z_method, Z_method); __ z_brne(ok1); - __ stop("method is null",103); + __ stop("method is null", 103); __ bind(ok1); } #endif @@ -213,39 +227,24 @@ // Handle IncompatibleClassChangeError in itable stubs. __ bind(no_such_interface); - // Count unused bytes - // worst case actual size - // We force resolving of the call site by jumping to - // the "handle wrong method" stub, and so let the + // more detailed IncompatibleClassChangeError + // we force re-resolving of the call site by jumping to + // the "handle wrong method" stub, thus letting the // interpreter runtime do all the dirty work. - padding_bytes += __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::get_handle_wrong_method_stub(), true); + // worst case actual size + slop_delta = __ load_const_size() - __ load_const_optimized_rtn_len(Z_R1_scratch, (long)SharedRuntime::get_handle_wrong_method_stub(), true); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); __ z_br(Z_R1_scratch); masm->flush(); + bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0); - s->set_exception_points(npe_addr, ame_addr); return s; } -// In order to tune these parameters, run the JVM with VM options -// +PrintMiscellaneous and +WizardMode to see information about -// actual itable stubs. Run it with -Xmx31G -XX:+UseCompressedOops. -int VtableStub::pd_code_size_limit(bool is_vtable_stub) { - int size = DebugVtables ? 216 : 0; - if (CountCompiledCalls) { - size += 6 * 4; - } - size += is_vtable_stub ? 
36 : 140; - if (UseCompressedClassPointers) { - size += MacroAssembler::instr_size_for_decode_klass_not_null(); - } - if (!ImplicitNullChecks) { - size += 36; - } - return size; -} - int VtableStub::pd_code_alignment() { + // System z cache line size is 256 bytes, but octoword-alignment is quite ok. const unsigned int icache_line_size = 32; return icache_line_size; } diff --git a/src/hotspot/cpu/sparc/vtableStubs_sparc.cpp b/src/hotspot/cpu/sparc/vtableStubs_sparc.cpp --- a/src/hotspot/cpu/sparc/vtableStubs_sparc.cpp +++ b/src/hotspot/cpu/sparc/vtableStubs_sparc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,32 +41,38 @@ #define __ masm-> - #ifndef PRODUCT extern "C" void bad_compiled_vtable_index(JavaThread* thread, oopDesc* receiver, int index); #endif // Used by compiler only; may use only caller saved, non-argument registers -// NOTE: %%%% if any change is made to this stub make sure that the function -// pd_code_size_limit is changed to ensure the correct size for VtableStub VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { - const int sparc_code_length = VtableStub::pd_code_size_limit(true); - VtableStub* s = new(sparc_code_length) VtableStub(true, vtable_index); + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = VtableStub::code_size_limit(true); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; } - ResourceMark rm; - CodeBuffer cb(s->entry_point(), sparc_code_length); + // Count unused bytes in instruction sequences of variable size. 
+ // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + const int slop32 = ((vtable_index < 512) ? 2 : 0)*BytesPerInstWord; // code size change with transition from 13-bit to 32-bit constant (@index == 512?). + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); -#ifndef PRODUCT +#if (!defined(PRODUCT) && defined(COMPILER2)) if (CountCompiledCalls) { __ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), G5, G3_scratch); } -#endif /* PRODUCT */ +#endif // PRODUCT assert(VtableStub::receiver_location() == O0->as_VMReg(), "receiver expected in O0"); @@ -74,20 +80,33 @@ address npe_addr = __ pc(); __ load_klass(O0, G3_scratch); - // set Method* (in case of interpreted method), and destination address #ifndef PRODUCT if (DebugVtables) { Label L; // check offset vs vtable length __ ld(G3_scratch, in_bytes(Klass::vtable_length_offset()), G5); __ cmp_and_br_short(G5, vtable_index*vtableEntry::size(), Assembler::greaterUnsigned, Assembler::pt, L); + + // set generates 8 instructions (worst case), 1 instruction (best case) + start_pc = __ pc(); __ set(vtable_index, O2); + slop_delta = __ worst_case_insts_for_set()*BytesPerInstWord - (__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + + // there is no variance in call_VM() emitted code. 
__ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), O0, O2); __ bind(L); } #endif + // set Method* (in case of interpreted method), and destination address + start_pc = __ pc(); __ lookup_virtual_method(G3_scratch, vtable_index, G5_method); + // lookup_virtual_method generates 3 instructions (worst case), 1 instruction (best case) + slop_delta = 3*BytesPerInstWord - (int)(__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); #ifndef PRODUCT if (DebugVtables) { @@ -109,37 +128,41 @@ __ delayed()->nop(); masm->flush(); + slop_bytes += slop32; // add'l slop for size variance due to large itable offsets + bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, slop32); - if (PrintMiscellaneous && (WizardMode || Verbose)) { - tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d", - vtable_index, p2i(s->entry_point()), - (int)(s->code_end() - s->entry_point()), - (int)(s->code_end() - __ pc())); - } - guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - // shut the door on sizing bugs - int slop = 2*BytesPerInstWord; // 32-bit offset is this much larger than a 13-bit one - assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for sethi;add"); - - s->set_exception_points(npe_addr, ame_addr); return s; } -// NOTE: %%%% if any change is made to this stub make sure that the function -// pd_code_size_limit is changed to ensure the correct size for VtableStub VtableStub* VtableStubs::create_itable_stub(int itable_index) { - const int sparc_code_length = VtableStub::pd_code_size_limit(false); - VtableStub* s = new(sparc_code_length) VtableStub(false, itable_index); + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. 
+ const int stub_code_length = VtableStub::code_size_limit(false); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; } + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + const int slop32 = ((itable_index < 512) ? 2 : 0)*BytesPerInstWord; // code size change with transition from 13-bit to 32-bit constant (@index == 512?). - ResourceMark rm; - CodeBuffer cb(s->entry_point(), sparc_code_length); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { +// Use G3_scratch, G4_scratch as work regs for inc_counter. +// These are defined before use further down. + __ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), G3_scratch, G4_scratch); + } +#endif // PRODUCT + Register G3_Klass = G3_scratch; Register G5_icholder = G5; // Passed in as an argument Register G4_interface = G4_scratch; @@ -160,15 +183,10 @@ // and so those registers are not available here. __ save(SP,-frame::register_save_words*wordSize,SP); -#ifndef PRODUCT - if (CountCompiledCalls) { - __ inc_counter(SharedRuntime::nof_megamorphic_calls_addr(), L0, L1); - } -#endif /* PRODUCT */ + Label L_no_such_interface; + Register L5_method = L5; - Label L_no_such_interface; - - Register L5_method = L5; + start_pc = __ pc(); // Receiver subtype check against REFC. 
__ ld_ptr(G5_icholder, CompiledICHolder::holder_klass_offset(), G4_interface); @@ -179,6 +197,9 @@ L_no_such_interface, /*return_method=*/ false); + const ptrdiff_t typecheckSize = __ pc() - start_pc; + start_pc = __ pc(); + // Get Method* and entrypoint for compiler __ ld_ptr(G5_icholder, CompiledICHolder::holder_metadata_offset(), G4_interface); __ lookup_interface_method(// inputs: rec. class, interface, itable index @@ -187,6 +208,19 @@ L5_method, L2, L3, L_no_such_interface); + const ptrdiff_t lookupSize = __ pc() - start_pc; + + // Reduce "estimate" such that "padding" does not drop below 8. + // Do not target a left-over number of zero, because a very + // large vtable or itable offset (> 4K) will require an extra + // sethi/or pair of instructions. + // Found typecheck(60) + lookup(72) to exceed previous estimate (32*4). + const ptrdiff_t estimate = 36*BytesPerInstWord; + const ptrdiff_t codesize = typecheckSize + lookupSize + slop32; + slop_delta = (int)(estimate - codesize); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); + #ifndef PRODUCT if (DebugVtables) { Label L01; @@ -222,88 +256,12 @@ __ delayed()->restore(); masm->flush(); + slop_bytes += slop32; // add'l slop for size variance due to large itable offsets + bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, slop32); - if (PrintMiscellaneous && (WizardMode || Verbose)) { - tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d", - itable_index, p2i(s->entry_point()), - (int)(s->code_end() - s->entry_point()), - (int)(s->code_end() - __ pc())); - } - guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - // shut the door on sizing bugs - int slop = 2*BytesPerInstWord; // 32-bit offset is this much larger than a 13-bit one - assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for sethi;add"); - - 
s->set_exception_points(npe_addr, ame_addr); return s; } - -int VtableStub::pd_code_size_limit(bool is_vtable_stub) { - if (DebugVtables || CountCompiledCalls || VerifyOops) return 1000; - else { - const int slop = 2*BytesPerInstWord; // sethi;add (needed for long offsets) - if (is_vtable_stub) { - // ld;ld;ld,jmp,nop - const int basic = 5*BytesPerInstWord + - // shift;add for load_klass (only shift with zero heap based) - (UseCompressedClassPointers ? - MacroAssembler::instr_size_for_decode_klass_not_null() : 0); - return basic + slop; - } else { - const int basic = 54 * BytesPerInstWord + - // shift;add for load_klass (only shift with zero heap based) - (UseCompressedClassPointers ? - MacroAssembler::instr_size_for_decode_klass_not_null() : 0); - return (basic + slop); - } - } - - // In order to tune these parameters, run the JVM with VM options - // +PrintMiscellaneous and +WizardMode to see information about - // actual itable stubs. Look for lines like this: - // itable #1 at 0x5551212[116] left over: 8 - // Reduce the constants so that the "left over" number is 8 - // Do not aim at a left-over number of zero, because a very - // large vtable or itable offset (> 4K) will require an extra - // sethi/or pair of instructions. - // - // The JVM98 app. _202_jess has a megamorphic interface call. - // The itable code looks like this: - // Decoding VtableStub itbl[1]@16 - // ld [ %o0 + 4 ], %g3 - // save %sp, -64, %sp - // ld [ %g3 + 0xe8 ], %l2 - // sll %l2, 2, %l2 - // add %l2, 0x134, %l2 - // add %g3, %l2, %l2 - // add %g3, 4, %g3 - // ld [ %l2 ], %l5 - // brz,pn %l5, throw_icce - // cmp %l5, %g5 - // be %icc, success - // add %l2, 8, %l2 - // loop: - // ld [ %l2 ], %l5 - // brz,pn %l5, throw_icce - // cmp %l5, %g5 - // bne,pn %icc, loop - // add %l2, 8, %l2 - // success: - // ld [ %l2 + -4 ], %l2 - // ld [ %g3 + %l2 ], %l5 - // restore %l5, 0, %g5 - // ld [ %g5 + 0x44 ], %g3 - // jmp %g3 - // nop - // throw_icce: - // sethi %hi(throw_ICCE_entry), %g3 - // ! 
5 more instructions here, LP64_ONLY - // jmp %g3 + %lo(throw_ICCE_entry) - // restore -} - - int VtableStub::pd_code_alignment() { // UltraSPARC cache line size is 8 instructions: const unsigned int icache_line_size = 32; diff --git a/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp b/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp --- a/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp +++ b/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -55,25 +55,36 @@ // Available now, but may become callee-save at some point: // rsi, rdi // Note that rax and rdx are also used for return values. -// + VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { - const int i486_code_length = VtableStub::pd_code_size_limit(true); - VtableStub* s = new(i486_code_length) VtableStub(true, vtable_index); + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = VtableStub::code_size_limit(true); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; } - ResourceMark rm; - CodeBuffer cb(s->entry_point(), i486_code_length); + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + // No variance was detected in vtable stub sizes. Setting slop32 == 0 will unveil any deviation from this observation. + const int slop32 = 0; +// const int slop32 = (vtable_index == 0) ? 
4 : // code size change with transition from 8-bit to 32-bit constant (@index == 32). +// (vtable_index < 32) ? 3 : 0; // index == 0 generates even shorter code. + + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); -#ifndef PRODUCT - +#if (!defined(PRODUCT) && defined(COMPILER2)) if (CountCompiledCalls) { __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); } -#endif /* PRODUCT */ +#endif // get receiver (need to skip return address on top of stack) assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx"); @@ -85,11 +96,21 @@ #ifndef PRODUCT if (DebugVtables) { Label L; + start_pc = __ pc(); // check offset vs vtable length __ cmpl(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size()); + slop_delta = 6 - (__ pc() - start_pc); // cmpl varies in length, depending on data + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + __ jcc(Assembler::greater, L); __ movl(rbx, vtable_index); + // VTABLE TODO: find upper bound for call_VM length. 
+ start_pc = __ pc(); __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), rcx, rbx); + slop_delta = 470 - (__ pc() - start_pc); // call_VM varies in length, depending on data + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); __ bind(L); } #endif // PRODUCT @@ -97,8 +118,13 @@ const Register method = rbx; // load Method* and target address + start_pc = __ pc(); __ lookup_virtual_method(rax, vtable_index, method); + slop_delta = 6 - (int)(__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); +#ifndef PRODUCT if (DebugVtables) { Label L; __ cmpptr(method, (int32_t)NULL_WORD); @@ -108,55 +134,53 @@ __ stop("Vtable entry is NULL"); __ bind(L); } +#endif // PRODUCT - // rax,: receiver klass + // rax: receiver klass // method (rbx): Method* // rcx: receiver address ame_addr = __ pc(); __ jmp( Address(method, Method::from_compiled_offset())); masm->flush(); + slop_bytes += slop32; // add'l slop for size variance due to large itable offsets + bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, slop32); - if (PrintMiscellaneous && (WizardMode || Verbose)) { - tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d", - vtable_index, p2i(s->entry_point()), - (int)(s->code_end() - s->entry_point()), - (int)(s->code_end() - __ pc())); - } - guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - // shut the door on sizing bugs - int slop = 3; // 32-bit offset is this much larger than an 8-bit one - assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); - - s->set_exception_points(npe_addr, ame_addr); return s; } VtableStub* VtableStubs::create_itable_stub(int itable_index) { - // Note well: pd_code_size_limit is the absolute minimum we can get away with. 
If you - // add code here, bump the code stub size returned by pd_code_size_limit! - const int i486_code_length = VtableStub::pd_code_size_limit(false); - VtableStub* s = new(i486_code_length) VtableStub(false, itable_index); + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = VtableStub::code_size_limit(false); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; } + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + const int slop32 = (itable_index == 0) ? 4 : // code size change with transition from 8-bit to 32-bit constant (@index == 32). + (itable_index < 32) ? 3 : 0; // index == 0 generates even shorter code. - ResourceMark rm; - CodeBuffer cb(s->entry_point(), i486_code_length); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); +#if (!defined(PRODUCT) && defined(COMPILER2)) + if (CountCompiledCalls) { + __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); + } +#endif /* PRODUCT */ + // Entry arguments: // rax: CompiledICHolder // rcx: Receiver -#ifndef PRODUCT - if (CountCompiledCalls) { - __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); - } -#endif /* PRODUCT */ - // Most registers are in use; we'll use rax, rbx, rsi, rdi // (If we need to make rsi, rdi callee-save, do a push/pop here.) 
const Register recv_klass_reg = rsi; @@ -171,10 +195,12 @@ Label L_no_such_interface; // get receiver klass (also an implicit null-check) + assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx"); address npe_addr = __ pc(); - assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx"); __ load_klass(recv_klass_reg, rcx); + start_pc = __ pc(); + // Receiver subtype check against REFC. // Destroys recv_klass_reg value. __ lookup_interface_method(// inputs: rec. class, interface @@ -184,6 +210,9 @@ L_no_such_interface, /*return_method=*/false); + const ptrdiff_t typecheckSize = __ pc() - start_pc; + start_pc = __ pc(); + // Get selected method from declaring class and itable index const Register method = rbx; __ load_klass(recv_klass_reg, rcx); // restore recv_klass_reg @@ -193,19 +222,30 @@ method, temp_reg, L_no_such_interface); + const ptrdiff_t lookupSize = __ pc() - start_pc; + + // We expect we need slop32 extra bytes. Reason: + // The emitted code in lookup_interface_method changes when itable_index exceeds 31. + // For windows, a narrow estimate was found to be 104. Other OSes not tested. 
+ const ptrdiff_t estimate = 104; + const ptrdiff_t codesize = typecheckSize + lookupSize + slop32; + slop_delta = (int)(estimate - codesize); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); + // method (rbx): Method* // rcx: receiver #ifdef ASSERT if (DebugVtables) { - Label L1; - __ cmpptr(method, (int32_t)NULL_WORD); - __ jcc(Assembler::equal, L1); - __ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD); - __ jcc(Assembler::notZero, L1); - __ stop("Method* is null"); - __ bind(L1); - } + Label L1; + __ cmpptr(method, (int32_t)NULL_WORD); + __ jcc(Assembler::equal, L1); + __ cmpptr(Address(method, Method::from_compiled_offset()), (int32_t)NULL_WORD); + __ jcc(Assembler::notZero, L1); + __ stop("Method* is null"); + __ bind(L1); + } #endif // ASSERT address ame_addr = __ pc(); @@ -219,70 +259,15 @@ // dirty work. __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); - __ flush(); + masm->flush(); + slop_bytes += slop32; // add'l slop for size variance due to large itable offsets + bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, slop32); - if (PrintMiscellaneous && (WizardMode || Verbose)) { - tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d", - itable_index, p2i(s->entry_point()), - (int)(s->code_end() - s->entry_point()), - (int)(s->code_end() - __ pc())); - } - guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - // shut the door on sizing bugs - int slop = 3; // 32-bit offset is this much larger than an 8-bit one - assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); - - s->set_exception_points(npe_addr, ame_addr); return s; } - - -int VtableStub::pd_code_size_limit(bool is_vtable_stub) { - if (is_vtable_stub) { - // Vtable stub size - return (DebugVtables ? 210 : 16) + (CountCompiledCalls ? 
6 : 0); - } else { - // Itable stub size - return (DebugVtables ? 256 : 110) + (CountCompiledCalls ? 6 : 0); - } - // In order to tune these parameters, run the JVM with VM options - // +PrintMiscellaneous and +WizardMode to see information about - // actual itable stubs. Look for lines like this: - // itable #1 at 0x5551212[65] left over: 3 - // Reduce the constants so that the "left over" number is >=3 - // for the common cases. - // Do not aim at a left-over number of zero, because a - // large vtable or itable index (> 16) will require a 32-bit - // immediate displacement instead of an 8-bit one. - // - // The JVM98 app. _202_jess has a megamorphic interface call. - // The itable code looks like this: - // Decoding VtableStub itbl[1]@1 - // mov 0x4(%ecx),%esi - // mov 0xe8(%esi),%edi - // lea 0x130(%esi,%edi,4),%edi - // add $0x7,%edi - // and $0xfffffff8,%edi - // lea 0x4(%esi),%esi - // mov (%edi),%ebx - // cmp %ebx,%eax - // je success - // loop: - // test %ebx,%ebx - // je throw_icce - // add $0x8,%edi - // mov (%edi),%ebx - // cmp %ebx,%eax - // jne loop - // success: - // mov 0x4(%edi),%edi - // mov (%esi,%edi,1),%ebx - // jmp *0x44(%ebx) - // throw_icce: - // jmp throw_ICCE_entry +int VtableStub::pd_code_alignment() { + // x86 cache line size is 64 bytes, but we want to limit alignment loss. + const unsigned int icache_line_size = wordSize; + return icache_line_size; } - -int VtableStub::pd_code_alignment() { - return wordSize; -} diff --git a/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp b/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp --- a/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp +++ b/src/hotspot/cpu/x86/vtableStubs_x86_64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -42,24 +42,33 @@ #define __ masm-> #ifndef PRODUCT -extern "C" void bad_compiled_vtable_index(JavaThread* thread, - oop receiver, - int index); +extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); #endif VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { - const int amd64_code_length = VtableStub::pd_code_size_limit(true); - VtableStub* s = new(amd64_code_length) VtableStub(true, vtable_index); + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = VtableStub::code_size_limit(true); + VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; } - ResourceMark rm; - CodeBuffer cb(s->entry_point(), amd64_code_length); + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + const int slop32 = (vtable_index == 0) ? 4 : 3; +// const int slop32 = (vtable_index == 0) ? 4 : // code size change with transition from 8-bit to 32-bit constant (@index == 16). +// (vtable_index < 16) ? 3 : 0; // index == 0 generates even shorter code. 
+ + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); MacroAssembler* masm = new MacroAssembler(&cb); -#ifndef PRODUCT +#if (!defined(PRODUCT) && defined(COMPILER2)) if (CountCompiledCalls) { __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); } @@ -77,22 +86,35 @@ #ifndef PRODUCT if (DebugVtables) { Label L; + start_pc = __ pc(); // check offset vs vtable length - __ cmpl(Address(rax, Klass::vtable_length_offset()), - vtable_index * vtableEntry::size()); + __ cmpl(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size()); + slop_delta = 12 - (__ pc() - start_pc); // cmpl varies in length, depending on data + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); + __ jcc(Assembler::greater, L); __ movl(rbx, vtable_index); - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, rbx); + // VTABLE TODO: find upper bound for call_VM length. 
+ start_pc = __ pc(); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, rbx); + slop_delta = 470 - (__ pc() - start_pc); // call_VM varies in length, depending on data + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); __ bind(L); } #endif // PRODUCT - // load Method* and target address const Register method = rbx; + // load Method* and target address + start_pc = __ pc(); __ lookup_virtual_method(rax, vtable_index, method); + slop_delta = 8 - (int)(__ pc() - start_pc); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); +#ifndef PRODUCT if (DebugVtables) { Label L; __ cmpptr(method, (int32_t)NULL_WORD); @@ -102,50 +124,48 @@ __ stop("Vtable entry is NULL"); __ bind(L); } +#endif // PRODUCT + // rax: receiver klass - // rbx: Method* + // method (rbx): Method* // rcx: receiver address ame_addr = __ pc(); __ jmp( Address(rbx, Method::from_compiled_offset())); - __ flush(); + masm->flush(); + slop_bytes += slop32; // add'l slop for size variance due to large itable offsets + bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, slop32); - if (PrintMiscellaneous && (WizardMode || Verbose)) { - tty->print_cr("vtable #%d at " PTR_FORMAT "[%d] left over: %d", - vtable_index, p2i(s->entry_point()), - (int)(s->code_end() - s->entry_point()), - (int)(s->code_end() - __ pc())); - } - guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - // shut the door on sizing bugs - int slop = 3; // 32-bit offset is this much larger than an 8-bit one - assert(vtable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); - - s->set_exception_points(npe_addr, ame_addr); return s; } VtableStub* VtableStubs::create_itable_stub(int itable_index) { - // Note well: pd_code_size_limit is the absolute minimum we can get - // away with. 
If you add code here, bump the code stub size - // returned by pd_code_size_limit! - const int amd64_code_length = VtableStub::pd_code_size_limit(false); - VtableStub* s = new(amd64_code_length) VtableStub(false, itable_index); + // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. + const int stub_code_length = VtableStub::code_size_limit(false); + VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); // Can be NULL if there is no free space in the code cache. if (s == NULL) { return NULL; } + // Count unused bytes in instruction sequences of variable size. + // We add them to the computed buffer size in order to avoid + // overflow in subsequently generated stubs. + address start_pc; + int slop_bytes = 0; + int slop_delta = 0; + const int slop32 = (itable_index == 0) ? 4 : // code size change with transition from 8-bit to 32-bit constant (@index == 16). + (itable_index < 16) ? 3 : 0; // index == 0 generates even shorter code. 
- ResourceMark rm; - CodeBuffer cb(s->entry_point(), amd64_code_length); - MacroAssembler* masm = new MacroAssembler(&cb); + ResourceMark rm; + CodeBuffer cb(s->entry_point(), stub_code_length); + MacroAssembler *masm = new MacroAssembler(&cb); -#ifndef PRODUCT +#if (!defined(PRODUCT) && defined(COMPILER2)) if (CountCompiledCalls) { __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); } -#endif +#endif // PRODUCT // Entry arguments: // rax: CompiledICHolder @@ -158,17 +178,19 @@ const Register resolved_klass_reg = rbx; // resolved interface klass (REFC) const Register temp_reg = r11; - Label L_no_such_interface; - const Register icholder_reg = rax; __ movptr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); __ movptr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); + Label L_no_such_interface; + // get receiver klass (also an implicit null-check) assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); address npe_addr = __ pc(); __ load_klass(recv_klass_reg, j_rarg0); + start_pc = __ pc(); + // Receiver subtype check against REFC. // Destroys recv_klass_reg value. __ lookup_interface_method(// inputs: rec. class, interface @@ -178,6 +200,9 @@ L_no_such_interface, /*return_method=*/false); + const ptrdiff_t typecheckSize = __ pc() - start_pc; + start_pc = __ pc(); + // Get selected method from declaring class and itable index const Register method = rbx; __ load_klass(recv_klass_reg, j_rarg0); // restore recv_klass_reg @@ -187,6 +212,17 @@ method, temp_reg, L_no_such_interface); + const ptrdiff_t lookupSize = __ pc() - start_pc; + + // We expect we need slop32 extra bytes. Reason: + // The emitted code in lookup_interface_method changes when itable_index exceeds 15. + // For linux, a very narrow estimate would be 112, but Solaris requires some more space (130). 
+ const ptrdiff_t estimate = 136; + const ptrdiff_t codesize = typecheckSize + lookupSize + slop32; + slop_delta = (int)(estimate - codesize); + slop_bytes += slop_delta; + assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); + // If we take a trap while this arg is on the stack we will not // be able to walk the stack properly. This is not an issue except // when there are mistakes in this assembly code that could generate @@ -207,8 +243,6 @@ } #endif // ASSERT - // rbx: Method* - // j_rarg0: receiver address ame_addr = __ pc(); __ jmp(Address(method, Method::from_compiled_offset())); @@ -220,68 +254,15 @@ // dirty work. __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); - __ flush(); + masm->flush(); + slop_bytes += slop32; // add'l slop for size variance due to large itable offsets + bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, slop32); - if (PrintMiscellaneous && (WizardMode || Verbose)) { - tty->print_cr("itable #%d at " PTR_FORMAT "[%d] left over: %d", - itable_index, p2i(s->entry_point()), - (int)(s->code_end() - s->entry_point()), - (int)(s->code_end() - __ pc())); - } - guarantee(__ pc() <= s->code_end(), "overflowed buffer"); - // shut the door on sizing bugs - int slop = 3; // 32-bit offset is this much larger than an 8-bit one - assert(itable_index > 10 || __ pc() + slop <= s->code_end(), "room for 32-bit offset"); - - s->set_exception_points(npe_addr, ame_addr); return s; } -int VtableStub::pd_code_size_limit(bool is_vtable_stub) { - if (is_vtable_stub) { - // Vtable stub size - return (DebugVtables ? 512 : 24) + (CountCompiledCalls ? 13 : 0) + - (UseCompressedClassPointers ? MacroAssembler::instr_size_for_decode_klass_not_null() : 0); - } else { - // Itable stub size - return (DebugVtables ? 512 : 140) + (CountCompiledCalls ? 13 : 0) + - (UseCompressedClassPointers ? 
2 * MacroAssembler::instr_size_for_decode_klass_not_null() : 0); - } - // In order to tune these parameters, run the JVM with VM options - // +PrintMiscellaneous and +WizardMode to see information about - // actual itable stubs. Look for lines like this: - // itable #1 at 0x5551212[71] left over: 3 - // Reduce the constants so that the "left over" number is >=3 - // for the common cases. - // Do not aim at a left-over number of zero, because a - // large vtable or itable index (>= 32) will require a 32-bit - // immediate displacement instead of an 8-bit one. - // - // The JVM98 app. _202_jess has a megamorphic interface call. - // The itable code looks like this: - // Decoding VtableStub itbl[1]@12 - // mov 0x8(%rsi),%r10 - // mov 0x198(%r10),%r11d - // lea 0x218(%r10,%r11,8),%r11 - // lea 0x8(%r10),%r10 - // mov (%r11),%rbx - // cmp %rbx,%rax - // je success - // loop: - // test %rbx,%rbx - // je throw_icce - // add $0x10,%r11 - // mov (%r11),%rbx - // cmp %rbx,%rax - // jne loop - // success: - // mov 0x8(%r11),%r11d - // mov (%r10,%r11,1),%rbx - // jmpq *0x60(%rbx) - // throw_icce: - // jmpq throw_ICCE_entry +int VtableStub::pd_code_alignment() { + // x86 cache line size is 64 bytes, but we want to limit alignment loss. + const unsigned int icache_line_size = wordSize; + return icache_line_size; } - -int VtableStub::pd_code_alignment() { - return wordSize; -} diff --git a/src/hotspot/share/code/vtableStubs.cpp b/src/hotspot/share/code/vtableStubs.cpp --- a/src/hotspot/share/code/vtableStubs.cpp +++ b/src/hotspot/share/code/vtableStubs.cpp @@ -90,8 +90,12 @@ // hash value). Each list is anchored in a little hash _table, indexed // by that hash value. 
+static int const firstStub_size = 1024; + VtableStub* VtableStubs::_table[VtableStubs::N]; int VtableStubs::_number_of_vtable_stubs = 0; +int VtableStubs::_vtab_stub_size = 0; +int VtableStubs::_itab_stub_size = 0; void VtableStubs::initialize() { @@ -107,6 +111,67 @@ } +int VtableStub::code_size_limit(bool is_vtable_stub) { + if (is_vtable_stub) { + return VtableStubs::_vtab_stub_size > 0 ? VtableStubs::_vtab_stub_size + : firstStub_size; + } else { // itable stub + return VtableStubs::_itab_stub_size > 0 ? VtableStubs::_itab_stub_size + : firstStub_size; + } +} // code_size_limit + + +void VtableStub::check_and_set_size_limit(bool is_vtable_stub, + int code_size, + int padding ) { + const char* name = is_vtable_stub ? "vtable" : "itable"; + + guarantee(code_size <= code_size_limit(is_vtable_stub), + "buffer overflow in %s stub, code_size is %d, limit is %d", name, code_size, code_size_limit(is_vtable_stub)); + + if (is_vtable_stub) { + if ( code_size > VtableStubs::_vtab_stub_size - padding ) { + VtableStubs::_vtab_stub_size = code_size + padding; + } + } else { // itable stub + if ( code_size > VtableStubs::_itab_stub_size - padding ) { + VtableStubs::_itab_stub_size = code_size + padding; + } + } + return; +} // check_and_set_size_limit + + +void VtableStubs::bookkeeping(MacroAssembler* masm, outputStream* out, VtableStub* s, + address npe_addr, address ame_addr, bool is_vtable_stub, + int index, int slop_bytes, int slop32) { + const char* name = is_vtable_stub ? 
"vtable" : "itable"; + const int stub_length = VtableStub::code_size_limit(is_vtable_stub); + + if (PrintMiscellaneous && (WizardMode || Verbose)) { + out->print_cr("%s #%d at " PTR_FORMAT "[%d], estimate %d, left over: %d", + name, index, p2i(s->entry_point()), + (int)(s->code_end() - s->entry_point()), + stub_length, + (int)(s->code_end() - masm->pc())); + } + guarantee(masm->pc() <= s->code_end(), "%s #%d: overflowed buffer, estimated len: %d, actual len: %d, overrun: %d", + name, index, stub_length, + (int)(masm->pc() - s->code_begin()), + (int)(masm->pc() - s->code_end())); + assert((masm->pc() + slop32) <= s->code_end(), "%s #%d: spare space for 32-bit offset: required = %d, available = %d", + name, index, slop32, + (int)(s->code_end() - masm->pc())); + + // After the first vtable/itable stub is generated, we have a much + // better estimate for the stub size. Remember/update this + // estimate after some sanity checks. + s->check_and_set_size_limit(is_vtable_stub, masm->offset(), slop_bytes); + s->set_exception_points(npe_addr, ame_addr); +} + + address VtableStubs::find_stub(bool is_vtable_stub, int vtable_index) { assert(vtable_index >= 0, "must be positive"); @@ -173,10 +238,7 @@ uint hash = VtableStubs::hash(stub->is_vtable_stub(), stub->index()); VtableStub* s; for (s = _table[hash]; s != NULL && s != stub; s = s->next()) {} - if (s == stub) { - return s; - } - return NULL; + return (s == stub) ? 
s : NULL; } bool VtableStubs::contains(address pc) { diff --git a/src/hotspot/share/code/vtableStubs.hpp b/src/hotspot/share/code/vtableStubs.hpp --- a/src/hotspot/share/code/vtableStubs.hpp +++ b/src/hotspot/share/code/vtableStubs.hpp @@ -25,12 +25,48 @@ #ifndef SHARE_VM_CODE_VTABLESTUBS_HPP #define SHARE_VM_CODE_VTABLESTUBS_HPP +#include "asm/macroAssembler.hpp" #include "code/vmreg.hpp" #include "memory/allocation.hpp" // A VtableStub holds an individual code stub for a pair (vtable index, #args) for either itables or vtables // There's a one-to-one relationship between a VtableStub and such a pair. +// A word on VtableStub sizing: +// Such a vtable/itable stub consists of the instance data +// and an immediately following CodeBuffer. +// Unfortunately, the required space for the code buffer varies, depending on +// the setting of compile time macros (PRODUCT, ASSERT, ...) and of command line +// parameters. Actual data may have an influence on the size as well. +// +// A simple approximation for the VtableStub size would be to just take a value +// "large enough" for all circumstances - a worst case estimate. +// As there can exist many stubs - and they never go away - we certainly don't +// want to waste more code cache space than absolutely necessary. +// +// We need a different approach which, as far as possible, should be independent +// of or adaptive to code size variations. These variations may be caused by +// changed compile time or run time switches as well as by changed emitter code. +// +// Here is the idea: +// For the first stub we generate, we allocate a "large enough" code buffer. +// Once all instructions are emitted, we know the actual size of the stub. +// Remembering that size allows us to allocate a tightly matching code buffer +// for all subsequent stubs. That covers all "static variance", i.e.
all variance +// that is due to compile time macros, command line parameters, machine capabilities, +// and other influences which are immutable for the life span of the VM. +// +// Life isn't always that easy. Code size may depend on actual data, "load constant" +// being an example of that. All code segments with such "dynamic variance" require +// additional care. We need to know or estimate the worst case code size for each +// such segment. With that knowledge, we can maintain a "slop counter" in the +// platform-specific stub emitters. It accumulates the difference between worst-case +// and actual code size. When the stub is fully generated, the actual stub size is +// adjusted (increased) by the slop counter value. +// +// As a result, we allocate all code buffers but the first with the same, tightly matching size. +// + class VtableStub { private: friend class VtableStubs; @@ -58,7 +94,7 @@ public: address code_begin() const { return (address)(this + 1); } - address code_end() const { return code_begin() + pd_code_size_limit(_is_vtable_stub); } + address code_end() const { return code_begin() + code_size_limit(_is_vtable_stub); } address entry_point() const { return code_begin(); } static int entry_offset() { return sizeof(class VtableStub); } @@ -78,7 +114,6 @@ } // platform-dependent routines - static int pd_code_size_limit(bool is_vtable_stub); static int pd_code_alignment(); // CNC: Removed because vtable stubs are now made with an ideal graph // static bool pd_disregard_arg_size(); @@ -95,6 +130,11 @@ bool is_abstract_method_error(address epc) { return epc == code_begin()+_ame_offset; } bool is_null_pointer_exception(address epc) { return epc == code_begin()+_npe_offset; } + static int code_size_limit( bool is_vtable_stub ); + static void check_and_set_size_limit( bool is_vtable_stub, + int code_size, + int padding ); + void print_on(outputStream* st) const; void print() const { print_on(tty); } @@ -122,10 +162,18 @@ static void enter (bool
is_vtable_stub, int vtable_index, VtableStub* s); static inline uint hash (bool is_vtable_stub, int vtable_index); static address find_stub (bool is_vtable_stub, int vtable_index); + static void bookkeeping(MacroAssembler* masm, outputStream* out, VtableStub* s, + address npe_addr, address ame_addr, bool is_vtable_stub, + int index, int slop_bytes, int slop32); public: static address find_vtable_stub(int vtable_index) { return find_stub(true, vtable_index); } static address find_itable_stub(int itable_index) { return find_stub(false, itable_index); } + + // SAPJVM PJ 2007-09-24 introduce flexible code buffer size for the stubs + static int _vtab_stub_size; + static int _itab_stub_size; + static VtableStub* entry_point(address pc); // vtable stub entry point for a pc static bool contains(address pc); // is pc within any stub? static VtableStub* stub_containing(address pc); // stub containing pc or NULL