--- old/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp	2019-03-11 14:24:57.510356104 +0100
+++ new/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp	2019-03-11 14:24:57.306356107 +0100
@@ -28,6 +28,7 @@
 #endif
 #include "asm/macroAssembler.hpp"
 #include "asm/macroAssembler.inline.hpp"
+#include "classfile/symbolTable.hpp"
 #include "code/debugInfoRec.hpp"
 #include "code/icBuffer.hpp"
 #include "code/nativeInst.hpp"
@@ -491,6 +492,7 @@
     case T_OBJECT:
     case T_ARRAY:
     case T_ADDRESS:
+    case T_VALUETYPE:
       if (int_args < Argument::n_int_register_parameters_j) {
         regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
       } else {
@@ -524,6 +526,88 @@
   return align_up(stk_args, 2);
 }
 
+// Same as java_calling_convention() but for multiple return
+// values. There's no way to store them on the stack, so if we don't
+// have enough registers, multiple values can't be returned.
+const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1;
+const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
+int SharedRuntime::java_return_convention(const BasicType *sig_bt,
+                                          VMRegPair *regs,
+                                          int total_args_passed) {
+  // Create the mapping between return value positions and registers.
+  static const Register INT_ArgReg[java_return_convention_max_int] = {
+    rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0
+  };
+  static const XMMRegister FP_ArgReg[java_return_convention_max_float] = {
+    j_farg0, j_farg1, j_farg2, j_farg3,
+    j_farg4, j_farg5, j_farg6, j_farg7
+  };
+
+  uint int_args = 0;
+  uint fp_args = 0;
+
+  for (int i = 0; i < total_args_passed; i++) {
+    switch (sig_bt[i]) {
+    case T_BOOLEAN:
+    case T_CHAR:
+    case T_BYTE:
+    case T_SHORT:
+    case T_INT:
+      if (int_args < Argument::n_int_register_parameters_j+1) {
+        regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
+        int_args++;
+      } else {
+        return -1;
+      }
+      break;
+    case T_VOID:
+      // halves of T_LONG or T_DOUBLE
+      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+      regs[i].set_bad();
+      break;
+    case T_LONG:
+      assert(sig_bt[i + 1] == T_VOID, "expecting half");
+      // fall through
+    case T_OBJECT:
+    case T_VALUETYPE:
+    case T_ARRAY:
+    case T_ADDRESS:
+    case T_METADATA:
+      if (int_args < Argument::n_int_register_parameters_j+1) {
+        regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
+        int_args++;
+      } else {
+        return -1;
+      }
+      break;
+    case T_FLOAT:
+      if (fp_args < Argument::n_float_register_parameters_j) {
+        regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
+        fp_args++;
+      } else {
+        return -1;
+      }
+      break;
+    case T_DOUBLE:
+      assert(sig_bt[i + 1] == T_VOID, "expecting half");
+      if (fp_args < Argument::n_float_register_parameters_j) {
+        regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
+        fp_args++;
+      } else {
+        return -1;
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+  }
+
+  return int_args + fp_args;
+}
+
 // Patch the callers callsite with entry to compiled code if it exists.
 static void patch_callers_callsite(MacroAssembler *masm) {
   Label L;
@@ -566,13 +650,127 @@
   __ bind(L);
 }
 
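To make the register-only constraint of the new java_return_convention() concrete, here is a minimal sketch of querying it for a flattened { int, long } return value (the local arrays are illustrative, not part of the patch; the register order follows INT_ArgReg above):

    // A value type { int x; long y; } returned as multiple values. The
    // flattened signature uses the same encoding as arguments: T_LONG is
    // followed by a T_VOID half.
    BasicType sig_bt[] = { T_INT, T_LONG, T_VOID };
    VMRegPair regs[3];
    int total = SharedRuntime::java_return_convention(sig_bt, regs, 3);
    // total == 2: the T_INT lands in rax (INT_ArgReg[0]), the T_LONG in
    // j_rarg5 (INT_ArgReg[1]), and the T_VOID half is set_bad().
    // A return value of -1 means the values don't all fit in registers and
    // the value type has to be returned by reference instead.
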
+// For each value type argument, sig_extended includes the list of fields of
+// the value type. This utility function computes the number of
+// arguments for the call if value types are passed by reference (the
+// calling convention the interpreter expects).
+static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
+  int total_args_passed = 0;
+  if (ValueTypePassFieldsAsArgs) {
+    for (int i = 0; i < sig_extended->length(); i++) {
+      BasicType bt = sig_extended->at(i)._bt;
+      if (SigEntry::is_reserved_entry(sig_extended, i)) {
+        // Ignore reserved entry
+      } else if (bt == T_VALUETYPE) {
+        // In sig_extended, a value type argument starts with:
+        // T_VALUETYPE, followed by the types of the fields of the
+        // value type and T_VOID to mark the end of the value
+        // type. Value types are flattened so, for instance, in the
+        // case of a value type with an int field and a value type
+        // field that itself has 2 fields, an int and a long:
+        // T_VALUETYPE T_INT T_VALUETYPE T_INT T_LONG T_VOID (second
+        // slot for the T_LONG) T_VOID (inner T_VALUETYPE) T_VOID
+        // (outer T_VALUETYPE)
+        total_args_passed++;
+        int vt = 1;
+        do {
+          i++;
+          BasicType bt = sig_extended->at(i)._bt;
+          BasicType prev_bt = sig_extended->at(i-1)._bt;
+          if (bt == T_VALUETYPE) {
+            vt++;
+          } else if (bt == T_VOID &&
+                     prev_bt != T_LONG &&
+                     prev_bt != T_DOUBLE) {
+            vt--;
+          }
+        } while (vt != 0);
+      } else {
+        total_args_passed++;
+      }
+    }
+  } else {
+    total_args_passed = sig_extended->length();
+  }
+  return total_args_passed;
+}
+
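As a concrete check of the encoding walked above, the nested example from the comment expands to the following entries (illustrative data; only the _bt values matter for the count):

    // Flattened signature for: value type V1 { int a; V2 b; }
    // where V2 is: value type V2 { int c; long d; }
    static const BasicType enc[] = {
      T_VALUETYPE,      // start of outer V1
      T_INT,            // V1.a
      T_VALUETYPE,      //   start of embedded V2
      T_INT,            //   V2.c
      T_LONG, T_VOID,   //   V2.d and its second slot (prev is T_LONG)
      T_VOID,           //   end of V2
      T_VOID            // end of V1
    };
    // The nesting counter vt goes 1 -> 2 -> 1 -> 0 across the
    // T_VALUETYPE/T_VOID pairs, so this whole run counts as a single
    // interpreter argument: total_args_passed is incremented once.
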
+static void gen_c2i_adapter_helper(MacroAssembler* masm,
+                                   BasicType bt,
+                                   BasicType prev_bt,
+                                   size_t size_in_bytes,
+                                   const VMRegPair& reg_pair,
+                                   const Address& to,
+                                   int extraspace,
+                                   bool is_oop) {
+  assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here");
+  if (bt == T_VOID) {
+    assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
+    return;
+  }
+
+  // Say 4 args:
+  // i   st_off
+  // 0   32 T_LONG
+  // 1   24 T_VOID
+  // 2   16 T_OBJECT
+  // 3    8 T_BOOL
+  // -    0 return address
+  //
+  // However, to make things extra confusing: because we can fit a long/double
+  // in a single slot on a 64-bit VM and it would be silly to break them up,
+  // the interpreter leaves one slot empty and only stores to a single slot.
+  // In this case the slot that is occupied is the T_VOID slot. See, I said it
+  // was confusing.
+
+  bool wide = (size_in_bytes == wordSize);
+  VMReg r_1 = reg_pair.first();
+  VMReg r_2 = reg_pair.second();
+  assert(r_2->is_valid() == wide, "invalid size");
+  if (!r_1->is_valid()) {
+    assert(!r_2->is_valid(), "must be invalid");
+    return;
+  }
+
+  if (!r_1->is_XMMRegister()) {
+    Register val = rax;
+    assert_different_registers(to.base(), val);
+    if (r_1->is_stack()) {
+      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
+      __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
+    } else {
+      val = r_1->as_Register();
+    }
+    if (is_oop) {
+      // We don't need barriers because the destination is a newly allocated object.
+      // Also, we cannot use store_heap_oop(to, val) because it uses r8 as tmp.
+      if (UseCompressedOops) {
+        __ encode_heap_oop(val);
+        __ movl(to, val);
+      } else {
+        __ movptr(to, val);
+      }
+    } else {
+      __ store_sized_value(to, val, size_in_bytes);
+    }
+  } else {
+    if (wide) {
+      __ movdbl(to, r_1->as_XMMRegister());
+    } else {
+      __ movflt(to, r_1->as_XMMRegister());
+    }
+  }
+}
+
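To make the slot arithmetic concrete, here is the 4-argument example from the comment above worked out for the T_LONG at i == 0 (Interpreter::stackElementSize is 8 on x86_64):

    // st_off   = (4 - 0) * 8 = 32   -> the T_LONG's own slot
    // next_off = st_off - 8  = 24   -> the adjacent T_VOID slot
    // The interpreter reads the long from the T_VOID slot, so the caller
    // passes Address(rsp, next_off) as 'to' for longs and doubles, and
    // Address(rsp, st_off) for everything else; in debug builds the unused
    // slot at st_off is overwritten with CONST64(0xdeadffffdeadaaaa) junk.
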
 static void gen_c2i_adapter(MacroAssembler *masm,
-                            int total_args_passed,
-                            int comp_args_on_stack,
-                            const BasicType *sig_bt,
+                            const GrowableArray<SigEntry>* sig_extended,
                             const VMRegPair *regs,
-                            Label& skip_fixup) {
+                            Label& skip_fixup,
+                            address start,
+                            OopMapSet* oop_maps,
+                            int& frame_complete,
+                            int& frame_size_in_words,
+                            bool alloc_value_receiver) {
   // Before we get into the guts of the C2I adapter, see if we should be here
   // at all.  We've come from compiled code and are attempting to jump to the
   // interpreter, which means the caller made a static call to get here
@@ -582,11 +780,54 @@
 
   __ bind(skip_fixup);
 
+  bool has_value_argument = false;
+  if (ValueTypePassFieldsAsArgs) {
+    // Is there a value type argument?
+    for (int i = 0; i < sig_extended->length() && !has_value_argument; i++) {
+      has_value_argument = (sig_extended->at(i)._bt == T_VALUETYPE);
+    }
+    if (has_value_argument) {
+      // There is at least one value type argument: we're coming from
+      // compiled code so we have no buffers to back the value
+      // types. Allocate the buffers here with a runtime call.
+      OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
+
+      frame_complete = __ offset();
+
+      __ set_last_Java_frame(noreg, noreg, NULL);
+
+      __ mov(c_rarg0, r15_thread);
+      __ mov(c_rarg1, rbx);
+      __ mov64(c_rarg2, (int64_t)alloc_value_receiver);
+      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_value_types)));
+
+      oop_maps->add_gc_map((int)(__ pc() - start), map);
+      __ reset_last_Java_frame(false);
+
+      RegisterSaver::restore_live_registers(masm);
+
+      Label no_exception;
+      __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
+      __ jcc(Assembler::equal, no_exception);
+
+      __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
+      __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
+      __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+      __ bind(no_exception);
+
+      // We get an array of objects from the runtime call
+      __ get_vm_result(r13, r15_thread); // Use r13 as temporary because r10 is trashed by movptr()
+      __ get_vm_result_2(rbx, r15_thread); // TODO: required to keep the callee Method live?
+      __ mov(r10, r13);
+    }
+  }
+
   // Since all args are passed on the stack, total_args_passed *
   // Interpreter::stackElementSize is the space we need. Plus 1 because
   // we also account for the return address location since
   // we store it first rather than hold it in rax across all the shuffling
-
+  int total_args_passed = compute_total_args_passed_int(sig_extended);
   int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;
 
   // stack is aligned, keep it that way
@@ -604,97 +845,99 @@
   __ movptr(Address(rsp, 0), rax);
 
   // Now write the args into the outgoing interpreter space
-  for (int i = 0; i < total_args_passed; i++) {
-    if (sig_bt[i] == T_VOID) {
-      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
-      continue;
-    }
-    // offset to start parameters
-    int st_off   = (total_args_passed - i) * Interpreter::stackElementSize;
-    int next_off = st_off - Interpreter::stackElementSize;
-
-    // Say 4 args:
-    // i   st_off
-    // 0   32 T_LONG
-    // 1   24 T_VOID
-    // 2   16 T_OBJECT
-    // 3    8 T_BOOL
-    // -    0 return address
-    //
-    // However to make thing extra confusing. Because we can fit a long/double in
-    // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter
-    // leaves one slot empty and only stores to a single slot. In this case the
-    // slot that is occupied is the T_VOID slot. See I said it was confusing.
-
-    VMReg r_1 = regs[i].first();
-    VMReg r_2 = regs[i].second();
-    if (!r_1->is_valid()) {
-      assert(!r_2->is_valid(), "");
-      continue;
-    }
-    if (r_1->is_stack()) {
-      // memory to memory use rax
-      int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
-      if (!r_2->is_valid()) {
-        // sign extend??
-        __ movl(rax, Address(rsp, ld_off));
-        __ movptr(Address(rsp, st_off), rax);
-
-      } else {
-
-        __ movq(rax, Address(rsp, ld_off));
-
-        // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
-        // T_DOUBLE and T_LONG use two slots in the interpreter
-        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
-          // ld_off == LSW, ld_off+wordSize == MSW
-          // st_off == MSW, next_off == LSW
-          __ movq(Address(rsp, next_off), rax);
-#ifdef ASSERT
-          // Overwrite the unused slot with known junk
-          __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
-          __ movptr(Address(rsp, st_off), rax);
-#endif /* ASSERT */
-        } else {
-          __ movq(Address(rsp, st_off), rax);
-        }
+  // next_arg_comp is the next argument from the compiler point of
+  // view (value type fields are passed in registers/on the stack). In
+  // sig_extended, a value type argument starts with: T_VALUETYPE,
+  // followed by the types of the fields of the value type and T_VOID
+  // to mark the end of the value type. ignored counts the number of
+  // T_VALUETYPE/T_VOID. next_vt_arg is the next value type argument:
+  // used to get the buffer for that argument from the pool of buffers
+  // we allocated above and want to pass to the
+  // interpreter. next_arg_int is the next argument from the
+  // interpreter point of view (value types are passed by reference).
+  bool has_oop_field = false;
+  for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
+       next_arg_comp < sig_extended->length(); next_arg_comp++) {
+    assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
+    assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?");
+    BasicType bt = sig_extended->at(next_arg_comp)._bt;
+    int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize;
+    if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) {
+      if (SigEntry::is_reserved_entry(sig_extended, next_arg_comp)) {
+        continue; // Ignore reserved entry
       }
-    } else if (r_1->is_Register()) {
-      Register r = r_1->as_Register();
-      if (!r_2->is_valid()) {
-        // must be only an int (or less ) so move only 32bits to slot
-        // why not sign extend??
-        __ movl(Address(rsp, st_off), r);
-      } else {
-        // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
-        // T_DOUBLE and T_LONG use two slots in the interpreter
-        if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
-          // long/double in gpr
-#ifdef ASSERT
-          // Overwrite the unused slot with known junk
-          __ mov64(rax, CONST64(0xdeadffffdeadaaab));
-          __ movptr(Address(rsp, st_off), rax);
-#endif /* ASSERT */
-          __ movq(Address(rsp, next_off), r);
-        } else {
-          __ movptr(Address(rsp, st_off), r);
-        }
-      }
-    } else {
-      assert(r_1->is_XMMRegister(), "");
-      if (!r_2->is_valid()) {
-        // only a float use just part of the slot
-        __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
-      } else {
+      int next_off = st_off - Interpreter::stackElementSize;
+      const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
+      const VMRegPair reg_pair = regs[next_arg_comp-ignored];
+      size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
+      gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
+                             size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false);
+      next_arg_int++;
 #ifdef ASSERT
+      if (bt == T_LONG || bt == T_DOUBLE) {
         // Overwrite the unused slot with known junk
-        __ mov64(rax, CONST64(0xdeadffffdeadaaac));
+        __ mov64(rax, CONST64(0xdeadffffdeadaaaa));
         __ movptr(Address(rsp, st_off), rax);
-#endif /* ASSERT */
-        __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister());
       }
+#endif /* ASSERT */
+    } else {
+      ignored++;
+      // get the buffer from the just allocated pool of buffers
+      int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_VALUETYPE);
+      __ load_heap_oop(r11, Address(r10, index));
+      next_vt_arg++; next_arg_int++;
+      int vt = 1;
+      // write fields we get from compiled code in registers/stack
+      // slots to the buffer: we know we are done with that value type
+      // argument when we hit the T_VOID that acts as an end of value
+      // type delimiter for this value type. Value types are flattened
+      // so we might encounter embedded value types. Each entry in
+      // sig_extended contains a field offset in the buffer.
+      do {
+        next_arg_comp++;
+        BasicType bt = sig_extended->at(next_arg_comp)._bt;
+        BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt;
+        if (bt == T_VALUETYPE) {
+          vt++;
+          ignored++;
+        } else if (bt == T_VOID &&
+                   prev_bt != T_LONG &&
+                   prev_bt != T_DOUBLE) {
+          vt--;
+          ignored++;
+        } else if (SigEntry::is_reserved_entry(sig_extended, next_arg_comp)) {
+          // Ignore reserved entry
+        } else {
+          int off = sig_extended->at(next_arg_comp)._offset;
+          assert(off > 0, "offset in object should be positive");
+          size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
+          bool is_oop = (bt == T_OBJECT || bt == T_ARRAY);
+          has_oop_field = has_oop_field || is_oop;
+          gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
+                                 size_in_bytes, regs[next_arg_comp-ignored], Address(r11, off), extraspace, is_oop);
+        }
+      } while (vt != 0);
+      // pass the buffer to the interpreter
+      __ movptr(Address(rsp, st_off), r11);
+    }
+  }
+
+  // If a value type was allocated and initialized, apply post barrier to all oop fields
+  if (has_value_argument && has_oop_field) {
+    __ push(r13); // save senderSP
+    __ push(rbx); // save callee
+    // Allocate argument register save area
+    if (frame::arg_reg_save_area_bytes != 0) {
+      __ subptr(rsp, frame::arg_reg_save_area_bytes);
+    }
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::apply_post_barriers), r15_thread, r10);
+    // De-allocate argument register save area
+    if (frame::arg_reg_save_area_bytes != 0) {
+      __ addptr(rsp, frame::arg_reg_save_area_bytes);
     }
+    __ pop(rbx); // restore callee
+    __ pop(r13); // restore senderSP
+  }
 
   // Schedule the branch target address early.
@@ -715,10 +958,60 @@
   __ bind(L_fail);
 }
 
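The same T_VOID-delimited walk drives compute_total_args_passed_int(), the c2i loop above and the i2c loop below. Factored out as a sketch (walk_value_type and its visitor are illustrative only, not part of the patch):

    // Advance i past one flattened value type in sig, starting at its
    // T_VALUETYPE entry. 'visit' sees each remaining entry, including the
    // T_VOID halves of longs/doubles, which the adapter helpers skip on
    // their own.
    template <typename Visitor>
    static void walk_value_type(const GrowableArray<SigEntry>* sig, int& i, Visitor visit) {
      int vt = 1; // nesting depth; the initial T_VALUETYPE is already consumed
      do {
        i++;
        BasicType bt = sig->at(i)._bt;
        BasicType prev_bt = sig->at(i-1)._bt;
        if (bt == T_VALUETYPE) {
          vt++;           // an embedded value type opens a nesting level
        } else if (bt == T_VOID && prev_bt != T_LONG && prev_bt != T_DOUBLE) {
          vt--;           // a delimiter T_VOID closes one level
        } else if (!SigEntry::is_reserved_entry(sig, i)) {
          visit(bt, sig->at(i)._offset); // a field (or a long/double half)
        }
      } while (vt != 0);
    }
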
+static void gen_i2c_adapter_helper(MacroAssembler* masm,
+                                   BasicType bt,
+                                   BasicType prev_bt,
+                                   size_t size_in_bytes,
+                                   const VMRegPair& reg_pair,
+                                   const Address& from,
+                                   bool is_oop) {
+  assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here");
+  if (bt == T_VOID) {
+    // Longs and doubles are passed in native word order, but misaligned
+    // in the 32-bit build.
+    assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
+    return;
+  }
+  assert(!reg_pair.second()->is_valid() || reg_pair.first()->next() == reg_pair.second(),
+         "scrambled load targets?");
+
+  bool wide = (size_in_bytes == wordSize);
+  VMReg r_1 = reg_pair.first();
+  VMReg r_2 = reg_pair.second();
+  assert(r_2->is_valid() == wide, "invalid size");
+  if (!r_1->is_valid()) {
+    assert(!r_2->is_valid(), "must be invalid");
+    return;
+  }
+
+  bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN);
+  if (!r_1->is_XMMRegister()) {
+    // We can use r13 as a temp here because compiled code doesn't need r13 as an
+    // input, and if we end up going through a c2i because of a miss a reasonable
+    // value of r13 will be generated.
+    Register dst = r_1->is_stack() ? r13 : r_1->as_Register();
+    if (is_oop) {
+      __ load_heap_oop(dst, from);
+    } else {
+      __ load_sized_value(dst, from, size_in_bytes, is_signed);
+    }
+    if (r_1->is_stack()) {
+      // Convert stack slot to an SP offset (+ wordSize to account for return address)
+      int st_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
+      __ movq(Address(rsp, st_off), dst);
+    }
+  } else {
+    if (wide) {
+      __ movdbl(r_1->as_XMMRegister(), from);
+    } else {
+      __ movflt(r_1->as_XMMRegister(), from);
+    }
+  }
+}
+
 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
-                                    int total_args_passed,
                                     int comp_args_on_stack,
-                                    const BasicType *sig_bt,
+                                    const GrowableArray<SigEntry>* sig,
                                     const VMRegPair *regs) {
 
   // Note: r13 contains the senderSP on entry. We must preserve it since
@@ -796,7 +1089,6 @@
     __ subptr(rsp, comp_words_on_stack * wordSize);
   }
 
-
   // Ensure compiled code always sees stack at proper alignment
   __ andptr(rsp, -16);
 
@@ -810,7 +1102,13 @@
 
   // Will jump to the compiled code just as if compiled code was doing it.
   // Pre-load the register-jump target early, to schedule it better.
-  __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
+  if (StressValueTypePassFieldsAsArgs) {
+    // For stress testing, don't unpack value types in the i2c adapter but
+    // call the value type entry point and let it take care of unpacking.
+    __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_value_offset())));
+  } else {
+    __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
+  }
 
 #if INCLUDE_JVMCI
   if (EnableJVMCI || UseAOT) {
@@ -824,84 +1122,69 @@
   }
 #endif // INCLUDE_JVMCI
 
+  int total_args_passed = compute_total_args_passed_int(sig);
   // Now generate the shuffle code.  Pick up all register args and move the
   // rest through the floating point stack top.
-  for (int i = 0; i < total_args_passed; i++) {
-    if (sig_bt[i] == T_VOID) {
-      // Longs and doubles are passed in native word order, but misaligned
-      // in the 32-bit build.
-      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
-      continue;
-    }
-
-    // Pick up 0, 1 or 2 words from SP+offset.
-
-    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
-           "scrambled load targets?");
-    // Load in argument order going down.
-    int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
-    // Point to interpreter value (vs. tag)
-    int next_off = ld_off - Interpreter::stackElementSize;
-    //
-    //
-    //
-    VMReg r_1 = regs[i].first();
-    VMReg r_2 = regs[i].second();
-    if (!r_1->is_valid()) {
-      assert(!r_2->is_valid(), "");
-      continue;
-    }
-    if (r_1->is_stack()) {
-      // Convert stack slot to an SP offset (+ wordSize to account for return address )
-      int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
-      // We can use r13 as a temp here because compiled code doesn't need r13 as an input
-      // and if we end up going thru a c2i because of a miss a reasonable value of r13
-      // will be generated.
-      if (!r_2->is_valid()) {
-        // sign extend???
-        __ movl(r13, Address(saved_sp, ld_off));
-        __ movptr(Address(rsp, st_off), r13);
-      } else {
-        //
-        // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
-        // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case
-        // So we must adjust where to pick up the data to match the interpreter.
-        //
-        // Interpreter local[n] == MSW, local[n+1] == LSW however locals
-        // are accessed as negative so LSW is at LOW address
-
-        // ld_off is MSW so get LSW
-        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
-                           next_off : ld_off;
-        __ movq(r13, Address(saved_sp, offset));
-        // st_off is LSW (i.e. reg.first())
-        __ movq(Address(rsp, st_off), r13);
-      }
-    } else if (r_1->is_Register()) {  // Register argument
-      Register r = r_1->as_Register();
-      assert(r != rax, "must be different");
-      if (r_2->is_valid()) {
-        //
-        // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
-        // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case
-        // So we must adjust where to pick up the data to match the interpreter.
-
-        const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
-                           next_off : ld_off;
-
-        // this can be a misaligned move
-        __ movq(r, Address(saved_sp, offset));
-      } else {
-        // sign extend and use a full word?
-        __ movl(r, Address(saved_sp, ld_off));
+  // next_arg_comp is the next argument from the compiler point of
+  // view (value type fields are passed in registers/on the stack). In
+  // sig, a value type argument starts with: T_VALUETYPE,
+  // followed by the types of the fields of the value type and T_VOID
+  // to mark the end of the value type. ignored counts the number of
+  // T_VALUETYPE/T_VOID. next_arg_int is the next argument from the
+  // interpreter point of view (value types are passed by reference).
+  for (int next_arg_comp = 0, ignored = 0, next_arg_int = 0; next_arg_comp < sig->length(); next_arg_comp++) {
+    assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
+    assert(next_arg_int <= total_args_passed, "more arguments from the interpreter than expected?");
+    BasicType bt = sig->at(next_arg_comp)._bt;
+    int ld_off = (total_args_passed - next_arg_int)*Interpreter::stackElementSize;
+    if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) {
+      // Load in argument order going down.
+      // Point to interpreter value (vs. tag)
+      if (SigEntry::is_reserved_entry(sig, next_arg_comp)) {
+        continue; // Ignore reserved entry
       }
+      int next_off = ld_off - Interpreter::stackElementSize;
+      int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off;
+      const VMRegPair reg_pair = regs[next_arg_comp-ignored];
+      size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
+      gen_i2c_adapter_helper(masm, bt, next_arg_comp > 0 ? sig->at(next_arg_comp-1)._bt : T_ILLEGAL,
+                             size_in_bytes, reg_pair, Address(saved_sp, offset), false);
+      next_arg_int++;
     } else {
-      if (!r_2->is_valid()) {
-        __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
-      } else {
-        __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
-      }
+-      if (!r_2->is_valid()) {
+-        __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
+-      } else {
+-        __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
+-      }
+      next_arg_int++;
+      ignored++;
+      // get the buffer for that value type
+      __ movptr(r10, Address(saved_sp, ld_off));
+      int vt = 1;
+      // load fields to registers/stack slots from the buffer: we know
+      // we are done with that value type argument when we hit the
+      // T_VOID that acts as an end of value type delimiter for this
+      // value type. Value types are flattened so we might encounter
+      // embedded value types. Each entry in sig contains a
+      // field offset in the buffer.
+      do {
+        next_arg_comp++;
+        BasicType bt = sig->at(next_arg_comp)._bt;
+        BasicType prev_bt = sig->at(next_arg_comp-1)._bt;
+        if (bt == T_VALUETYPE) {
+          vt++;
+          ignored++;
+        } else if (bt == T_VOID &&
+                   prev_bt != T_LONG &&
+                   prev_bt != T_DOUBLE) {
+          vt--;
+          ignored++;
+        } else if (SigEntry::is_reserved_entry(sig, next_arg_comp)) {
+          // Ignore reserved entry
+        } else {
+          int off = sig->at(next_arg_comp)._offset;
+          assert(off > 0, "offset in object should be positive");
+          size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
+          bool is_oop = (bt == T_OBJECT || bt == T_ARRAY);
+          gen_i2c_adapter_helper(masm, bt, prev_bt, size_in_bytes, regs[next_arg_comp - ignored], Address(r10, off), is_oop);
+        }
+      } while (vt != 0);
     }
   }
 
@@ -918,7 +1201,7 @@
   __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
 
   // put Method* where a c2i would expect should we end up there
-  // only needed becaus eof c2 resolve stubs return Method* as a result in
+  // only needed because c2 resolve stubs return Method* as a result in
   // rax
   __ mov(rax, rbx);
   __ jmp(r11);
 }
 
 // ---------------------------------------------------------------
 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
-                                                            int total_args_passed,
                                                             int comp_args_on_stack,
-                                                            const BasicType *sig_bt,
-                                                            const VMRegPair *regs,
-                                                            AdapterFingerPrint* fingerprint) {
+                                                            int comp_args_on_stack_cc,
+                                                            const GrowableArray<SigEntry>* sig,
+                                                            const VMRegPair* regs,
+                                                            const GrowableArray<SigEntry>* sig_cc,
+                                                            const VMRegPair* regs_cc,
+                                                            const GrowableArray<SigEntry>* sig_cc_ro,
+                                                            const VMRegPair* regs_cc_ro,
+                                                            AdapterFingerPrint* fingerprint,
+                                                            AdapterBlob*& new_adapter) {
   address i2c_entry = __ pc();
-
-  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
+  if (StressValueTypePassFieldsAsArgs) {
+    // For stress testing, don't unpack value types in the i2c adapter but
+    // call the value type entry point and let it take care of unpacking.
+    gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);
+  } else {
+    gen_i2c_adapter(masm, comp_args_on_stack_cc, sig_cc, regs_cc);
+  }
 
   // -------------------------------------------------------------------------
   // Generate a C2I adapter.  On entry we know rbx holds the Method* during calls
@@ -968,12 +1261,38 @@
     __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
   }
 
+  OopMapSet* oop_maps = new OopMapSet();
+  int frame_complete = CodeOffsets::frame_never_safe;
+  int frame_size_in_words = 0;
+
+  // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
+  address c2i_value_ro_entry = __ pc();
+  if (regs_cc != regs_cc_ro) {
+    Label unused;
+    gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
+    skip_fixup = unused;
+  }
+
+  // Scalarized c2i adapter
   address c2i_entry = __ pc();
+  gen_c2i_adapter(masm, sig_cc, regs_cc, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, true);
 
-  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
+  // Non-scalarized c2i adapter
+  address c2i_value_entry = c2i_entry;
+  if (regs != regs_cc) {
+    c2i_value_entry = __ pc();
+    Label unused;
+    gen_c2i_adapter(masm, sig, regs, unused, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
+  }
 
   __ flush();
-  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
+
+  // The c2i adapters might safepoint and trigger a GC. The caller must make sure
+  // that the GC knows about the locations of oop arguments passed to the c2i
+  // adapter.
+  bool caller_must_gc_arguments = (regs != regs_cc);
+  new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
+
+  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_value_entry, c2i_value_ro_entry, c2i_unverified_entry);
 }
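For orientation, the entries generated above pair up callers and callees as follows (a summary inferred from the code in this function; the names are the ones passed to AdapterHandlerLibrary::new_entry()):

    // i2c_entry           interpreted caller -> compiled callee: unpacks value
    //                     types into registers/stack slots (sig_cc/regs_cc),
    //                     unless StressValueTypePassFieldsAsArgs defers the
    //                     unpacking to the callee's value type entry point.
    // c2i_entry           scalarized compiled caller -> interpreter: allocates
    //                     buffers via a runtime call, packs the fields and
    //                     passes value types by reference (sig_cc/regs_cc).
    // c2i_value_ro_entry  like c2i_entry, but the receiver is already a
    //                     reference and is not re-packed (sig_cc_ro/regs_cc_ro);
    //                     only generated when regs_cc != regs_cc_ro.
    // c2i_value_entry     compiled caller that already passes value types by
    //                     reference (sig/regs); aliases c2i_entry when
    //                     regs == regs_cc.
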
 
 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
@@ -1031,6 +1350,7 @@
       // fall through
     case T_OBJECT:
     case T_ARRAY:
+    case T_VALUETYPE:
     case T_ADDRESS:
     case T_METADATA:
       if (int_args < Argument::n_int_register_parameters_c) {
@@ -1381,7 +1701,7 @@
     if (map != NULL) {
       __ movq(Address(rsp, offset), in_regs[i].first()->as_Register());
       if (in_sig_bt[i] == T_ARRAY) {
-        map->set_oop(VMRegImpl::stack2reg(slot));;
+        map->set_oop(VMRegImpl::stack2reg(slot));
       }
     } else {
       __ movq(in_regs[i].first()->as_Register(), Address(rsp, offset));
@@ -1415,6 +1735,7 @@
         // handled above
         break;
       case T_OBJECT:
+      case T_VALUETYPE:
       default: ShouldNotReachHere();
       }
     } else if (in_regs[i].first()->is_XMMRegister()) {
@@ -1790,7 +2111,8 @@
   if (VerifyOops) {
     for (int i = 0; i < method->size_of_parameters(); i++) {
       if (sig_bt[i] == T_OBJECT ||
-          sig_bt[i] == T_ARRAY) {
+          sig_bt[i] == T_ARRAY ||
+          sig_bt[i] == T_VALUETYPE) {
         VMReg r = regs[i].first();
         assert(r->is_valid(), "bad oop arg");
         if (r->is_stack()) {
@@ -2330,6 +2652,7 @@
 #endif
       break;
     }
+    case T_VALUETYPE:
     case T_OBJECT:
       assert(!is_critical_native, "no oop arguments");
       object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
@@ -2465,6 +2788,10 @@
 
   // Load (object->mark() | 1) into swap_reg %rax
   __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+  if (EnableValhalla && !UseBiasedLocking) {
+    // Take the slow path for is_always_locked marks: they use the biased
+    // pattern, which is never set naturally when !UseBiasedLocking.
+    __ andptr(swap_reg, ~markOopDesc::biased_lock_bit_in_place);
+  }
 
   // Save (object->mark() | 1) into BasicLock's displaced header
   __ movptr(Address(lock_reg, mark_word_offset), swap_reg);
@@ -2526,6 +2853,7 @@
     // Result is in xmm0 we'll save as needed
     break;
   case T_ARRAY:                 // Really a handle
+  case T_VALUETYPE:             // Really a handle
   case T_OBJECT:                // Really a handle
     break; // can't de-handlize until after safepoint check
   case T_VOID: break;
@@ -2679,7 +3007,7 @@
 
   __ reset_last_Java_frame(false);
 
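Returning to the locking hunk above: the extra andptr under EnableValhalla is easiest to read as bit arithmetic on the mark word (a sketch; the biased-lock bit is bit 2 of the mark per markOopDesc):

    // A value type's mark permanently carries the biased-lock encoding
    // (is_always_locked), which cannot occur naturally when biased locking
    // is off. Clearing that bit from the expected value,
    //   swap_reg = (mark | 1) & ~markOopDesc::biased_lock_bit_in_place,
    // guarantees the fast-path cmpxchg never matches an always-locked mark,
    // so value types always take the slow path.
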
   // Unbox oop result, e.g. JNIHandles::resolve value.
-  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
+  if (ret_type == T_OBJECT || ret_type == T_ARRAY || ret_type == T_VALUETYPE) {
     __ resolve_jobject(rax /* value */,
                        r15_thread /* thread */,
                        rcx /* tmp */);
@@ -4025,3 +4353,114 @@
   _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
 }
 #endif // COMPILER2
+
+BufferedValueTypeBlob* SharedRuntime::generate_buffered_value_type_adapter(const ValueKlass* vk) {
+  BufferBlob* buf = BufferBlob::create("value types pack/unpack", 16 * K);
+  CodeBuffer buffer(buf);
+  short buffer_locs[20];
+  buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs,
+                                         sizeof(buffer_locs)/sizeof(relocInfo));
+
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+
+  const Array<SigEntry>* sig_vk = vk->extended_sig();
+  const Array<VMRegPair>* regs = vk->return_regs();
+
+  int pack_fields_off = __ offset();
+
+  int j = 1;
+  for (int i = 0; i < sig_vk->length(); i++) {
+    BasicType bt = sig_vk->at(i)._bt;
+    if (bt == T_VALUETYPE) {
+      continue;
+    }
+    if (bt == T_VOID) {
+      if (sig_vk->at(i-1)._bt == T_LONG ||
+          sig_vk->at(i-1)._bt == T_DOUBLE) {
+        j++;
+      }
+      continue;
+    }
+    int off = sig_vk->at(i)._offset;
+    assert(off > 0, "offset in object should be positive");
+    VMRegPair pair = regs->at(j);
+    VMReg r_1 = pair.first();
+    VMReg r_2 = pair.second();
+    Address to(rax, off);
+    if (bt == T_FLOAT) {
+      __ movflt(to, r_1->as_XMMRegister());
+    } else if (bt == T_DOUBLE) {
+      __ movdbl(to, r_1->as_XMMRegister());
+    } else if (bt == T_OBJECT || bt == T_ARRAY) {
+      Register val = r_1->as_Register();
+      assert_different_registers(rax, val);
+      // We don't need barriers because the destination is a newly allocated object.
+      // Also, we cannot use store_heap_oop(to, val) because it uses r8 as tmp.
+      if (UseCompressedOops) {
+        __ encode_heap_oop(val);
+        __ movl(to, val);
+      } else {
+        __ movptr(to, val);
+      }
+    } else {
+      assert(is_java_primitive(bt), "unexpected basic type");
+      assert_different_registers(rax, r_1->as_Register());
+      size_t size_in_bytes = type2aelembytes(bt);
+      __ store_sized_value(to, r_1->as_Register(), size_in_bytes);
+    }
+    j++;
+  }
+  assert(j == regs->length(), "missed a field?");
+
+  __ ret(0);
+
+  int unpack_fields_off = __ offset();
+
+  j = 1;
+  for (int i = 0; i < sig_vk->length(); i++) {
+    BasicType bt = sig_vk->at(i)._bt;
+    if (bt == T_VALUETYPE) {
+      continue;
+    }
+    if (bt == T_VOID) {
+      if (sig_vk->at(i-1)._bt == T_LONG ||
+          sig_vk->at(i-1)._bt == T_DOUBLE) {
+        j++;
+      }
+      continue;
+    }
+    int off = sig_vk->at(i)._offset;
+    assert(off > 0, "offset in object should be positive");
+    VMRegPair pair = regs->at(j);
+    VMReg r_1 = pair.first();
+    VMReg r_2 = pair.second();
+    Address from(rax, off);
+    if (bt == T_FLOAT) {
+      __ movflt(r_1->as_XMMRegister(), from);
+    } else if (bt == T_DOUBLE) {
+      __ movdbl(r_1->as_XMMRegister(), from);
+    } else if (bt == T_OBJECT || bt == T_ARRAY) {
+      assert_different_registers(rax, r_1->as_Register());
+      __ load_heap_oop(r_1->as_Register(), from);
+    } else {
+      assert(is_java_primitive(bt), "unexpected basic type");
+      assert_different_registers(rax, r_1->as_Register());
+      size_t size_in_bytes = type2aelembytes(bt);
+      __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN);
+    }
+    j++;
+  }
+  assert(j == regs->length(), "missed a field?");
+
+  if (StressValueTypeReturnedAsFields) {
+    __ load_klass(rax, rax);
+    __ orptr(rax, 1);
+  }
+
+  __ ret(0);
+
+  __ flush();
+
+  return BufferedValueTypeBlob::create(&buffer, pack_fields_off, unpack_fields_off);
+}
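
A rough sketch of the contract of the two entry points generated above, inferred from the loads and stores in this function (how callers look up the blob is outside this file):

    // pack_fields entry (at pack_fields_off):
    //   on entry: rax = a freshly allocated buffer of type vk; the field
    //             values sit in the registers described by vk->return_regs()
    //   effect:   stores each field at its _offset in the buffer (no GC
    //             barriers needed since the buffer is newly allocated)
    //
    // unpack_fields entry (at unpack_fields_off):
    //   on entry: rax = an existing buffered value of type vk
    //   effect:   loads each field from its _offset into the return
    //             registers; with StressValueTypeReturnedAsFields, rax is
    //             then replaced by the klass pointer with its low bit set,
    //             the tag that tells the caller that fields were returned
    //             in registers.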