--- old/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2016-11-24 11:13:23.123604234 +0100 +++ new/src/cpu/x86/vm/sharedRuntime_x86_64.cpp 2016-11-24 11:13:23.057604614 +0100 @@ -545,13 +545,136 @@ __ bind(L); } +// For each value type argument, sig includes the list of fields of +// the value type. This utility function computes the number of +// arguments for the call if value types are passed by reference (the +// calling convention the interpreter expects). +static int compute_total_args_passed(const GrowableArray& sig) { + int total_args_passed = 0; + if (ValueTypePassFieldsAsArgs) { + for (int i = 0; i < sig.length(); i++) { + BasicType bt = sig.at(i)._bt; + if (bt == T_VALUETYPE) { + // In sig, a value type argument starts with: T_VALUETYPE, + // followed by the types of the fields of the value type and + // T_VOID to mark the end of the value type. Value types are + // flattened so, for instance: T_VALUETYPE T_INT T_VALUETYPE + // T_INT T_LONG T_VOID T_VOID T_VOID is a value type with an + // int field and a value type field that itself has 2 fields, an + // int and a long + total_args_passed++; + int vt = 1; + do { + i++; + BasicType bt = sig.at(i)._bt; + BasicType prev_bt = sig.at(i-1)._bt; + if (bt == T_VALUETYPE) { + vt++; + } else if (bt == T_VOID && + prev_bt != T_LONG && + prev_bt != T_DOUBLE) { + vt--; + } + } while (vt != 0); + } else { + total_args_passed++; + } + } + } else { + total_args_passed = sig.length(); + } + return total_args_passed; +} + + +static void gen_c2i_adapter_helper(MacroAssembler *masm, + BasicType bt, + BasicType prev_bt, + const VMRegPair& reg_pair, + const Address& to, + int extraspace) { + assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here"); + if (bt == T_VOID) { + assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half"); + return; + } + + // Say 4 args: + // i st_off + // 0 32 T_LONG + // 1 24 T_VOID + // 2 16 T_OBJECT + // 3 8 T_BOOL + // - 0 return address + // + // However to make things extra 
confusing. Because we can fit a long/double in + // a single slot on a 64 bit vm and it would be silly to break them up, the interpreter + // leaves one slot empty and only stores to a single slot. In this case the + // slot that is occupied is the T_VOID slot. See I said it was confusing. + + VMReg r_1 = reg_pair.first(); + VMReg r_2 = reg_pair.second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + return; + } + if (r_1->is_stack()) { + // memory to memory use rax + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + if (!r_2->is_valid()) { + // sign extend?? + __ movl(rax, Address(rsp, ld_off)); + __ movl(to, rax); + } else { + + __ movq(rax, Address(rsp, ld_off)); + + // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG + // T_DOUBLE and T_LONG use two slots in the interpreter + if ( bt == T_LONG || bt == T_DOUBLE) { + // ld_off == LSW, ld_off+wordSize == MSW + // st_off == MSW, next_off == LSW + __ movq(to, rax); + } else { + __ movq(to, rax); + } + } + } else if (r_1->is_Register()) { + Register r = r_1->as_Register(); + if (!r_2->is_valid()) { + // must be only an int (or less ) so move only 32bits to slot + // why not sign extend?? 
+ __ movl(to, r); + } else { + // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG + // T_DOUBLE and T_LONG use two slots in the interpreter + if ( bt == T_LONG || bt == T_DOUBLE) { + // long/double in gpr + __ movq(to, r); + } else { + __ movptr(to, r); + } + } + } else { + assert(r_1->is_XMMRegister(), ""); + if (!r_2->is_valid()) { + // only a float use just part of the slot + __ movflt(to, r_1->as_XMMRegister()); + } else { + __ movdbl(to, r_1->as_XMMRegister()); + } + } +} + static void gen_c2i_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray& sig, const VMRegPair *regs, - Label& skip_fixup) { + Label& skip_fixup, + address start, + OopMapSet*& oop_maps, + int& frame_complete, + int& frame_size_in_words) { // Before we get into the guts of the C2I adapter, see if we should be here // at all. We've come from compiled code and are attempting to jump to the // interpreter, which means the caller made a static call to get here @@ -561,11 +684,57 @@ __ bind(skip_fixup); + if (ValueTypePassFieldsAsArgs) { + // Is there a value type argument? + int i = 0; + for (; i < sig.length() && sig.at(i)._bt != T_VALUETYPE; i++); + + if (i != sig.length()) { + // There is at least one value type argument: we're coming from + // compiled code so we have no buffers to back the value + // types. Allocate the buffers here with a runtime call. 
+ oop_maps = new OopMapSet(); + OopMap* map = NULL; + + map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + frame_complete = __ offset(); + + __ set_last_Java_frame(noreg, noreg, NULL); + + __ mov(c_rarg0, r15_thread); + + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_value_types))); + + oop_maps->add_gc_map((int)(__ pc() - start), map); + __ reset_last_Java_frame(false, false); + + RegisterSaver::restore_live_registers(masm); + + Label no_exception; + __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); + __ jcc(Assembler::equal, no_exception); + + __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD); + __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + __ bind(no_exception); + + // We get an array of objects from the runtime call + int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(T_OBJECT); + __ get_vm_result(r13, r15_thread); + __ addptr(r13, offset_in_bytes); + __ mov(r10, r13); + } + } + + // Since all args are passed on the stack, total_args_passed * // Interpreter::stackElementSize is the space we need. 
Plus 1 because // we also account for the return address location since // we store it first rather than hold it in rax across all the shuffling - + int total_args_passed = compute_total_args_passed(sig); int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize; // stack is aligned, keep it that way @@ -583,96 +752,66 @@ __ movptr(Address(rsp, 0), rax); // Now write the args into the outgoing interpreter space - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - // offset to start parameters - int st_off = (total_args_passed - i) * Interpreter::stackElementSize; - int next_off = st_off - Interpreter::stackElementSize; - - // Say 4 args: - // i st_off - // 0 32 T_LONG - // 1 24 T_VOID - // 2 16 T_OBJECT - // 3 8 T_BOOL - // - 0 return address - // - // However to make thing extra confusing. Because we can fit a long/double in - // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter - // leaves one slot empty and only stores to a single slot. In this case the - // slot that is occupied is the T_VOID slot. See I said it was confusing. - - VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - continue; - } - if (r_1->is_stack()) { - // memory to memory use rax - int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; - if (!r_2->is_valid()) { - // sign extend?? 
- __ movl(rax, Address(rsp, ld_off)); - __ movptr(Address(rsp, st_off), rax); - - } else { - - __ movq(rax, Address(rsp, ld_off)); - - // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG - // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { - // ld_off == LSW, ld_off+wordSize == MSW - // st_off == MSW, next_off == LSW - __ movq(Address(rsp, next_off), rax); + // i is the next argument from the compiler point of view (value + // type fields are passed in registers/on the stack). In sig, a + // value type argument starts with: T_VALUETYPE, followed by the + // types of the fields of the value type and T_VOID to mark the end + // of the value type. ignored counts the number of + // T_VALUETYPE/T_VOID. j is the next value type argument: used to + // get the buffer for that argument from the pool of buffers we + // allocated above and want to pass to the interpreter. k is the + // next argument from the interpreter point of view (value types are + // passed by reference). + for (int i = 0, ignored = 0, j = 0, k = 0; i < sig.length(); i++) { + assert((i == 0 && ignored == 0) || ignored < i, ""); + assert(k < total_args_passed, ""); + BasicType bt = sig.at(i)._bt; + int st_off = (total_args_passed - k) * Interpreter::stackElementSize; + if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) { + int next_off = st_off - Interpreter::stackElementSize; + const int offset = (bt==T_LONG||bt==T_DOUBLE) ? next_off : st_off; + gen_c2i_adapter_helper(masm, bt, i > 0 ? 
sig.at(i-1)._bt : T_ILLEGAL, regs[i-ignored], Address(rsp, offset), extraspace); + k++; #ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov64(rax, CONST64(0xdeadffffdeadaaaa)); - __ movptr(Address(rsp, st_off), rax); -#endif /* ASSERT */ - } else { - __ movq(Address(rsp, st_off), rax); - } + if (bt==T_LONG || bt==T_DOUBLE) { + // Overwrite the unused slot with known junk + __ mov64(rax, CONST64(0xdeadffffdeadaaaa)); + __ movptr(Address(rsp, st_off), rax); } - } else if (r_1->is_Register()) { - Register r = r_1->as_Register(); - if (!r_2->is_valid()) { - // must be only an int (or less ) so move only 32bits to slot - // why not sign extend?? - __ movl(Address(rsp, st_off), r); - } else { - // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG - // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { - // long/double in gpr -#ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov64(rax, CONST64(0xdeadffffdeadaaab)); - __ movptr(Address(rsp, st_off), rax); #endif /* ASSERT */ - __ movq(Address(rsp, next_off), r); + } else { + ignored++; + // get the buffer from the just allocated pool of buffers + __ load_heap_oop(r11, Address(r10, j * type2aelembytes(T_VALUETYPE))); + j++; k++; + int vt = 1; + // write fields we get from compiled code in registers/stack + // slots to the buffer: we know we are done with that value type + // argument when we hit the T_VOID that acts as an end of value + // type delimiter for this value type. Value types are flattened + // so we might encounter embedded value types. Each entry in + // sig contains a field offset in the buffer. 
+ do { + i++; + BasicType bt = sig.at(i)._bt; + BasicType prev_bt = sig.at(i-1)._bt; + if (bt == T_VALUETYPE) { + vt++; + ignored++; + } else if (bt == T_VOID && + prev_bt != T_LONG && + prev_bt != T_DOUBLE) { + vt--; + ignored++; } else { - __ movptr(Address(rsp, st_off), r); + int off = sig.at(i)._offset; + assert(off > 0, ""); + gen_c2i_adapter_helper(masm, bt, i > 0 ? sig.at(i-1)._bt : T_ILLEGAL, regs[i-ignored], Address(r11, off), extraspace); } - } - } else { - assert(r_1->is_XMMRegister(), ""); - if (!r_2->is_valid()) { - // only a float use just part of the slot - __ movflt(Address(rsp, st_off), r_1->as_XMMRegister()); - } else { -#ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov64(rax, CONST64(0xdeadffffdeadaaac)); - __ movptr(Address(rsp, st_off), rax); -#endif /* ASSERT */ - __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister()); - } + } while (vt != 0); + // pass the buffer to the interpreter + __ movptr(Address(rsp, st_off), r11); } } @@ -694,10 +833,83 @@ __ bind(L_fail); } +static void gen_i2c_adapter_helper(MacroAssembler *masm, + BasicType bt, + BasicType prev_bt, + const VMRegPair& reg_pair, + const Address& from) { + assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here"); + if (bt == T_VOID) { + // Longs and doubles are passed in native word order, but misaligned + // in the 32-bit build. + assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half"); + return; + } + // Pick up 0, 1 or 2 words from SP+offset. 
+ + assert(!reg_pair.second()->is_valid() || reg_pair.first()->next() == reg_pair.second(), + "scrambled load targets?"); + // + // + // + VMReg r_1 = reg_pair.first(); + VMReg r_2 = reg_pair.second(); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), ""); + return; + } + if (r_1->is_stack()) { + // Convert stack slot to an SP offset (+ wordSize to account for return address ) + int st_off = reg_pair.first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize; + + // We can use r13 as a temp here because compiled code doesn't need r13 as an input + // and if we end up going thru a c2i because of a miss a reasonable value of r13 + // will be generated. + if (!r_2->is_valid()) { + // sign extend??? + __ movl(r13, from); + __ movptr(Address(rsp, st_off), r13); + } else { + // + // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE + // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case + // So we must adjust where to pick up the data to match the interpreter. + // + // Interpreter local[n] == MSW, local[n+1] == LSW however locals + // are accessed as negative so LSW is at LOW address + + // ld_off is MSW so get LSW + __ movq(r13, from); + // st_off is LSW (i.e. reg.first()) + __ movq(Address(rsp, st_off), r13); + } + } else if (r_1->is_Register()) { // Register argument + Register r = r_1->as_Register(); + assert(r != rax, "must be different"); + if (r_2->is_valid()) { + // + // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE + // the interpreter allocates two slots but only uses one for the T_LONG or T_DOUBLE case + // So we must adjust where to pick up the data to match the interpreter. + + // this can be a misaligned move + __ movq(r, from); + } else { + // sign extend and use a full word? 
+ __ movl(r, from); + } + } else { + if (!r_2->is_valid()) { + __ movflt(r_1->as_XMMRegister(), from); + } else { + __ movdbl(r_1->as_XMMRegister(), from); + } + } +} + void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray& sig, const VMRegPair *regs) { // Note: r13 contains the senderSP on entry. We must preserve it since @@ -803,84 +1015,59 @@ } #endif // INCLUDE_JVMCI + int total_args_passed = compute_total_args_passed(sig); // Now generate the shuffle code. Pick up all register args and move the // rest through the floating point stack top. - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - // Longs and doubles are passed in native word order, but misaligned - // in the 32-bit build. - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - - // Pick up 0, 1 or 2 words from SP+offset. - assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), - "scrambled load targets?"); - // Load in argument order going down. - int ld_off = (total_args_passed - i)*Interpreter::stackElementSize; - // Point to interpreter value (vs. tag) - int next_off = ld_off - Interpreter::stackElementSize; - // - // - // - VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - continue; - } - if (r_1->is_stack()) { - // Convert stack slot to an SP offset (+ wordSize to account for return address ) - int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize; - - // We can use r13 as a temp here because compiled code doesn't need r13 as an input - // and if we end up going thru a c2i because of a miss a reasonable value of r13 - // will be generated. - if (!r_2->is_valid()) { - // sign extend??? 
- __ movl(r13, Address(saved_sp, ld_off)); - __ movptr(Address(rsp, st_off), r13); - } else { - // - // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE - // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case - // So we must adjust where to pick up the data to match the interpreter. - // - // Interpreter local[n] == MSW, local[n+1] == LSW however locals - // are accessed as negative so LSW is at LOW address - - // ld_off is MSW so get LSW - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? - next_off : ld_off; - __ movq(r13, Address(saved_sp, offset)); - // st_off is LSW (i.e. reg.first()) - __ movq(Address(rsp, st_off), r13); - } - } else if (r_1->is_Register()) { // Register argument - Register r = r_1->as_Register(); - assert(r != rax, "must be different"); - if (r_2->is_valid()) { - // - // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE - // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case - // So we must adjust where to pick up the data to match the interpreter. - - const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? - next_off : ld_off; - - // this can be a misaligned move - __ movq(r, Address(saved_sp, offset)); - } else { - // sign extend and use a full word? - __ movl(r, Address(saved_sp, ld_off)); - } + // i is the next argument from the compiler point of view (value + // type fields are passed in registers/on the stack). In sig, a + // value type argument starts with: T_VALUETYPE, followed by the + // types of the fields of the value type and T_VOID to mark the end + // of the value type. ignored counts the number of + // T_VALUETYPE/T_VOID. k is the next argument from the interpreter + // point of view (value types are passed by reference). 
+ for (int i = 0, ignored = 0, k = 0; i < sig.length(); i++) { + assert((i == 0 && ignored == 0) || ignored < i, ""); + assert(k < total_args_passed, ""); + BasicType bt = sig.at(i)._bt; + int ld_off = (total_args_passed - k)*Interpreter::stackElementSize; + if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) { + // Load in argument order going down. + // Point to interpreter value (vs. tag) + int next_off = ld_off - Interpreter::stackElementSize; + const int offset = (bt==T_LONG||bt==T_DOUBLE) ? next_off : ld_off; + gen_i2c_adapter_helper(masm, bt, i > 0 ? sig.at(i-1)._bt : T_ILLEGAL, regs[i-ignored], Address(saved_sp, offset)); + k++; } else { - if (!r_2->is_valid()) { - __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off)); - } else { - __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off)); - } + k++; + ignored++; + // get the buffer for that value type + __ movptr(r10, Address(saved_sp, ld_off)); + int vt = 1; + // load fields to registers/stack slots from the buffer: we know + // we are done with that value type argument when we hit the + // T_VOID that acts as an end of value type delimiter for this + // value type. Value types are flattened so we might encounter + // embedded value types. Each entry in sig contains a field + // offset in the buffer. 
+ do { + i++; + BasicType bt = sig.at(i)._bt; + BasicType prev_bt = sig.at(i-1)._bt; + if (bt == T_VALUETYPE) { + vt++; + ignored++; + } else if (bt == T_VOID && + prev_bt != T_LONG && + prev_bt != T_DOUBLE) { + vt--; + ignored++; + } else { + int off = sig.at(i)._offset; + assert(off > 0, ""); + gen_i2c_adapter_helper(masm, bt, prev_bt, regs[i - ignored], Address(r10, off)); + } + } while (vt != 0); } } @@ -897,7 +1084,7 @@ __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // put Method* where a c2i would expect should we end up there - // only needed becaus eof c2 resolve stubs return Method* as a result in + // only needed because of c2 resolve stubs return Method* as a result in // rax __ mov(rax, rbx); __ jmp(r11); @@ -905,14 +1092,14 @@ // --------------------------------------------------------------- AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, - const BasicType *sig_bt, + const GrowableArray& sig, const VMRegPair *regs, - AdapterFingerPrint* fingerprint) { + AdapterFingerPrint* fingerprint, + AdapterBlob*& new_adapter) { address i2c_entry = __ pc(); - gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); + gen_i2c_adapter(masm, comp_args_on_stack, sig, regs); // ------------------------------------------------------------------------- // Generate a C2I adapter. 
On entry we know rbx holds the Method* during calls @@ -949,9 +1136,13 @@ address c2i_entry = __ pc(); - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); + OopMapSet* oop_maps = NULL; + int frame_complete = CodeOffsets::frame_never_safe; + int frame_size_in_words = 0; + gen_c2i_adapter(masm, sig, regs, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words); __ flush(); + new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps); return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); }