src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp

*** 26,35 **** --- 26,36 ---- #ifndef _WINDOWS #include "alloca.h" #endif #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" + #include "classfile/symbolTable.hpp" #include "code/debugInfoRec.hpp" #include "code/icBuffer.hpp" #include "code/nativeInst.hpp" #include "code/vtableStubs.hpp" #include "gc/shared/collectedHeap.hpp"
*** 489,498 **** --- 490,500 ---- assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); // fall through case T_OBJECT: case T_ARRAY: case T_ADDRESS: + case T_VALUETYPE: if (int_args < Argument::n_int_register_parameters_j) { regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); } else { regs[i].set2(VMRegImpl::stack2reg(stk_args)); stk_args += 2;
*** 522,531 **** --- 524,615 ---- } return align_up(stk_args, 2); } + // Same as java_calling_convention() but for multiple return + // values. There's no way to store them on the stack so if we don't + // have enough registers, multiple values can't be returned. + const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1; + const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j; + int SharedRuntime::java_return_convention(const BasicType *sig_bt, + VMRegPair *regs, + int total_args_passed) { + // Create the mapping between argument positions and + // registers. + static const Register INT_ArgReg[java_return_convention_max_int] = { + rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0 + }; + static const XMMRegister FP_ArgReg[java_return_convention_max_float] = { + j_farg0, j_farg1, j_farg2, j_farg3, + j_farg4, j_farg5, j_farg6, j_farg7 + }; + + + uint int_args = 0; + uint fp_args = 0; + + for (int i = 0; i < total_args_passed; i++) { + switch (sig_bt[i]) { + case T_BOOLEAN: + case T_CHAR: + case T_BYTE: + case T_SHORT: + case T_INT: + if (int_args < Argument::n_int_register_parameters_j+1) { + regs[i].set1(INT_ArgReg[int_args]->as_VMReg()); + int_args++; + } else { + return -1; + } + break; + case T_VOID: + // halves of T_LONG or T_DOUBLE + assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); + regs[i].set_bad(); + break; + case T_LONG: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + // fall through + case T_OBJECT: + case T_VALUETYPE: + case T_ARRAY: + case T_ADDRESS: + case T_METADATA: + if (int_args < Argument::n_int_register_parameters_j+1) { + regs[i].set2(INT_ArgReg[int_args]->as_VMReg()); + int_args++; + } else { + return -1; + } + break; + case T_FLOAT: + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set1(FP_ArgReg[fp_args]->as_VMReg()); + fp_args++; + } else { + return -1; + } + break; + case T_DOUBLE: + assert(sig_bt[i + 1] == T_VOID, "expecting half"); + if (fp_args < Argument::n_float_register_parameters_j) { + regs[i].set2(FP_ArgReg[fp_args]->as_VMReg()); + fp_args++; + } else { + return -1; + } + break; + default: + ShouldNotReachHere(); + break; + } + } + + return int_args + fp_args; + } + // Patch the callers callsite with entry to compiled code if it exists. static void patch_callers_callsite(MacroAssembler *masm) { Label L; __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD); __ jcc(Assembler::equal, L);
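The return convention above is easiest to see as a first-come-first-served walk over the flattened return signature. Below is a minimal standalone C++ model of that policy; the register tables are reduced to counts (7 integer, 8 float, per the constants above), and the function name and main() example are illustrative only, not VM code.

// model_return_convention.cpp - illustrative sketch, not HotSpot code.
// Mirrors the policy of java_return_convention(): hand out up to 7 integer
// and 8 float return registers first-come-first-served, and fail (-1) if
// the flattened return signature doesn't fit, since extra return values
// cannot be spilled to the stack.
#include <cstdio>
#include <vector>

enum BasicType { T_INT, T_LONG, T_FLOAT, T_DOUBLE, T_OBJECT, T_VOID };

int model_return_convention(const std::vector<BasicType>& sig) {
  const unsigned max_int = 7, max_float = 8;   // x86-64 counts from above
  unsigned int_args = 0, fp_args = 0;
  for (size_t i = 0; i < sig.size(); i++) {
    switch (sig[i]) {
      case T_INT: case T_LONG: case T_OBJECT:
        if (int_args >= max_int) return -1;    // out of integer registers
        int_args++;
        break;
      case T_FLOAT: case T_DOUBLE:
        if (fp_args >= max_float) return -1;   // out of float registers
        fp_args++;
        break;
      case T_VOID:                             // second half of long/double
        break;                                 // no register of its own
    }
  }
  return int_args + fp_args;                   // registers used
}

int main() {
  // A flattened return of { int, long, double } uses 3 registers:
  std::vector<BasicType> sig = { T_INT, T_LONG, T_VOID, T_DOUBLE, T_VOID };
  printf("%d\n", model_return_convention(sig)); // prints 3
  return 0;
}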
*** 564,594 **** // restore sp __ mov(rsp, r13); __ bind(L); } static void gen_c2i_adapter(MacroAssembler *masm, ! int total_args_passed, ! int comp_args_on_stack, ! const BasicType *sig_bt, const VMRegPair *regs, ! Label& skip_fixup) { // Before we get into the guts of the C2I adapter, see if we should be here // at all. We've come from compiled code and are attempting to jump to the // interpreter, which means the caller made a static call to get here // (vcalls always get a compiled target if there is one). Check for a // compiled target. If there is one, we need to patch the caller's call. patch_callers_callsite(masm); __ bind(skip_fixup); // Since all args are passed on the stack, total_args_passed * // Interpreter::stackElementSize is the space we need. Plus 1 because // we also account for the return address location since // we store it first rather than hold it in rax across all the shuffling ! int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize; // stack is aligned, keep it that way extraspace = align_up(extraspace, 2*wordSize); --- 648,835 ---- // restore sp __ mov(rsp, r13); __ bind(L); } + // For each value type argument, sig includes the list of fields of + // the value type. This utility function computes the number of + // arguments for the call if value types are passed by reference (the + // calling convention the interpreter expects). + static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) { + int total_args_passed = 0; + if (ValueTypePassFieldsAsArgs) { + for (int i = 0; i < sig_extended->length(); i++) { + BasicType bt = sig_extended->at(i)._bt; + if (SigEntry::is_reserved_entry(sig_extended, i)) { + // Ignore reserved entry + } else if (bt == T_VALUETYPE) { + // In sig_extended, a value type argument starts with: + // T_VALUETYPE, followed by the types of the fields of the + // value type and T_VOID to mark the end of the value + // type. Value types are flattened so, for instance, in the + // case of a value type with an int field and a value type + // field that itself has 2 fields, an int and a long: + // T_VALUETYPE T_INT T_VALUETYPE T_INT T_LONG T_VOID (second + // slot for the T_LONG) T_VOID (inner T_VALUETYPE) T_VOID + // (outer T_VALUETYPE) + total_args_passed++; + int vt = 1; + do { + i++; + BasicType bt = sig_extended->at(i)._bt; + BasicType prev_bt = sig_extended->at(i-1)._bt; + if (bt == T_VALUETYPE) { + vt++; + } else if (bt == T_VOID && + prev_bt != T_LONG && + prev_bt != T_DOUBLE) { + vt--; + } + } while (vt != 0); + } else { + total_args_passed++; + } + } + } else { + total_args_passed = sig_extended->length(); + } + return total_args_passed; + } + + + static void gen_c2i_adapter_helper(MacroAssembler* masm, + BasicType bt, + BasicType prev_bt, + size_t size_in_bytes, + const VMRegPair& reg_pair, + const Address& to, + int extraspace, + bool is_oop) { + assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here"); + if (bt == T_VOID) { + assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half"); + return; + } + + // Say 4 args: + // i st_off + // 0 32 T_LONG + // 1 24 T_VOID + // 2 16 T_OBJECT + // 3 8 T_BOOL + // - 0 return address + // + // However, to make things extra confusing: because we can fit a long/double in + // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter + // leaves one slot empty and only stores to a single slot. In this case the + // slot that is occupied is the T_VOID slot. See, I said it was confusing. + + bool wide = (size_in_bytes == wordSize); + VMReg r_1 = reg_pair.first(); + VMReg r_2 = reg_pair.second(); + assert(r_2->is_valid() == wide, "invalid size"); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), "must be invalid"); + return; + } + + if (!r_1->is_XMMRegister()) { + Register val = rax; + assert_different_registers(to.base(), val); + if (r_1->is_stack()) { + int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; + __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false); + } else { + val = r_1->as_Register(); + } + if (is_oop) { + // We don't need barriers because the destination is a newly allocated object. + // Also, we cannot use store_heap_oop(to, val) because it uses r8 as tmp. + if (UseCompressedOops) { + __ encode_heap_oop(val); + __ movl(to, val); + } else { + __ movptr(to, val); + } + } else { + __ store_sized_value(to, val, size_in_bytes); + } + } else { + if (wide) { + __ movdbl(to, r_1->as_XMMRegister()); + } else { + __ movflt(to, r_1->as_XMMRegister()); + } + } + } static void gen_c2i_adapter(MacroAssembler *masm, ! const GrowableArray<SigEntry>* sig_extended, const VMRegPair *regs, ! Label& skip_fixup, ! address start, ! OopMapSet* oop_maps, ! int& frame_complete, ! int& frame_size_in_words, ! bool alloc_value_receiver) { // Before we get into the guts of the C2I adapter, see if we should be here // at all. We've come from compiled code and are attempting to jump to the // interpreter, which means the caller made a static call to get here // (vcalls always get a compiled target if there is one). Check for a // compiled target. If there is one, we need to patch the caller's call. patch_callers_callsite(masm); __ bind(skip_fixup); + bool has_value_argument = false; + if (ValueTypePassFieldsAsArgs) { + // Is there a value type argument? + for (int i = 0; i < sig_extended->length() && !has_value_argument; i++) { + has_value_argument = (sig_extended->at(i)._bt == T_VALUETYPE); + } + if (has_value_argument) { + // There is at least a value type argument: we're coming from + // compiled code so we have no buffers to back the value + // types. Allocate the buffers here with a runtime call. + OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); + + frame_complete = __ offset(); + + __ set_last_Java_frame(noreg, noreg, NULL); + + __ mov(c_rarg0, r15_thread); + __ mov(c_rarg1, rbx); + __ mov64(c_rarg2, (int64_t)alloc_value_receiver); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_value_types))); + + oop_maps->add_gc_map((int)(__ pc() - start), map); + __ reset_last_Java_frame(false); + + RegisterSaver::restore_live_registers(masm); + + Label no_exception; + __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); + __ jcc(Assembler::equal, no_exception); + + __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD); + __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset())); + __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + + __ bind(no_exception); + + // We get an array of objects from the runtime call + __ get_vm_result(r13, r15_thread); // Use r13 as temporary because r10 is trashed by movptr() + __ get_vm_result_2(rbx, r15_thread); // TODO: required to keep the callee Method live? + __ mov(r10, r13); + } + } + // Since all args are passed on the stack, total_args_passed * // Interpreter::stackElementSize is the space we need. Plus 1 because // we also account for the return address location since // we store it first rather than hold it in rax across all the shuffling ! int total_args_passed = compute_total_args_passed_int(sig_extended); int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize; // stack is aligned, keep it that way extraspace = align_up(extraspace, 2*wordSize);
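The delimiter counting in compute_total_args_passed_int() is subtle enough to deserve a standalone sketch: a T_VOID ends a value type only when it does not directly follow a T_LONG/T_DOUBLE (where it is merely the unused second slot). The model below omits the reserved-entry handling and uses a bare BasicType vector instead of GrowableArray<SigEntry>; it is illustrative, not the VM code.

// Sketch of the signature walk in compute_total_args_passed_int().
#include <cassert>
#include <vector>

enum BasicType { T_INT, T_LONG, T_DOUBLE, T_VALUETYPE, T_VOID };

int count_interpreter_args(const std::vector<BasicType>& sig) {
  int total = 0;
  for (size_t i = 0; i < sig.size(); i++) {
    if (sig[i] == T_VALUETYPE) {
      total++;              // the whole value type is one reference argument
      int vt = 1;           // nesting depth, as in the VM code
      do {                  // skip the flattened field list
        i++;
        assert(i < sig.size());
        if (sig[i] == T_VALUETYPE) {
          vt++;             // embedded value type starts
        } else if (sig[i] == T_VOID &&
                   sig[i-1] != T_LONG && sig[i-1] != T_DOUBLE) {
          vt--;             // end-of-value-type delimiter, not a long/double half
        }
      } while (vt != 0);
    } else {
      total++;              // regular argument; long/double halves count too
    }
  }
  return total;
}

// The example from the comment above flattens to 8 entries but is a single
// interpreter argument, so count_interpreter_args() returns 1 for:
//   T_VALUETYPE T_INT T_VALUETYPE T_INT T_LONG T_VOID T_VOID T_VOID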
*** 602,702 **** // Store the return address in the expected location __ movptr(Address(rsp, 0), rax); // Now write the args into the outgoing interpreter space - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - - // offset to start parameters - int st_off = (total_args_passed - i) * Interpreter::stackElementSize; - int next_off = st_off - Interpreter::stackElementSize; - - // Say 4 args: - // i st_off - // 0 32 T_LONG - // 1 24 T_VOID - // 2 16 T_OBJECT - // 3 8 T_BOOL - // - 0 return address - // - // However to make thing extra confusing. Because we can fit a long/double in - // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter - // leaves one slot empty and only stores to a single slot. In this case the - // slot that is occupied is the T_VOID slot. See I said it was confusing. ! VMReg r_1 = regs[i].first(); ! VMReg r_2 = regs[i].second(); ! if (!r_1->is_valid()) { ! assert(!r_2->is_valid(), ""); ! continue; } ! if (r_1->is_stack()) { ! // memory to memory use rax ! int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ! if (!r_2->is_valid()) { ! // sign extend?? ! __ movl(rax, Address(rsp, ld_off)); ! __ movptr(Address(rsp, st_off), rax); ! ! } else { ! ! __ movq(rax, Address(rsp, ld_off)); ! ! // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG ! // T_DOUBLE and T_LONG use two slots in the interpreter ! if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ! // ld_off == LSW, ld_off+wordSize == MSW ! // st_off == MSW, next_off == LSW ! __ movq(Address(rsp, next_off), rax); #ifdef ASSERT // Overwrite the unused slot with known junk __ mov64(rax, CONST64(0xdeadffffdeadaaaa)); __ movptr(Address(rsp, st_off), rax); - #endif /* ASSERT */ - } else { - __ movq(Address(rsp, st_off), rax); - } } - } else if (r_1->is_Register()) { - Register r = r_1->as_Register(); - if (!r_2->is_valid()) { - // must be only an int (or less ) so move only 32bits to slot - // why not sign extend?? - __ movl(Address(rsp, st_off), r); - } else { - // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG - // T_DOUBLE and T_LONG use two slots in the interpreter - if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { - // long/double in gpr - #ifdef ASSERT - // Overwrite the unused slot with known junk - __ mov64(rax, CONST64(0xdeadffffdeadaaab)); - __ movptr(Address(rsp, st_off), rax); #endif /* ASSERT */ - __ movq(Address(rsp, next_off), r); } else { ! __ movptr(Address(rsp, st_off), r); } } ! } else { ! assert(r_1->is_XMMRegister(), ""); ! if (!r_2->is_valid()) { ! // only a float use just part of the slot ! __ movflt(Address(rsp, st_off), r_1->as_XMMRegister()); ! } else { ! #ifdef ASSERT ! // Overwrite the unused slot with known junk ! __ mov64(rax, CONST64(0xdeadffffdeadaaac)); ! __ movptr(Address(rsp, st_off), rax); ! #endif /* ASSERT */ ! __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister()); } } } // Schedule the branch target address early. __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset()))); __ jmp(rcx); --- 843,945 ---- // Store the return address in the expected location __ movptr(Address(rsp, 0), rax); // Now write the args into the outgoing interpreter space ! // next_arg_comp is the next argument from the compiler point of ! // view (value type fields are passed in registers/on the stack). In ! // sig_extended, a value type argument starts with: T_VALUETYPE, ! // followed by the types of the fields of the value type and T_VOID ! // to mark the end of the value type. ignored counts the number of ! // T_VALUETYPE/T_VOID. next_vt_arg is the next value type argument: ! // used to get the buffer for that argument from the pool of buffers ! // we allocated above and want to pass to the ! // interpreter. next_arg_int is the next argument from the ! // interpreter point of view (value types are passed by reference). ! bool has_oop_field = false; ! for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0; ! next_arg_comp < sig_extended->length(); next_arg_comp++) { ! assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments"); ! assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?"); ! BasicType bt = sig_extended->at(next_arg_comp)._bt; ! int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize; ! if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) { ! if (SigEntry::is_reserved_entry(sig_extended, next_arg_comp)) { ! continue; // Ignore reserved entry } ! int next_off = st_off - Interpreter::stackElementSize; ! const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off; ! const VMRegPair reg_pair = regs[next_arg_comp-ignored]; ! size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4; ! gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, ! size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false); ! next_arg_int++; #ifdef ASSERT + if (bt == T_LONG || bt == T_DOUBLE) { // Overwrite the unused slot with known junk __ mov64(rax, CONST64(0xdeadffffdeadaaaa)); __ movptr(Address(rsp, st_off), rax); } #endif /* ASSERT */ } else { ! ignored++; ! // get the buffer from the just allocated pool of buffers ! int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_VALUETYPE); ! __ load_heap_oop(r11, Address(r10, index)); ! next_vt_arg++; next_arg_int++; ! int vt = 1; ! // write fields we get from compiled code in registers/stack ! // slots to the buffer: we know we are done with that value type ! // argument when we hit the T_VOID that acts as an end of value ! // type delimiter for this value type. Value types are flattened ! // so we might encounter embedded value types. Each entry in ! // sig_extended contains a field offset in the buffer. ! do { ! next_arg_comp++; ! BasicType bt = sig_extended->at(next_arg_comp)._bt; ! BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt; ! if (bt == T_VALUETYPE) { ! vt++; ! ignored++; ! } else if (bt == T_VOID && ! prev_bt != T_LONG && ! prev_bt != T_DOUBLE) { ! vt--; ! ignored++; ! } else if (SigEntry::is_reserved_entry(sig_extended, next_arg_comp)) { ! // Ignore reserved entry ! } else { ! int off = sig_extended->at(next_arg_comp)._offset; ! assert(off > 0, "offset in object should be positive"); ! size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize; ! bool is_oop = (bt == T_OBJECT || bt == T_ARRAY); ! has_oop_field = has_oop_field || is_oop; ! gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL, ! size_in_bytes, regs[next_arg_comp-ignored], Address(r11, off), extraspace, is_oop); ! } ! } while (vt != 0); ! // pass the buffer to the interpreter ! __ movptr(Address(rsp, st_off), r11); } } ! ! // If a value type was allocated and initialized, apply post barrier to all oop fields ! if (has_value_argument && has_oop_field) { ! __ push(r13); // save senderSP ! __ push(rbx); // save callee ! // Allocate argument register save area ! if (frame::arg_reg_save_area_bytes != 0) { ! __ subptr(rsp, frame::arg_reg_save_area_bytes); } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::apply_post_barriers), r15_thread, r10); + // De-allocate argument register save area + if (frame::arg_reg_save_area_bytes != 0) { + __ addptr(rsp, frame::arg_reg_save_area_bytes); } + __ pop(rbx); // restore callee + __ pop(r13); // restore sender SP } // Schedule the branch target address early. __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset()))); __ jmp(rcx);
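The st_off/next_off arithmetic in this adapter is the same "Say 4 args" layout documented in gen_c2i_adapter_helper() above. A tiny standalone sketch of the offset computation, with stackElementSize assumed to be 8 as on x86-64; the printed values are what the table in the comment shows:

// Sketch of the outgoing interpreter slot offsets used by the c2i adapter.
// Slot 0 holds the return address; arguments are written top-down, and a
// long/double occupies the lower-addressed slot of its pair (the T_VOID
// slot), the other slot being junk-filled under ASSERT. Illustrative only.
#include <cstdio>

const int kStackElementSize = 8;   // Interpreter::stackElementSize, x86-64

int st_off_for(int total_args_passed, int arg_index) {
  return (total_args_passed - arg_index) * kStackElementSize;
}

int main() {
  // The 4 args from the comment: T_LONG T_VOID T_OBJECT T_BOOL
  for (int i = 0; i < 4; i++) {
    int st_off = st_off_for(4, i);
    int next_off = st_off - kStackElementSize; // where a long/double goes
    printf("arg %d: st_off=%d next_off=%d\n", i, st_off, next_off);
  }
  return 0; // prints st_off = 32, 24, 16, 8, matching the table above
}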
*** 713,726 **** __ cmpptr(pc_reg, temp_reg); __ jcc(Assembler::below, L_ok); __ bind(L_fail); } void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, ! const BasicType *sig_bt, const VMRegPair *regs) { // Note: r13 contains the senderSP on entry. We must preserve it since // we may do a i2c -> c2i transition if we lose a race where compiled // code goes non-entrant while we get args ready. --- 956,1019 ---- __ cmpptr(pc_reg, temp_reg); __ jcc(Assembler::below, L_ok); __ bind(L_fail); } + static void gen_i2c_adapter_helper(MacroAssembler* masm, + BasicType bt, + BasicType prev_bt, + size_t size_in_bytes, + const VMRegPair& reg_pair, + const Address& from, + bool is_oop) { + assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here"); + if (bt == T_VOID) { + // Longs and doubles are passed in native word order, but misaligned + // in the 32-bit build. + assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half"); + return; + } + assert(!reg_pair.second()->is_valid() || reg_pair.first()->next() == reg_pair.second(), + "scrambled load targets?"); + + bool wide = (size_in_bytes == wordSize); + VMReg r_1 = reg_pair.first(); + VMReg r_2 = reg_pair.second(); + assert(r_2->is_valid() == wide, "invalid size"); + if (!r_1->is_valid()) { + assert(!r_2->is_valid(), "must be invalid"); + return; + } + + bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN); + if (!r_1->is_XMMRegister()) { + // We can use r13 as a temp here because compiled code doesn't need r13 as an input + // and if we end up going thru a c2i because of a miss a reasonable value of r13 + // will be generated. + Register dst = r_1->is_stack() ? r13 : r_1->as_Register(); + if (is_oop) { + __ load_heap_oop(dst, from); + } else { + __ load_sized_value(dst, from, size_in_bytes, is_signed); + } + if (r_1->is_stack()) { + // Convert stack slot to an SP offset (+ wordSize to account for return address) + int st_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + wordSize; + __ movq(Address(rsp, st_off), dst); + } + } else { + if (wide) { + __ movdbl(r_1->as_XMMRegister(), from); + } else { + __ movflt(r_1->as_XMMRegister(), from); + } + } + } + void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, int comp_args_on_stack, ! const GrowableArray<SigEntry>* sig, const VMRegPair *regs) { // Note: r13 contains the senderSP on entry. We must preserve it since // we may do a i2c -> c2i transition if we lose a race where compiled // code goes non-entrant while we get args ready.
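gen_i2c_adapter_helper() widens sub-word values on the way into compiled registers; only char and boolean are zero-extended. As a rough model of what load_sized_value does here (an assumption about the MacroAssembler helper's semantics, sketched in plain C++):

// Rough model of the widening rule: sign-extend byte/short/int, zero-extend
// char and boolean. Not the VM code; the real helper emits the matching
// movsx/movzx instruction forms.
#include <cstddef>
#include <cstdint>

int64_t model_load_sized_value(const void* p, size_t size_in_bytes, bool is_signed) {
  switch (size_in_bytes) {
    case 1: return is_signed ? *(const int8_t*)p  : *(const uint8_t*)p;
    case 2: return is_signed ? *(const int16_t*)p : *(const uint16_t*)p;
    case 4: return is_signed ? *(const int32_t*)p : *(const uint32_t*)p;
    default: return *(const int64_t*)p;  // 8 bytes: full word, no extension
  }
}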
*** 794,804 **** // Round up to minimum stack alignment, in wordSize comp_words_on_stack = align_up(comp_words_on_stack, 2); __ subptr(rsp, comp_words_on_stack * wordSize); } - // Ensure compiled code always sees stack at proper alignment __ andptr(rsp, -16); // push the return address and misalign the stack that youngest frame always sees // as far as the placement of the call instruction --- 1087,1096 ----
*** 808,818 **** --- 1100,1116 ---- const Register saved_sp = rax; __ movptr(saved_sp, r11); // Will jump to the compiled code just as if compiled code was doing it. // Pre-load the register-jump target early, to schedule it better. + if (StressValueTypePassFieldsAsArgs) { + // For stress testing, don't unpack value types in the i2c adapter but + // call the value type entry point and let it take care of unpacking. + __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_value_offset()))); + } else { __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset()))); + } #if INCLUDE_JVMCI if (EnableJVMCI || UseAOT) { // check if this call should be routed towards a specific entry point __ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
*** 822,909 **** __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0); __ bind(no_alternative_target); } #endif // INCLUDE_JVMCI // Now generate the shuffle code. Pick up all register args and move the // rest through the floating point stack top. - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - // Longs and doubles are passed in native word order, but misaligned - // in the 32-bit build. - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - - // Pick up 0, 1 or 2 words from SP+offset. ! assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), ! "scrambled load targets?"); // Load in argument order going down. - int ld_off = (total_args_passed - i)*Interpreter::stackElementSize; // Point to interpreter value (vs. tag) ! int next_off = ld_off - Interpreter::stackElementSize; ! // ! // ! // ! VMReg r_1 = regs[i].first(); ! VMReg r_2 = regs[i].second(); ! if (!r_1->is_valid()) { ! assert(!r_2->is_valid(), ""); ! continue; ! } ! if (r_1->is_stack()) { ! // Convert stack slot to an SP offset (+ wordSize to account for return address ) ! int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize; ! ! // We can use r13 as a temp here because compiled code doesn't need r13 as an input ! // and if we end up going thru a c2i because of a miss a reasonable value of r13 ! // will be generated. ! if (!r_2->is_valid()) { ! // sign extend??? ! __ movl(r13, Address(saved_sp, ld_off)); ! __ movptr(Address(rsp, st_off), r13); ! } else { ! // ! // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE ! // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case ! // So we must adjust where to pick up the data to match the interpreter. ! // ! // Interpreter local[n] == MSW, local[n+1] == LSW however locals ! // are accessed as negative so LSW is at LOW address ! ! // ld_off is MSW so get LSW ! const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? ! next_off : ld_off; ! __ movq(r13, Address(saved_sp, offset)); ! // st_off is LSW (i.e. reg.first()) ! __ movq(Address(rsp, st_off), r13); ! } ! } else if (r_1->is_Register()) { // Register argument ! Register r = r_1->as_Register(); ! assert(r != rax, "must be different"); ! if (r_2->is_valid()) { ! // ! // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE ! // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case ! // So we must adjust where to pick up the data to match the interpreter. ! ! const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? ! next_off : ld_off; ! ! // this can be a misaligned move ! __ movq(r, Address(saved_sp, offset)); ! } else { ! // sign extend and use a full word? ! __ movl(r, Address(saved_sp, ld_off)); } ! } else { ! if (!r_2->is_valid()) { ! __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off)); ! } else { ! __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off)); } } } // 6243940 We might end up in handle_wrong_method if // the callee is deoptimized as we race thru here. If that --- 1120,1192 ---- __ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0); __ bind(no_alternative_target); } #endif // INCLUDE_JVMCI + int total_args_passed = compute_total_args_passed_int(sig); // Now generate the shuffle code. Pick up all register args and move the // rest through the floating point stack top. ! // next_arg_comp is the next argument from the compiler point of ! // view (value type fields are passed in registers/on the stack). In ! // sig_extended, a value type argument starts with: T_VALUETYPE, ! // followed by the types of the fields of the value type and T_VOID ! // to mark the end of the value type. ignored counts the number of ! // T_VALUETYPE/T_VOID. next_arg_int is the next argument from the ! // interpreter point of view (value types are passed by reference). ! for (int next_arg_comp = 0, ignored = 0, next_arg_int = 0; next_arg_comp < sig->length(); next_arg_comp++) { ! assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments"); ! assert(next_arg_int <= total_args_passed, "more arguments from the interpreter than expected?"); ! BasicType bt = sig->at(next_arg_comp)._bt; ! int ld_off = (total_args_passed - next_arg_int)*Interpreter::stackElementSize; ! if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) { // Load in argument order going down. // Point to interpreter value (vs. tag) ! if (SigEntry::is_reserved_entry(sig, next_arg_comp)) { ! continue; // Ignore reserved entry } ! int next_off = ld_off - Interpreter::stackElementSize; ! int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off; ! const VMRegPair reg_pair = regs[next_arg_comp-ignored]; ! size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4; ! gen_i2c_adapter_helper(masm, bt, next_arg_comp > 0 ? sig->at(next_arg_comp-1)._bt : T_ILLEGAL, ! size_in_bytes, reg_pair, Address(saved_sp, offset), false); ! next_arg_int++; ! } else { ! next_arg_int++; ! ignored++; ! // get the buffer for that value type ! __ movptr(r10, Address(saved_sp, ld_off)); ! int vt = 1; ! // load fields to registers/stack slots from the buffer: we know ! // we are done with that value type argument when we hit the ! // T_VOID that acts as an end of value type delimiter for this ! // value type. Value types are flattened so we might encounter ! // embedded value types. Each entry in sig_extended contains a ! // field offset in the buffer. ! do { ! next_arg_comp++; ! BasicType bt = sig->at(next_arg_comp)._bt; ! BasicType prev_bt = sig->at(next_arg_comp-1)._bt; ! if (bt == T_VALUETYPE) { ! vt++; ! ignored++; ! } else if (bt == T_VOID && ! prev_bt != T_LONG && ! prev_bt != T_DOUBLE) { ! vt--; ! ignored++; ! } else if (SigEntry::is_reserved_entry(sig, next_arg_comp)) { ! // Ignore reserved entry ! } else { ! int off = sig->at(next_arg_comp)._offset; ! assert(off > 0, "offset in object should be positive"); ! size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize; ! bool is_oop = (bt == T_OBJECT || bt == T_ARRAY); ! gen_i2c_adapter_helper(masm, bt, prev_bt, size_in_bytes, regs[next_arg_comp - ignored], Address(r10, off), is_oop); } + } while (vt != 0); } } // 6243940 We might end up in handle_wrong_method if // the callee is deoptimized as we race thru here. If that
*** 916,941 **** // and the vm will find there should this case occur. __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // put Method* where a c2i would expect should we end up there ! // only needed becaus eof c2 resolve stubs return Method* as a result in // rax __ mov(rax, rbx); __ jmp(r11); } // --------------------------------------------------------------- AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, int comp_args_on_stack, ! const BasicType *sig_bt, ! const VMRegPair *regs, ! AdapterFingerPrint* fingerprint) { address i2c_entry = __ pc(); ! ! gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); // ------------------------------------------------------------------------- // Generate a C2I adapter. On entry we know rbx holds the Method* during calls // to the interpreter. The args start out packed in the compiled layout. They // need to be unpacked into the interpreter layout. This will almost always --- 1199,1234 ---- // and the vm will find there should this case occur. __ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx); // put Method* where a c2i would expect should we end up there ! // only needed because c2 resolve stubs return Method* as a result in // rax __ mov(rax, rbx); __ jmp(r11); } // --------------------------------------------------------------- AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, int comp_args_on_stack, ! int comp_args_on_stack_cc, ! const GrowableArray<SigEntry>* sig, ! const VMRegPair* regs, ! const GrowableArray<SigEntry>* sig_cc, ! const VMRegPair* regs_cc, ! const GrowableArray<SigEntry>* sig_cc_ro, ! const VMRegPair* regs_cc_ro, ! AdapterFingerPrint* fingerprint, ! AdapterBlob*& new_adapter) { address i2c_entry = __ pc(); ! if (StressValueTypePassFieldsAsArgs) { ! // For stress testing, don't unpack value types in the i2c adapter but ! // call the value type entry point and let it take care of unpacking. ! gen_i2c_adapter(masm, comp_args_on_stack, sig, regs); ! } else { ! gen_i2c_adapter(masm, comp_args_on_stack_cc, sig_cc, regs_cc); ! } // ------------------------------------------------------------------------- // Generate a C2I adapter. On entry we know rbx holds the Method* during calls // to the interpreter. The args start out packed in the compiled layout. They // need to be unpacked into the interpreter layout. This will almost always
*** 966,981 **** __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD); __ jcc(Assembler::equal, skip_fixup); __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); } address c2i_entry = __ pc(); ! gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); __ flush(); ! return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); } int SharedRuntime::c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, VMRegPair *regs2, --- 1259,1300 ---- __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD); __ jcc(Assembler::equal, skip_fixup); __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); } + OopMapSet* oop_maps = new OopMapSet(); + int frame_complete = CodeOffsets::frame_never_safe; + int frame_size_in_words = 0; + + // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver) + address c2i_value_ro_entry = __ pc(); + if (regs_cc != regs_cc_ro) { + Label unused; + gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false); + skip_fixup = unused; + } + + // Scalarized c2i adapter address c2i_entry = __ pc(); + gen_c2i_adapter(masm, sig_cc, regs_cc, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, true); ! // Non-scalarized c2i adapter ! address c2i_value_entry = c2i_entry; ! if (regs != regs_cc) { ! c2i_value_entry = __ pc(); ! Label unused; ! gen_c2i_adapter(masm, sig, regs, unused, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false); ! } __ flush(); ! ! // The c2i adapters might safepoint and trigger a GC. The caller must make sure that ! // the GC knows about the location of oop argument locations passed to the c2i adapter. ! bool caller_must_gc_arguments = (regs != regs_cc); ! new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments); ! ! return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_value_entry, c2i_value_ro_entry, c2i_unverified_entry); } int SharedRuntime::c_calling_convention(const BasicType *sig_bt, VMRegPair *regs, VMRegPair *regs2,
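To summarize the entry points assembled in this hunk (a descriptive sketch only; which entry a call site uses depends on whether the caller passes value types scalarized):

// Illustrative summary of the five entries handed to
// AdapterHandlerLibrary::new_entry() above; field names are descriptive.
typedef unsigned char* address;  // as in HotSpot's globalDefinitions

struct AdapterEntryPoints {
  address i2c_entry;            // interpreter -> compiled: scalarize args
                                // (sig_cc/regs_cc, unless in stress mode)
  address c2i_entry;            // scalarized compiled caller -> interpreter:
                                // buffer value types, packing the receiver too
  address c2i_value_ro_entry;   // same, but the receiver stays a reference
                                // (sig_cc_ro/regs_cc_ro)
  address c2i_value_entry;      // non-scalarized caller -> interpreter: plain
                                // reference shuffle, no buffer allocation
  address c2i_unverified_entry; // inline-cache miss path: re-check the klass,
                                // then fall into the c2i path
};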
*** 1029,1038 **** --- 1348,1358 ---- case T_LONG: assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); // fall through case T_OBJECT: case T_ARRAY: + case T_VALUETYPE: case T_ADDRESS: case T_METADATA: if (int_args < Argument::n_int_register_parameters_c) { regs[i].set2(INT_ArgReg[int_args++]->as_VMReg()); #ifdef _WIN64
*** 1379,1389 **** (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) { int offset = slot * VMRegImpl::stack_slot_size; if (map != NULL) { __ movq(Address(rsp, offset), in_regs[i].first()->as_Register()); if (in_sig_bt[i] == T_ARRAY) { ! map->set_oop(VMRegImpl::stack2reg(slot));; } } else { __ movq(in_regs[i].first()->as_Register(), Address(rsp, offset)); } slot += VMRegImpl::slots_per_word; --- 1699,1709 ---- (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) { int offset = slot * VMRegImpl::stack_slot_size; if (map != NULL) { __ movq(Address(rsp, offset), in_regs[i].first()->as_Register()); if (in_sig_bt[i] == T_ARRAY) { ! map->set_oop(VMRegImpl::stack2reg(slot)); } } else { __ movq(in_regs[i].first()->as_Register(), Address(rsp, offset)); } slot += VMRegImpl::slots_per_word;
*** 1413,1422 **** --- 1733,1743 ---- case T_ARRAY: case T_LONG: // handled above break; case T_OBJECT: + case T_VALUETYPE: default: ShouldNotReachHere(); } } else if (in_regs[i].first()->is_XMMRegister()) { if (in_sig_bt[i] == T_FLOAT) { int offset = slot * VMRegImpl::stack_slot_size;
*** 1788,1798 **** const VMRegPair* regs) { Register temp_reg = rbx; // not part of any compiled calling seq if (VerifyOops) { for (int i = 0; i < method->size_of_parameters(); i++) { if (sig_bt[i] == T_OBJECT || ! sig_bt[i] == T_ARRAY) { VMReg r = regs[i].first(); assert(r->is_valid(), "bad oop arg"); if (r->is_stack()) { __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); __ verify_oop(temp_reg); --- 2109,2120 ---- const VMRegPair* regs) { Register temp_reg = rbx; // not part of any compiled calling seq if (VerifyOops) { for (int i = 0; i < method->size_of_parameters(); i++) { if (sig_bt[i] == T_OBJECT || ! sig_bt[i] == T_ARRAY || ! sig_bt[i] == T_VALUETYPE) { VMReg r = regs[i].first(); assert(r->is_valid(), "bad oop arg"); if (r->is_stack()) { __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); __ verify_oop(temp_reg);
*** 2328,2337 **** --- 2650,2660 ---- freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true; } #endif break; } + case T_VALUETYPE: case T_OBJECT: assert(!is_critical_native, "no oop arguments"); object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ((i == 0) && (!is_static)), &receiver_offset);
*** 2463,2472 **** --- 2786,2799 ---- // Load immediate 1 into swap_reg %rax __ movl(swap_reg, 1); // Load (object->mark() | 1) into swap_reg %rax __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + if (EnableValhalla && !UseBiasedLocking) { + // The is_always_locked slow path reuses the biased-lock pattern, which never + // occurs naturally when !UseBiasedLocking, so clear the bias bit here + __ andptr(swap_reg, ~markOopDesc::biased_lock_bit_in_place); + } // Save (object->mark() | 1) into BasicLock's displaced header __ movptr(Address(lock_reg, mark_word_offset), swap_reg); // src -> dest iff dest == rax else rax <- dest
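The bit manipulation added above is easier to read in isolation. With Valhalla, a value type's mark word is "always locked" via the biased pattern, so after OR-ing in the unlocked bit the bias bit must be cleared to produce a valid displaced header when biased locking is off. A sketch with assumed bit values (2 lock bits, then the bias bit, mirroring markOop's low-bit layout):

// Sketch of the displaced-header computation; the constants are assumptions
// mirroring markOop's layout, not the VM definitions.
#include <cstdint>

const uintptr_t unlocked_value           = 1;       // lock bits pattern 01
const uintptr_t biased_lock_bit_in_place = 1 << 2;  // bit above the lock bits

uintptr_t displaced_header(uintptr_t mark, bool enable_valhalla,
                           bool use_biased_locking) {
  uintptr_t swap = mark | unlocked_value;    // force the "unlocked" pattern
  if (enable_valhalla && !use_biased_locking) {
    swap &= ~biased_lock_bit_in_place;       // strip the always-locked bias bit
  }
  return swap;
}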
*** 2524,2533 **** --- 2851,2861 ---- case T_DOUBLE : case T_FLOAT : // Result is in xmm0 we'll save as needed break; case T_ARRAY: // Really a handle + case T_VALUETYPE: // Really a handle case T_OBJECT: // Really a handle break; // can't de-handlize until after safepoint check case T_VOID: break; case T_LONG: break; default : ShouldNotReachHere();
*** 2677,2687 **** } __ reset_last_Java_frame(false); // Unbox oop result, e.g. JNIHandles::resolve value. ! if (ret_type == T_OBJECT || ret_type == T_ARRAY) { __ resolve_jobject(rax /* value */, r15_thread /* thread */, rcx /* tmp */); } --- 3005,3015 ---- } __ reset_last_Java_frame(false); // Unbox oop result, e.g. JNIHandles::resolve value. ! if (ret_type == T_OBJECT || ret_type == T_ARRAY || ret_type == T_VALUETYPE) { __ resolve_jobject(rax /* value */, r15_thread /* thread */, rcx /* tmp */); }
*** 4023,4027 **** --- 4351,4466 ---- // Set exception blob _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); } #endif // COMPILER2 + + BufferedValueTypeBlob* SharedRuntime::generate_buffered_value_type_adapter(const ValueKlass* vk) { + BufferBlob* buf = BufferBlob::create("value types pack/unpack", 16 * K); + CodeBuffer buffer(buf); + short buffer_locs[20]; + buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs, + sizeof(buffer_locs)/sizeof(relocInfo)); + + MacroAssembler* masm = new MacroAssembler(&buffer); + + const Array<SigEntry>* sig_vk = vk->extended_sig(); + const Array<VMRegPair>* regs = vk->return_regs(); + + int pack_fields_off = __ offset(); + + int j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_VALUETYPE) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + int off = sig_vk->at(i)._offset; + assert(off > 0, "offset in object should be positive"); + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + VMReg r_2 = pair.second(); + Address to(rax, off); + if (bt == T_FLOAT) { + __ movflt(to, r_1->as_XMMRegister()); + } else if (bt == T_DOUBLE) { + __ movdbl(to, r_1->as_XMMRegister()); + } else if (bt == T_OBJECT || bt == T_ARRAY) { + Register val = r_1->as_Register(); + assert_different_registers(rax, val); + // We don't need barriers because the destination is a newly allocated object. + // Also, we cannot use store_heap_oop(to, val) because it uses r8 as tmp. + if (UseCompressedOops) { + __ encode_heap_oop(val); + __ movl(to, val); + } else { + __ movptr(to, val); + } + + } else { + assert(is_java_primitive(bt), "unexpected basic type"); + assert_different_registers(rax, r_1->as_Register()); + size_t size_in_bytes = type2aelembytes(bt); + __ store_sized_value(to, r_1->as_Register(), size_in_bytes); + } + j++; + } + assert(j == regs->length(), "missed a field?"); + + __ ret(0); + + int unpack_fields_off = __ offset(); + + j = 1; + for (int i = 0; i < sig_vk->length(); i++) { + BasicType bt = sig_vk->at(i)._bt; + if (bt == T_VALUETYPE) { + continue; + } + if (bt == T_VOID) { + if (sig_vk->at(i-1)._bt == T_LONG || + sig_vk->at(i-1)._bt == T_DOUBLE) { + j++; + } + continue; + } + int off = sig_vk->at(i)._offset; + assert(off > 0, "offset in object should be positive"); + VMRegPair pair = regs->at(j); + VMReg r_1 = pair.first(); + VMReg r_2 = pair.second(); + Address from(rax, off); + if (bt == T_FLOAT) { + __ movflt(r_1->as_XMMRegister(), from); + } else if (bt == T_DOUBLE) { + __ movdbl(r_1->as_XMMRegister(), from); + } else if (bt == T_OBJECT || bt == T_ARRAY) { + assert_different_registers(rax, r_1->as_Register()); + __ load_heap_oop(r_1->as_Register(), from); + } else { + assert(is_java_primitive(bt), "unexpected basic type"); + assert_different_registers(rax, r_1->as_Register()); + size_t size_in_bytes = type2aelembytes(bt); + __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN); + } + j++; + } + assert(j == regs->length(), "missed a field?"); + + if (StressValueTypeReturnedAsFields) { + __ load_klass(rax, rax); + __ orptr(rax, 1); + } + + __ ret(0); + + __ flush(); + + return BufferedValueTypeBlob::create(&buffer, pack_fields_off, unpack_fields_off); + }
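The pack_fields/unpack_fields pair above is symmetric: both walk the same extended signature, skipping T_VALUETYPE markers and counting long/double halves, and move each field between its return register and its offset in the buffered value (j starts at 1 because regs->at(0) is rax, which holds the buffered value itself). A standalone model of the data movement, with the register file reduced to an array and offsets/sizes illustrative:

// Standalone model of the pack/unpack data movement; not the generated
// assembly. On little-endian x86-64, a memcpy of the field's size from the
// low bytes of a 64-bit "register" matches what the stores/loads above do.
#include <cstdint>
#include <cstring>

struct Field { int offset; size_t size; };  // from the extended signature

// pack_fields: return registers -> buffered value (the Address(rax, off) stores)
void pack_fields(uint8_t* buf, const Field* fields, int n, const uint64_t* regs) {
  for (int i = 0; i < n; i++) {
    memcpy(buf + fields[i].offset, &regs[i], fields[i].size);
  }
}

// unpack_fields: buffered value -> return registers (the reverse walk)
void unpack_fields(const uint8_t* buf, const Field* fields, int n, uint64_t* regs) {
  for (int i = 0; i < n; i++) {
    regs[i] = 0;  // zero-fill; the real code sign/zero-extends by type
    memcpy(&regs[i], buf + fields[i].offset, fields[i].size);
  }
}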