< prev index next >
src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
Print this page
@@ -26,10 +26,11 @@
#ifndef _WINDOWS
#include "alloca.h"
#endif
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
+#include "classfile/symbolTable.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/nativeInst.hpp"
#include "code/vtableStubs.hpp"
#include "gc/shared/collectedHeap.hpp"
@@ -489,10 +490,11 @@
assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
// fall through
case T_OBJECT:
case T_ARRAY:
case T_ADDRESS:
+ case T_VALUETYPE:
if (int_args < Argument::n_int_register_parameters_j) {
regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
} else {
regs[i].set2(VMRegImpl::stack2reg(stk_args));
stk_args += 2;
@@ -522,10 +524,92 @@
}
return align_up(stk_args, 2);
}
+// Same as java_calling_convention() but for multiple return
+// values. There's no way to store them on the stack so if we don't
+// have enough registers, multiple values can't be returned.
+//
+//   sig_bt            - types of the values to return; every T_LONG or
+//                       T_DOUBLE entry must be followed by a T_VOID "half"
+//   regs              - receives the register assigned to each entry
+//                       (T_VOID halves are marked bad)
+//   total_args_passed - number of entries in sig_bt and regs
+//
+// Returns the number of registers used (integer plus floating point), or
+// -1 if the values don't fit in the return registers. In the -1 case the
+// entries of regs visited before the failure have already been written.
+const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j+1;
+const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
+int SharedRuntime::java_return_convention(const BasicType *sig_bt,
+                                          VMRegPair *regs,
+                                          int total_args_passed) {
+  // Create the mapping between argument positions and
+  // registers.
+  static const Register INT_ArgReg[java_return_convention_max_int] = {
+    rax, j_rarg5, j_rarg4, j_rarg3, j_rarg2, j_rarg1, j_rarg0
+  };
+  static const XMMRegister FP_ArgReg[java_return_convention_max_float] = {
+    j_farg0, j_farg1, j_farg2, j_farg3,
+    j_farg4, j_farg5, j_farg6, j_farg7
+  };
+
+  uint int_args = 0;
+  uint fp_args = 0;
+
+  for (int i = 0; i < total_args_passed; i++) {
+    switch (sig_bt[i]) {
+    case T_BOOLEAN:
+    case T_CHAR:
+    case T_BYTE:
+    case T_SHORT:
+    case T_INT:
+      // 32-bit (or narrower) integer value: one VMReg slot.
+      // The limit is the constant that sizes INT_ArgReg, so the index
+      // below can never run past the table.
+      if (int_args < java_return_convention_max_int) {
+        regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
+        int_args++;
+      } else {
+        return -1;
+      }
+      break;
+    case T_VOID:
+      // halves of T_LONG or T_DOUBLE
+      assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
+      regs[i].set_bad();
+      break;
+    case T_LONG:
+      // Check the bound first so the assert itself never reads past the
+      // end of sig_bt (same form as in java_calling_convention()).
+      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
+      // fall through
+    case T_OBJECT:
+    case T_VALUETYPE:
+    case T_ARRAY:
+    case T_ADDRESS:
+    case T_METADATA:
+      // 64-bit integer or pointer value: two VMReg slots.
+      if (int_args < java_return_convention_max_int) {
+        regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
+        int_args++;
+      } else {
+        return -1;
+      }
+      break;
+    case T_FLOAT:
+      if (fp_args < java_return_convention_max_float) {
+        regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
+        fp_args++;
+      } else {
+        return -1;
+      }
+      break;
+    case T_DOUBLE:
+      assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
+      if (fp_args < java_return_convention_max_float) {
+        regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
+        fp_args++;
+      } else {
+        return -1;
+      }
+      break;
+    default:
+      ShouldNotReachHere();
+      break;
+    }
+  }
+
+  return int_args + fp_args;
+}
+
// Patch the callers callsite with entry to compiled code if it exists.
static void patch_callers_callsite(MacroAssembler *masm) {
Label L;
__ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
__ jcc(Assembler::equal, L);
@@ -564,31 +648,188 @@
// restore sp
__ mov(rsp, r13);
__ bind(L);
}
+// Counts the arguments of a call the way the interpreter sees them, i.e.
+// with every value type argument passed as a single reference.
+// sig_extended lists, for each value type argument, the flattened fields
+// of that value type. When ValueTypePassFieldsAsArgs is off, the length
+// of the signature is returned unchanged.
+static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
+  if (!ValueTypePassFieldsAsArgs) {
+    return sig_extended->length();
+  }
+
+  int interpreter_args = 0;
+  int pos = 0;
+  while (pos < sig_extended->length()) {
+    const BasicType entry_bt = sig_extended->at(pos)._bt;
+    if (SigEntry::is_reserved_entry(sig_extended, pos)) {
+      // Reserved entries are not arguments
+      pos++;
+      continue;
+    }
+
+    // Any other top-level entry is exactly one interpreter argument
+    interpreter_args++;
+
+    if (entry_bt == T_VALUETYPE) {
+      // A value type argument is laid out as T_VALUETYPE, then the types
+      // of its fields, then a closing T_VOID. Nested value types are
+      // flattened in place, e.g. a value type holding an int and a value
+      // type that itself holds an int and a long:
+      //   T_VALUETYPE T_INT T_VALUETYPE T_INT T_LONG T_VOID(long half)
+      //   T_VOID(inner end) T_VOID(outer end)
+      // Skip ahead to the T_VOID that closes the outermost value type.
+      int depth = 1;
+      while (depth != 0) {
+        pos++;
+        const BasicType cur_bt = sig_extended->at(pos)._bt;
+        const BasicType before_bt = sig_extended->at(pos - 1)._bt;
+        if (cur_bt == T_VALUETYPE) {
+          depth++;
+        } else if (cur_bt == T_VOID && !(before_bt == T_LONG || before_bt == T_DOUBLE)) {
+          // A T_VOID that is not the second half of a long/double
+          // ends a value type
+          depth--;
+        }
+      }
+    }
+    pos++;
+  }
+  return interpreter_args;
+}
+
+
// Emits the code that stores one value passed in the compiled calling
// convention (an argument, or one field of a value type argument) to the
// memory location 'to'.
//   bt            - type of the signature entry; a T_VOID entry emits nothing
//   prev_bt       - type of the preceding signature entry, only used to
//                   check that a T_VOID follows a T_LONG or T_DOUBLE
//   size_in_bytes - number of bytes to store; wordSize selects the wide path
//   reg_pair      - current location of the value (general purpose
//                   register, XMM register or stack slot)
//   to            - destination address
//   extraspace    - added to the rsp-relative offset when the value has to
//                   be read from a stack slot
//   is_oop        - store the value as a heap oop (as a 32-bit narrow oop
//                   when UseCompressedOops is set)
+static void gen_c2i_adapter_helper(MacroAssembler* masm,
+ BasicType bt,
+ BasicType prev_bt,
+ size_t size_in_bytes,
+ const VMRegPair& reg_pair,
+ const Address& to,
+ int extraspace,
+ bool is_oop) {
+ assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here");
+ if (bt == T_VOID) {
+ assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
+ return;
+ }
+
+ // Say 4 args:
+ // i st_off
+ // 0 32 T_LONG
+ // 1 24 T_VOID
+ // 2 16 T_OBJECT
+ // 3 8 T_BOOL
+ // - 0 return address
+ //
+ // However, to make things extra confusing: because we can fit a long/double in
+ // a single slot on a 64 bit vm and it would be silly to break them up, the interpreter
+ // leaves one slot empty and only stores to a single slot. In this case the
+ // slot that is occupied is the T_VOID slot. See, I said it was confusing.
+
+ bool wide = (size_in_bytes == wordSize);
+ VMReg r_1 = reg_pair.first();
+ VMReg r_2 = reg_pair.second();
+ assert(r_2->is_valid() == wide, "invalid size"); // a second half is present exactly for word-sized values
+ if (!r_1->is_valid()) {
+ assert(!r_2->is_valid(), "must be invalid");
+ return; // no location assigned: nothing to store
+ }
+
+ if (!r_1->is_XMMRegister()) {
+ Register val = rax; // rax is the temporary used for values that live on the stack
+ assert_different_registers(to.base(), val);
+ if(r_1->is_stack()) {
 // Value is in a stack slot: load it into rax first
+ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
+ __ load_sized_value(val, Address(rsp, ld_off), size_in_bytes, /* is_signed */ false);
+ } else {
 // Value is already in a general purpose register: store straight from it
+ val = r_1->as_Register();
+ }
+ if (is_oop) {
+ // We don't need barriers because the destination is a newly allocated object.
+ // Also, we cannot use store_heap_oop(to, val) because it uses r8 as tmp.
 // NOTE(review): encode_heap_oop() presumably encodes val in place, so the
 // source register no longer holds the uncompressed oop afterwards - confirm.
+ if (UseCompressedOops) {
+ __ encode_heap_oop(val);
+ __ movl(to, val);
+ } else {
+ __ movptr(to, val);
+ }
+ } else {
+ __ store_sized_value(to, val, size_in_bytes);
+ }
+ } else {
 // Value is in an XMM register: double for the wide case, float otherwise
+ if (wide) {
+ __ movdbl(to, r_1->as_XMMRegister());
+ } else {
+ __ movflt(to, r_1->as_XMMRegister());
+ }
+ }
+}
static void gen_c2i_adapter(MacroAssembler *masm,
- int total_args_passed,
- int comp_args_on_stack,
- const BasicType *sig_bt,
+ const GrowableArray<SigEntry>* sig_extended,
const VMRegPair *regs,
- Label& skip_fixup) {
+ Label& skip_fixup,
+ address start,
+ OopMapSet* oop_maps,
+ int& frame_complete,
+ int& frame_size_in_words,
+ bool alloc_value_receiver) {
// Before we get into the guts of the C2I adapter, see if we should be here
// at all. We've come from compiled code and are attempting to jump to the
// interpreter, which means the caller made a static call to get here
// (vcalls always get a compiled target if there is one). Check for a
// compiled target. If there is one, we need to patch the caller's call.
patch_callers_callsite(masm);
__ bind(skip_fixup);
+ bool has_value_argument = false;
+ if (ValueTypePassFieldsAsArgs) {
+ // Is there a value type argument?
+ for (int i = 0; i < sig_extended->length() && !has_value_argument; i++) {
+ has_value_argument = (sig_extended->at(i)._bt == T_VALUETYPE);
+ }
+ if (has_value_argument) {
+ // There is at least a value type argument: we're coming from
+ // compiled code so we have no buffers to back the value
+ // types. Allocate the buffers here with a runtime call.
+ OopMap* map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
+
+ frame_complete = __ offset();
+
+ __ set_last_Java_frame(noreg, noreg, NULL);
+
+ __ mov(c_rarg0, r15_thread);
+ __ mov(c_rarg1, rbx);
+ __ mov64(c_rarg2, (int64_t)alloc_value_receiver);
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::allocate_value_types)));
+
+ oop_maps->add_gc_map((int)(__ pc() - start), map);
+ __ reset_last_Java_frame(false);
+
+ RegisterSaver::restore_live_registers(masm);
+
+ Label no_exception;
+ __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD);
+ __ jcc(Assembler::equal, no_exception);
+
+ __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), (int)NULL_WORD);
+ __ movptr(rax, Address(r15_thread, Thread::pending_exception_offset()));
+ __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+
+ __ bind(no_exception);
+
+ // We get an array of objects from the runtime call
+ __ get_vm_result(r13, r15_thread); // Use r13 as temporary because r10 is trashed by movptr()
+ __ get_vm_result_2(rbx, r15_thread); // TODO: required to keep the callee Method live?
+ __ mov(r10, r13);
+ }
+ }
+
// Since all args are passed on the stack, total_args_passed *
// Interpreter::stackElementSize is the space we need. Plus 1 because
// we also account for the return address location since
// we store it first rather than hold it in rax across all the shuffling
-
+ int total_args_passed = compute_total_args_passed_int(sig_extended);
int extraspace = (total_args_passed * Interpreter::stackElementSize) + wordSize;
// stack is aligned, keep it that way
extraspace = align_up(extraspace, 2*wordSize);
@@ -602,101 +843,103 @@
// Store the return address in the expected location
__ movptr(Address(rsp, 0), rax);
// Now write the args into the outgoing interpreter space
- for (int i = 0; i < total_args_passed; i++) {
- if (sig_bt[i] == T_VOID) {
- assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
- continue;
- }
-
- // offset to start parameters
- int st_off = (total_args_passed - i) * Interpreter::stackElementSize;
- int next_off = st_off - Interpreter::stackElementSize;
-
- // Say 4 args:
- // i st_off
- // 0 32 T_LONG
- // 1 24 T_VOID
- // 2 16 T_OBJECT
- // 3 8 T_BOOL
- // - 0 return address
- //
- // However to make thing extra confusing. Because we can fit a long/double in
- // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter
- // leaves one slot empty and only stores to a single slot. In this case the
- // slot that is occupied is the T_VOID slot. See I said it was confusing.
- VMReg r_1 = regs[i].first();
- VMReg r_2 = regs[i].second();
- if (!r_1->is_valid()) {
- assert(!r_2->is_valid(), "");
- continue;
+ // next_arg_comp is the next argument from the compiler point of
+ // view (value type fields are passed in registers/on the stack). In
+ // sig_extended, a value type argument starts with: T_VALUETYPE,
+ // followed by the types of the fields of the value type and T_VOID
+ // to mark the end of the value type. ignored counts the number of
+ // T_VALUETYPE/T_VOID. next_vt_arg is the next value type argument:
+ // used to get the buffer for that argument from the pool of buffers
+ // we allocated above and want to pass to the
+ // interpreter. next_arg_int is the next argument from the
+ // interpreter point of view (value types are passed by reference).
+ bool has_oop_field = false;
+ for (int next_arg_comp = 0, ignored = 0, next_vt_arg = 0, next_arg_int = 0;
+ next_arg_comp < sig_extended->length(); next_arg_comp++) {
+ assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
+ assert(next_arg_int <= total_args_passed, "more arguments for the interpreter than expected?");
+ BasicType bt = sig_extended->at(next_arg_comp)._bt;
+ int st_off = (total_args_passed - next_arg_int) * Interpreter::stackElementSize;
+ if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) {
+ if (SigEntry::is_reserved_entry(sig_extended, next_arg_comp)) {
+ continue; // Ignore reserved entry
}
- if (r_1->is_stack()) {
- // memory to memory use rax
- int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
- if (!r_2->is_valid()) {
- // sign extend??
- __ movl(rax, Address(rsp, ld_off));
- __ movptr(Address(rsp, st_off), rax);
-
- } else {
-
- __ movq(rax, Address(rsp, ld_off));
-
- // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
- // T_DOUBLE and T_LONG use two slots in the interpreter
- if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
- // ld_off == LSW, ld_off+wordSize == MSW
- // st_off == MSW, next_off == LSW
- __ movq(Address(rsp, next_off), rax);
+ int next_off = st_off - Interpreter::stackElementSize;
+ const int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : st_off;
+ const VMRegPair reg_pair = regs[next_arg_comp-ignored];
+ size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
+ gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
+ size_in_bytes, reg_pair, Address(rsp, offset), extraspace, false);
+ next_arg_int++;
#ifdef ASSERT
+ if (bt == T_LONG || bt == T_DOUBLE) {
// Overwrite the unused slot with known junk
__ mov64(rax, CONST64(0xdeadffffdeadaaaa));
__ movptr(Address(rsp, st_off), rax);
-#endif /* ASSERT */
- } else {
- __ movq(Address(rsp, st_off), rax);
- }
}
- } else if (r_1->is_Register()) {
- Register r = r_1->as_Register();
- if (!r_2->is_valid()) {
- // must be only an int (or less ) so move only 32bits to slot
- // why not sign extend??
- __ movl(Address(rsp, st_off), r);
- } else {
- // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
- // T_DOUBLE and T_LONG use two slots in the interpreter
- if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
- // long/double in gpr
-#ifdef ASSERT
- // Overwrite the unused slot with known junk
- __ mov64(rax, CONST64(0xdeadffffdeadaaab));
- __ movptr(Address(rsp, st_off), rax);
#endif /* ASSERT */
- __ movq(Address(rsp, next_off), r);
} else {
- __ movptr(Address(rsp, st_off), r);
+ ignored++;
+ // get the buffer from the just allocated pool of buffers
+ int index = arrayOopDesc::base_offset_in_bytes(T_OBJECT) + next_vt_arg * type2aelembytes(T_VALUETYPE);
+ __ load_heap_oop(r11, Address(r10, index));
+ next_vt_arg++; next_arg_int++;
+ int vt = 1;
+ // write fields we get from compiled code in registers/stack
+ // slots to the buffer: we know we are done with that value type
+ // argument when we hit the T_VOID that acts as an end of value
+ // type delimiter for this value type. Value types are flattened
+ // so we might encounter embedded value types. Each entry in
+ // sig_extended contains a field offset in the buffer.
+ do {
+ next_arg_comp++;
+ BasicType bt = sig_extended->at(next_arg_comp)._bt;
+ BasicType prev_bt = sig_extended->at(next_arg_comp-1)._bt;
+ if (bt == T_VALUETYPE) {
+ vt++;
+ ignored++;
+ } else if (bt == T_VOID &&
+ prev_bt != T_LONG &&
+ prev_bt != T_DOUBLE) {
+ vt--;
+ ignored++;
+ } else if (SigEntry::is_reserved_entry(sig_extended, next_arg_comp)) {
+ // Ignore reserved entry
+ } else {
+ int off = sig_extended->at(next_arg_comp)._offset;
+ assert(off > 0, "offset in object should be positive");
+ size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
+ bool is_oop = (bt == T_OBJECT || bt == T_ARRAY);
+ has_oop_field = has_oop_field || is_oop;
+ gen_c2i_adapter_helper(masm, bt, next_arg_comp > 0 ? sig_extended->at(next_arg_comp-1)._bt : T_ILLEGAL,
+ size_in_bytes, regs[next_arg_comp-ignored], Address(r11, off), extraspace, is_oop);
+ }
+ } while (vt != 0);
+ // pass the buffer to the interpreter
+ __ movptr(Address(rsp, st_off), r11);
}
}
- } else {
- assert(r_1->is_XMMRegister(), "");
- if (!r_2->is_valid()) {
- // only a float use just part of the slot
- __ movflt(Address(rsp, st_off), r_1->as_XMMRegister());
- } else {
-#ifdef ASSERT
- // Overwrite the unused slot with known junk
- __ mov64(rax, CONST64(0xdeadffffdeadaaac));
- __ movptr(Address(rsp, st_off), rax);
-#endif /* ASSERT */
- __ movdbl(Address(rsp, next_off), r_1->as_XMMRegister());
+
+ // If a value type was allocated and initialized, apply post barrier to all oop fields
+ if (has_value_argument && has_oop_field) {
+ __ push(r13); // save senderSP
+ __ push(rbx); // save callee
+ // Allocate argument register save area
+ if (frame::arg_reg_save_area_bytes != 0) {
+ __ subptr(rsp, frame::arg_reg_save_area_bytes);
}
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::apply_post_barriers), r15_thread, r10);
+ // De-allocate argument register save area
+ if (frame::arg_reg_save_area_bytes != 0) {
+ __ addptr(rsp, frame::arg_reg_save_area_bytes);
}
+ __ pop(rbx); // restore callee
+ __ pop(r13); // restore sender SP
}
// Schedule the branch target address early.
__ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset())));
__ jmp(rcx);
@@ -713,14 +956,64 @@
__ cmpptr(pc_reg, temp_reg);
__ jcc(Assembler::below, L_ok);
__ bind(L_fail);
}
// Emits the code that loads one value from the memory location 'from' into
// the place the compiled calling convention assigns to it (an argument, or
// one field of a value type argument).
//   bt            - type of the signature entry; a T_VOID entry emits nothing
//   prev_bt       - type of the preceding signature entry, only used to
//                   check that a T_VOID follows a T_LONG or T_DOUBLE
//   size_in_bytes - number of bytes to load; wordSize selects the wide path
//   reg_pair      - destination (general purpose register, XMM register or
//                   stack slot)
//   from          - source address
//   is_oop        - load the value with load_heap_oop() instead of a plain
//                   sized load
+static void gen_i2c_adapter_helper(MacroAssembler* masm,
+ BasicType bt,
+ BasicType prev_bt,
+ size_t size_in_bytes,
+ const VMRegPair& reg_pair,
+ const Address& from,
+ bool is_oop) {
+ assert(bt != T_VALUETYPE || !ValueTypePassFieldsAsArgs, "no value type here");
+ if (bt == T_VOID) {
+ // Longs and doubles are passed in native word order, but misaligned
+ // in the 32-bit build.
+ assert(prev_bt == T_LONG || prev_bt == T_DOUBLE, "missing half");
+ return;
+ }
+ assert(!reg_pair.second()->is_valid() || reg_pair.first()->next() == reg_pair.second(),
+ "scrambled load targets?");
+
+ bool wide = (size_in_bytes == wordSize);
+ VMReg r_1 = reg_pair.first();
+ VMReg r_2 = reg_pair.second();
+ assert(r_2->is_valid() == wide, "invalid size"); // a second half is present exactly for word-sized values
+ if (!r_1->is_valid()) {
+ assert(!r_2->is_valid(), "must be invalid");
+ return; // no location assigned: nothing to load
+ }
+
+ bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN); // char and boolean are loaded unsigned
+ if (!r_1->is_XMMRegister()) {
+ // We can use r13 as a temp here because compiled code doesn't need r13 as an input
+ // and if we end up going thru a c2i because of a miss a reasonable value of r13
+ // will be generated.
+ Register dst = r_1->is_stack() ? r13 : r_1->as_Register();
+ if (is_oop) {
+ __ load_heap_oop(dst, from);
+ } else {
+ __ load_sized_value(dst, from, size_in_bytes, is_signed);
+ }
+ if (r_1->is_stack()) {
+ // Convert stack slot to an SP offset (+ wordSize to account for return address)
+ int st_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
+ __ movq(Address(rsp, st_off), dst); // always a full 64-bit store, whatever size was loaded
+ }
+ } else {
 // Destination is an XMM register: double for the wide case, float otherwise
+ if (wide) {
+ __ movdbl(r_1->as_XMMRegister(), from);
+ } else {
+ __ movflt(r_1->as_XMMRegister(), from);
+ }
+ }
+}
+
void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
- int total_args_passed,
int comp_args_on_stack,
- const BasicType *sig_bt,
+ const GrowableArray<SigEntry>* sig,
const VMRegPair *regs) {
// Note: r13 contains the senderSP on entry. We must preserve it since
// we may do a i2c -> c2i transition if we lose a race where compiled
// code goes non-entrant while we get args ready.
@@ -794,11 +1087,10 @@
// Round up to minimum stack alignment, in wordSize
comp_words_on_stack = align_up(comp_words_on_stack, 2);
__ subptr(rsp, comp_words_on_stack * wordSize);
}
-
// Ensure compiled code always sees stack at proper alignment
__ andptr(rsp, -16);
// push the return address and misalign the stack that youngest frame always sees
// as far as the placement of the call instruction
@@ -808,11 +1100,17 @@
const Register saved_sp = rax;
__ movptr(saved_sp, r11);
// Will jump to the compiled code just as if compiled code was doing it.
// Pre-load the register-jump target early, to schedule it better.
+ if (StressValueTypePassFieldsAsArgs) {
+ // For stress testing, don't unpack value types in the i2c adapter but
+ // call the value type entry point and let it take care of unpacking.
+ __ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_value_offset())));
+ } else {
__ movptr(r11, Address(rbx, in_bytes(Method::from_compiled_offset())));
+ }
#if INCLUDE_JVMCI
if (EnableJVMCI || UseAOT) {
// check if this call should be routed towards a specific entry point
__ cmpptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
@@ -822,88 +1120,73 @@
__ movptr(Address(r15_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), 0);
__ bind(no_alternative_target);
}
#endif // INCLUDE_JVMCI
+ int total_args_passed = compute_total_args_passed_int(sig);
// Now generate the shuffle code. Pick up all register args and move the
// rest through the floating point stack top.
- for (int i = 0; i < total_args_passed; i++) {
- if (sig_bt[i] == T_VOID) {
- // Longs and doubles are passed in native word order, but misaligned
- // in the 32-bit build.
- assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
- continue;
- }
-
- // Pick up 0, 1 or 2 words from SP+offset.
- assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
- "scrambled load targets?");
+ // next_arg_comp is the next argument from the compiler point of
+ // view (value type fields are passed in registers/on the stack). In
+ // sig_extended, a value type argument starts with: T_VALUETYPE,
+ // followed by the types of the fields of the value type and T_VOID
+ // to mark the end of the value type. ignored counts the number of
+ // T_VALUETYPE/T_VOID. next_arg_int is the next argument from the
+ // interpreter point of view (value types are passed by reference).
+ for (int next_arg_comp = 0, ignored = 0, next_arg_int = 0; next_arg_comp < sig->length(); next_arg_comp++) {
+ assert(ignored <= next_arg_comp, "shouldn't skip over more slots than there are arguments");
+ assert(next_arg_int <= total_args_passed, "more arguments from the interpreter than expected?");
+ BasicType bt = sig->at(next_arg_comp)._bt;
+ int ld_off = (total_args_passed - next_arg_int)*Interpreter::stackElementSize;
+ if (!ValueTypePassFieldsAsArgs || bt != T_VALUETYPE) {
// Load in argument order going down.
- int ld_off = (total_args_passed - i)*Interpreter::stackElementSize;
// Point to interpreter value (vs. tag)
- int next_off = ld_off - Interpreter::stackElementSize;
- //
- //
- //
- VMReg r_1 = regs[i].first();
- VMReg r_2 = regs[i].second();
- if (!r_1->is_valid()) {
- assert(!r_2->is_valid(), "");
- continue;
- }
- if (r_1->is_stack()) {
- // Convert stack slot to an SP offset (+ wordSize to account for return address )
- int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
-
- // We can use r13 as a temp here because compiled code doesn't need r13 as an input
- // and if we end up going thru a c2i because of a miss a reasonable value of r13
- // will be generated.
- if (!r_2->is_valid()) {
- // sign extend???
- __ movl(r13, Address(saved_sp, ld_off));
- __ movptr(Address(rsp, st_off), r13);
- } else {
- //
- // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
- // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case
- // So we must adjust where to pick up the data to match the interpreter.
- //
- // Interpreter local[n] == MSW, local[n+1] == LSW however locals
- // are accessed as negative so LSW is at LOW address
-
- // ld_off is MSW so get LSW
- const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
- next_off : ld_off;
- __ movq(r13, Address(saved_sp, offset));
- // st_off is LSW (i.e. reg.first())
- __ movq(Address(rsp, st_off), r13);
- }
- } else if (r_1->is_Register()) { // Register argument
- Register r = r_1->as_Register();
- assert(r != rax, "must be different");
- if (r_2->is_valid()) {
- //
- // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
- // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case
- // So we must adjust where to pick up the data to match the interpreter.
-
- const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
- next_off : ld_off;
-
- // this can be a misaligned move
- __ movq(r, Address(saved_sp, offset));
- } else {
- // sign extend and use a full word?
- __ movl(r, Address(saved_sp, ld_off));
+ if (SigEntry::is_reserved_entry(sig, next_arg_comp)) {
+ continue; // Ignore reserved entry
}
- } else {
- if (!r_2->is_valid()) {
- __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
- } else {
- __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
+ int next_off = ld_off - Interpreter::stackElementSize;
+ int offset = (bt == T_LONG || bt == T_DOUBLE) ? next_off : ld_off;
+ const VMRegPair reg_pair = regs[next_arg_comp-ignored];
+ size_t size_in_bytes = reg_pair.second()->is_valid() ? 8 : 4;
+ gen_i2c_adapter_helper(masm, bt, next_arg_comp > 0 ? sig->at(next_arg_comp-1)._bt : T_ILLEGAL,
+ size_in_bytes, reg_pair, Address(saved_sp, offset), false);
+ next_arg_int++;
+ } else {
+ next_arg_int++;
+ ignored++;
+ // get the buffer for that value type
+ __ movptr(r10, Address(saved_sp, ld_off));
+ int vt = 1;
+ // load fields to registers/stack slots from the buffer: we know
+ // we are done with that value type argument when we hit the
+ // T_VOID that acts as an end of value type delimiter for this
+ // value type. Value types are flattened so we might encounter
+ // embedded value types. Each entry in sig_extended contains a
+ // field offset in the buffer.
+ do {
+ next_arg_comp++;
+ BasicType bt = sig->at(next_arg_comp)._bt;
+ BasicType prev_bt = sig->at(next_arg_comp-1)._bt;
+ if (bt == T_VALUETYPE) {
+ vt++;
+ ignored++;
+ } else if (bt == T_VOID &&
+ prev_bt != T_LONG &&
+ prev_bt != T_DOUBLE) {
+ vt--;
+ ignored++;
+ } else if (SigEntry::is_reserved_entry(sig, next_arg_comp)) {
+ // Ignore reserved entry
+ } else {
+ int off = sig->at(next_arg_comp)._offset;
+ assert(off > 0, "offset in object should be positive");
+ size_t size_in_bytes = is_java_primitive(bt) ? type2aelembytes(bt) : wordSize;
+ bool is_oop = (bt == T_OBJECT || bt == T_ARRAY);
+ gen_i2c_adapter_helper(masm, bt, prev_bt, size_in_bytes, regs[next_arg_comp - ignored], Address(r10, off), is_oop);
}
+ } while (vt != 0);
}
}
// 6243940 We might end up in handle_wrong_method if
// the callee is deoptimized as we race thru here. If that
@@ -916,26 +1199,36 @@
// and the vm will find there should this case occur.
__ movptr(Address(r15_thread, JavaThread::callee_target_offset()), rbx);
// put Method* where a c2i would expect should we end up there
- // only needed becaus eof c2 resolve stubs return Method* as a result in
+ // only needed because of c2 resolve stubs return Method* as a result in
// rax
__ mov(rax, rbx);
__ jmp(r11);
}
// ---------------------------------------------------------------
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
- int total_args_passed,
int comp_args_on_stack,
- const BasicType *sig_bt,
- const VMRegPair *regs,
- AdapterFingerPrint* fingerprint) {
+ int comp_args_on_stack_cc,
+ const GrowableArray<SigEntry>* sig,
+ const VMRegPair* regs,
+ const GrowableArray<SigEntry>* sig_cc,
+ const VMRegPair* regs_cc,
+ const GrowableArray<SigEntry>* sig_cc_ro,
+ const VMRegPair* regs_cc_ro,
+ AdapterFingerPrint* fingerprint,
+ AdapterBlob*& new_adapter) {
address i2c_entry = __ pc();
-
- gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
+ if (StressValueTypePassFieldsAsArgs) {
+ // For stress testing, don't unpack value types in the i2c adapter but
+ // call the value type entry point and let it take care of unpacking.
+ gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);
+ } else {
+ gen_i2c_adapter(masm, comp_args_on_stack_cc, sig_cc, regs_cc);
+ }
// -------------------------------------------------------------------------
// Generate a C2I adapter. On entry we know rbx holds the Method* during calls
// to the interpreter. The args start out packed in the compiled layout. They
// need to be unpacked into the interpreter layout. This will almost always
@@ -966,16 +1259,42 @@
__ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD);
__ jcc(Assembler::equal, skip_fixup);
__ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
}
+ OopMapSet* oop_maps = new OopMapSet();
+ int frame_complete = CodeOffsets::frame_never_safe;
+ int frame_size_in_words = 0;
+
+ // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
+ address c2i_value_ro_entry = __ pc();
+ if (regs_cc != regs_cc_ro) {
+ Label unused;
+ gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
+ skip_fixup = unused;
+ }
+
+ // Scalarized c2i adapter
address c2i_entry = __ pc();
+ gen_c2i_adapter(masm, sig_cc, regs_cc, skip_fixup, i2c_entry, oop_maps, frame_complete, frame_size_in_words, true);
- gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
+ // Non-scalarized c2i adapter
+ address c2i_value_entry = c2i_entry;
+ if (regs != regs_cc) {
+ c2i_value_entry = __ pc();
+ Label unused;
+ gen_c2i_adapter(masm, sig, regs, unused, i2c_entry, oop_maps, frame_complete, frame_size_in_words, false);
+ }
__ flush();
- return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
+
+ // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
+ // the GC knows about the location of oop argument locations passed to the c2i adapter.
+ bool caller_must_gc_arguments = (regs != regs_cc);
+ new_adapter = AdapterBlob::create(masm->code(), frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
+
+ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_value_entry, c2i_value_ro_entry, c2i_unverified_entry);
}
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
VMRegPair *regs,
VMRegPair *regs2,
@@ -1029,10 +1348,11 @@
case T_LONG:
assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
// fall through
case T_OBJECT:
case T_ARRAY:
+ case T_VALUETYPE:
case T_ADDRESS:
case T_METADATA:
if (int_args < Argument::n_int_register_parameters_c) {
regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
#ifdef _WIN64
@@ -1379,11 +1699,11 @@
(in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
int offset = slot * VMRegImpl::stack_slot_size;
if (map != NULL) {
__ movq(Address(rsp, offset), in_regs[i].first()->as_Register());
if (in_sig_bt[i] == T_ARRAY) {
- map->set_oop(VMRegImpl::stack2reg(slot));;
+ map->set_oop(VMRegImpl::stack2reg(slot));
}
} else {
__ movq(in_regs[i].first()->as_Register(), Address(rsp, offset));
}
slot += VMRegImpl::slots_per_word;
@@ -1413,10 +1733,11 @@
case T_ARRAY:
case T_LONG:
// handled above
break;
case T_OBJECT:
+ case T_VALUETYPE:
default: ShouldNotReachHere();
}
} else if (in_regs[i].first()->is_XMMRegister()) {
if (in_sig_bt[i] == T_FLOAT) {
int offset = slot * VMRegImpl::stack_slot_size;
@@ -1788,11 +2109,12 @@
const VMRegPair* regs) {
Register temp_reg = rbx; // not part of any compiled calling seq
if (VerifyOops) {
for (int i = 0; i < method->size_of_parameters(); i++) {
if (sig_bt[i] == T_OBJECT ||
- sig_bt[i] == T_ARRAY) {
+ sig_bt[i] == T_ARRAY ||
+ sig_bt[i] == T_VALUETYPE) {
VMReg r = regs[i].first();
assert(r->is_valid(), "bad oop arg");
if (r->is_stack()) {
__ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
__ verify_oop(temp_reg);
@@ -2328,10 +2650,11 @@
freg_destroyed[out_regs[c_arg].first()->as_XMMRegister()->encoding()] = true;
}
#endif
break;
}
+ case T_VALUETYPE:
case T_OBJECT:
assert(!is_critical_native, "no oop arguments");
object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
((i == 0) && (!is_static)),
&receiver_offset);
@@ -2463,10 +2786,14 @@
// Load immediate 1 into swap_reg %rax
__ movl(swap_reg, 1);
// Load (object->mark() | 1) into swap_reg %rax
__ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
+ if (EnableValhalla && !UseBiasedLocking) {
+ // For slow path is_always_locked, using biased, which is never natural for !UseBiasedLocking
+ __ andptr(swap_reg, ~markOopDesc::biased_lock_bit_in_place);
+ }
// Save (object->mark() | 1) into BasicLock's displaced header
__ movptr(Address(lock_reg, mark_word_offset), swap_reg);
// src -> dest iff dest == rax else rax <- dest
@@ -2524,10 +2851,11 @@
case T_DOUBLE :
case T_FLOAT :
// Result is in xmm0 we'll save as needed
break;
case T_ARRAY: // Really a handle
+ case T_VALUETYPE: // Really a handle
case T_OBJECT: // Really a handle
break; // can't de-handlize until after safepoint check
case T_VOID: break;
case T_LONG: break;
default : ShouldNotReachHere();
@@ -2677,11 +3005,11 @@
}
__ reset_last_Java_frame(false);
// Unbox oop result, e.g. JNIHandles::resolve value.
- if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
+ if (ret_type == T_OBJECT || ret_type == T_ARRAY || ret_type == T_VALUETYPE) {
__ resolve_jobject(rax /* value */,
r15_thread /* thread */,
rcx /* tmp */);
}
@@ -4023,5 +4351,116 @@
// Set exception blob
_exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
}
#endif // COMPILER2
+
+BufferedValueTypeBlob* SharedRuntime::generate_buffered_value_type_adapter(const ValueKlass* vk) { // emits a "pack" stub and an "unpack" stub for vk's fields into a single blob
+ BufferBlob* buf = BufferBlob::create("value types pack/unpack", 16 * K); // backing blob for the generated code
+ CodeBuffer buffer(buf);
+ short buffer_locs[20]; // local storage handed to the code buffer for relocation info
+ buffer.insts()->initialize_shared_locs((relocInfo*)buffer_locs,
+ sizeof(buffer_locs)/sizeof(relocInfo));
+
+ MacroAssembler* masm = new MacroAssembler(&buffer);
+
+ const Array<SigEntry>* sig_vk = vk->extended_sig(); // per-entry basic type (_bt) and field offset (_offset), read below
+ const Array<VMRegPair>* regs = vk->return_regs(); // register pairs, walked in step with sig_vk via j
+
+ int pack_fields_off = __ offset(); // code offset where the pack stub begins
+
+ int j = 1; // index into regs; starts at 1 — presumably entry 0 is reserved for the value itself, TODO confirm
+ for (int i = 0; i < sig_vk->length(); i++) {
+ BasicType bt = sig_vk->at(i)._bt;
+ if (bt == T_VALUETYPE) { // T_VALUETYPE entries emit nothing and consume no regs entry
+ continue;
+ }
+ if (bt == T_VOID) { // T_VOID emits nothing; it consumes a regs entry only when it follows a T_LONG or T_DOUBLE
+ if (sig_vk->at(i-1)._bt == T_LONG ||
+ sig_vk->at(i-1)._bt == T_DOUBLE) {
+ j++;
+ }
+ continue;
+ }
+ int off = sig_vk->at(i)._offset;
+ assert(off > 0, "offset in object should be positive");
+ VMRegPair pair = regs->at(j);
+ VMReg r_1 = pair.first();
+ VMReg r_2 = pair.second(); // not used in this loop
+ Address to(rax, off); // destination: field at offset off from the address in rax
+ if (bt == T_FLOAT) {
+ __ movflt(to, r_1->as_XMMRegister());
+ } else if (bt == T_DOUBLE) {
+ __ movdbl(to, r_1->as_XMMRegister());
+ } else if (bt == T_OBJECT || bt == T_ARRAY) {
+ Register val = r_1->as_Register();
+ assert_different_registers(rax, val);
+ // We don't need barriers because the destination is a newly allocated object.
+ // Also, we cannot use store_heap_oop(to, val) because it uses r8 as tmp.
+ if (UseCompressedOops) {
+ __ encode_heap_oop(val); // NOTE(review): appears to encode val in place, leaving the register holding the narrow oop — confirm that is acceptable
+ __ movl(to, val);
+ } else {
+ __ movptr(to, val);
+ }
+
+ } else {
+ assert(is_java_primitive(bt), "unexpected basic type");
+ assert_different_registers(rax, r_1->as_Register());
+ size_t size_in_bytes = type2aelembytes(bt);
+ __ store_sized_value(to, r_1->as_Register(), size_in_bytes);
+ }
+ j++; // one regs entry consumed per stored field
+ }
+ assert(j == regs->length(), "missed a field?");
+
+ __ ret(0); // end of the pack stub
+
+ int unpack_fields_off = __ offset(); // code offset where the unpack stub begins
+
+ j = 1; // restart the regs walk; same traversal as the pack loop above
+ for (int i = 0; i < sig_vk->length(); i++) {
+ BasicType bt = sig_vk->at(i)._bt;
+ if (bt == T_VALUETYPE) { // T_VALUETYPE entries emit nothing and consume no regs entry
+ continue;
+ }
+ if (bt == T_VOID) { // T_VOID emits nothing; it consumes a regs entry only when it follows a T_LONG or T_DOUBLE
+ if (sig_vk->at(i-1)._bt == T_LONG ||
+ sig_vk->at(i-1)._bt == T_DOUBLE) {
+ j++;
+ }
+ continue;
+ }
+ int off = sig_vk->at(i)._offset;
+ assert(off > 0, "offset in object should be positive");
+ VMRegPair pair = regs->at(j);
+ VMReg r_1 = pair.first();
+ VMReg r_2 = pair.second(); // not used in this loop
+ Address from(rax, off); // source: field at offset off from the address in rax
+ if (bt == T_FLOAT) {
+ __ movflt(r_1->as_XMMRegister(), from);
+ } else if (bt == T_DOUBLE) {
+ __ movdbl(r_1->as_XMMRegister(), from);
+ } else if (bt == T_OBJECT || bt == T_ARRAY) {
+ assert_different_registers(rax, r_1->as_Register());
+ __ load_heap_oop(r_1->as_Register(), from);
+ } else {
+ assert(is_java_primitive(bt), "unexpected basic type");
+ assert_different_registers(rax, r_1->as_Register());
+ size_t size_in_bytes = type2aelembytes(bt);
+ __ load_sized_value(r_1->as_Register(), from, size_in_bytes, bt != T_CHAR && bt != T_BOOLEAN); // last argument is false only for T_CHAR and T_BOOLEAN — presumably the signedness flag, TODO confirm
+ }
+ j++; // one regs entry consumed per loaded field
+ }
+ assert(j == regs->length(), "missed a field?");
+
+ if (StressValueTypeReturnedAsFields) { // stress mode: after the loads, rax is overwritten with its klass with the low bit set
+ __ load_klass(rax, rax);
+ __ orptr(rax, 1); // presumably tags rax as "not an oop" for the caller — TODO confirm
+ }
+
+ __ ret(0); // end of the unpack stub
+
+ __ flush();
+
+ return BufferedValueTypeBlob::create(&buffer, pack_fields_off, unpack_fields_off); // blob records both stub entry offsets
+}
< prev index next >