< prev index next >
src/hotspot/cpu/x86/macroAssembler_x86.cpp
Print this page
@@ -45,10 +45,11 @@
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.hpp"
#include "utilities/macros.hpp"
+#include "vmreg_x86.inline.hpp"
#include "crc32c.h"
#ifdef COMPILER2
#include "opto/intrinsicnode.hpp"
#endif
@@ -2423,10 +2424,14 @@
pass_arg1(this, arg_1);
pass_arg0(this, arg_0);
call_VM_leaf(entry_point, 3);
}
+void MacroAssembler::super_call_VM_leaf(address entry_point) { // Variant without register arguments: no pass_argN setup, forwards directly to MacroAssembler::call_VM_leaf_base
+ MacroAssembler::call_VM_leaf_base(entry_point, 1); // NOTE(review): passes 1 as the second argument although no argument is set up here (same value as the one-argument overload) - confirm call_VM_leaf_base tolerates this
+}
+
// One-argument variant: hands arg_0 to pass_arg0 and then calls
// MacroAssembler::call_VM_leaf_base explicitly (qualified call) with an
// argument count of 1.
void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
pass_arg0(this, arg_0);
MacroAssembler::call_VM_leaf_base(entry_point, 1);
}
@@ -3417,10 +3422,55 @@
// nothing to do, (later) access of M[reg + offset]
// will provoke OS NULL exception if reg = NULL
}
}
+void MacroAssembler::test_klass_is_value(Register klass, Register temp_reg, Label& is_value) { // Branches to is_value when JVM_ACC_VALUE is set in klass's access flags; overwrites temp_reg
+ movl(temp_reg, Address(klass, Klass::access_flags_offset())); // load the access flags word of the klass into temp_reg
+ testl(temp_reg, JVM_ACC_VALUE); // check the JVM_ACC_VALUE bit(s); testl leaves temp_reg unchanged
+ jcc(Assembler::notZero, is_value); // flag set -> is_value; otherwise fall through
+}
+
+void MacroAssembler::test_field_is_flattenable(Register flags, Register temp_reg, Label& is_flattenable) { // Branches to is_flattenable when bit is_flattenable_field_shift of flags is set; works on a copy in temp_reg, which is overwritten
+ movl(temp_reg, flags); // copy the flags into temp_reg
+ shrl(temp_reg, ConstantPoolCacheEntry::is_flattenable_field_shift); // shift the flattenable bit down to bit 0
+ andl(temp_reg, 0x1); // keep only that bit
+ testl(temp_reg, temp_reg); // NOTE(review): andl already sets ZF from its result, so this test looks redundant - confirm before removing
+ jcc(Assembler::notZero, is_flattenable); // bit set -> is_flattenable; otherwise fall through
+}
+
+void MacroAssembler::test_field_is_not_flattenable(Register flags, Register temp_reg, Label& notFlattenable) { // Inverse of test_field_is_flattenable: branches to notFlattenable when bit is_flattenable_field_shift of flags is clear; temp_reg is overwritten
+ movl(temp_reg, flags); // copy the flags into temp_reg
+ shrl(temp_reg, ConstantPoolCacheEntry::is_flattenable_field_shift); // shift the flattenable bit down to bit 0
+ andl(temp_reg, 0x1); // keep only that bit
+ testl(temp_reg, temp_reg); // NOTE(review): andl already sets ZF from its result, so this test looks redundant - confirm before removing
+ jcc(Assembler::zero, notFlattenable); // bit clear -> notFlattenable; otherwise fall through
+}
+
+void MacroAssembler::test_field_is_flattened(Register flags, Register temp_reg, Label& is_flattened) { // Branches to is_flattened when bit is_flattened_field_shift of flags is set; works on a copy in temp_reg, which is overwritten
+ movl(temp_reg, flags); // copy the flags into temp_reg
+ shrl(temp_reg, ConstantPoolCacheEntry::is_flattened_field_shift); // shift the flattened bit down to bit 0
+ andl(temp_reg, 0x1); // keep only that bit
+ testl(temp_reg, temp_reg); // NOTE(review): andl already sets ZF from its result, so this test looks redundant - confirm before removing
+ jcc(Assembler::notZero, is_flattened); // bit set -> is_flattened; otherwise fall through
+}
+
+void MacroAssembler::test_flat_array_klass(Register klass, Register temp_reg, // Branches to is_flat_array when the array tag in klass's layout helper equals Klass::_lh_array_tag_vt_value; temp_reg is overwritten
+ Label& is_flat_array) {
+ movl(temp_reg, Address(klass, Klass::layout_helper_offset())); // load the layout helper; klass is only read here, so it may be the same register as temp_reg (test_flat_array_oop calls it that way)
+ sarl(temp_reg, Klass::_lh_array_tag_shift); // arithmetic shift right so that only the array tag remains
+ cmpl(temp_reg, Klass::_lh_array_tag_vt_value); // compare the tag against the flat (value type) array tag
+ jcc(Assembler::equal, is_flat_array); // equal -> is_flat_array; otherwise fall through
+}
+
+
+void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg, // Same check as test_flat_array_klass, but starting from an object: loads oop's klass and tests it; temp_reg is overwritten
+ Label& is_flat_array) {
+ load_klass(temp_reg, oop); // temp_reg = klass of oop
+ test_flat_array_klass(temp_reg, temp_reg, is_flat_array); // temp_reg serves as both the klass input and the scratch register
+}
+
void MacroAssembler::os_breakpoint() {
// Instead of directly emitting a breakpoint instruction, emit a call to
// os::breakpoint for better debuggability
// (e.g., MSVC can't call ps() otherwise)
call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint)));
}
@@ -4469,11 +4519,15 @@
bind(L);
}
}
void MacroAssembler::verify_oop(Register reg, const char* s) {
- if (!VerifyOops) return;
+ if (!VerifyOops || VerifyAdapterSharing) {
+ // Below address of the code string confuses VerifyAdapterSharing
+ // because it may differ between otherwise equivalent adapters.
+ return;
+ }
// Pass register number to verify_oop_subroutine
const char* b = NULL;
{
ResourceMark rm;
@@ -4559,11 +4613,15 @@
return Address(rsp, scale_reg, scale_factor, offset);
}
void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
- if (!VerifyOops) return;
+ if (!VerifyOops || VerifyAdapterSharing) {
+ // Below address of the code string confuses VerifyAdapterSharing
+ // because it may differ between otherwise equivalent adapters.
+ return;
+ }
// Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
// Pass register number to verify_oop_subroutine
const char* b = NULL;
{
@@ -5435,11 +5493,16 @@
}
#endif // _LP64
// C2 compiled method's prolog code.
-void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) {
+void MacroAssembler::verified_entry(Compile* C, int sp_inc) {
+ int framesize = C->frame_size_in_bytes();
+ int bangsize = C->bang_size_in_bytes();
+ bool fp_mode_24b = C->in_24_bit_fp_mode();
+ int stack_bang_size = C->need_stack_bang(bangsize) ? bangsize : 0;
+ bool is_stub = C->stub_function() != NULL;
// WARNING: Initial instruction MUST be 5 bytes or longer so that
// NativeJump::patch_verified_entry will be able to patch out the entry
// code safely. The push to verify stack depth is ok at 5 bytes,
// the frame allocation can be either 3 or 6 bytes. So if we don't do
@@ -5488,10 +5551,16 @@
addptr(rbp, framesize);
}
}
}
+ if (C->needs_stack_repair()) {
+ // Save stack increment (also account for fixed framesize and rbp)
+ assert((sp_inc & (StackAlignmentInBytes-1)) == 0, "stack increment not aligned");
+ movptr(Address(rsp, C->sp_inc_offset()), sp_inc + framesize + wordSize);
+ }
+
if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
framesize -= wordSize;
movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
}
@@ -5524,18 +5593,20 @@
bs->nmethod_entry_barrier(this);
}
}
// clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
-void MacroAssembler::xmm_clear_mem(Register base, Register cnt, XMMRegister xtmp) {
+void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp) {
// cnt - number of qwords (8-byte words).
// base - start address, qword aligned.
Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
+ movdq(xtmp, val);
if (UseAVX >= 2) {
- vpxor(xtmp, xtmp, xtmp, AVX_256bit);
+ punpcklqdq(xtmp, xtmp);
+ vinserti128_high(xtmp, xtmp);
} else {
- pxor(xtmp, xtmp);
+ punpcklqdq(xtmp, xtmp);
}
jmp(L_zero_64_bytes);
BIND(L_loop);
if (UseAVX >= 2) {
@@ -5575,26 +5646,307 @@
decrement(cnt);
jccb(Assembler::greaterEqual, L_sloop);
BIND(L_end);
}
-void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp, bool is_large) {
+// Move a value of type bt from 'from' to 'to' (each a register or a stack slot) and update reg_state. Returns true if 'to' was already written or the move was done/unnecessary; returns false, emitting nothing, if a real move is needed but 'to' is still reg_readonly. ret_off is only used by the asserts guarding stack stores. r13 is used as scratch for stack-to-stack moves.
+bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[], int ret_off) {
+ if (reg_state[to->value()] == reg_written) {
+ return true; // Already written
+ }
+ if (from != to && bt != T_VOID) { // identical locations and T_VOID entries need no code, only the state update at the end
+ if (reg_state[to->value()] == reg_readonly) {
+ return false; // Not yet writable
+ }
+ if (from->is_reg()) { // source is a register
+ if (to->is_reg()) { // register -> register
+ if (from->is_XMMRegister()) {
+ if (bt == T_DOUBLE) {
+ movdbl(to->as_XMMRegister(), from->as_XMMRegister());
+ } else {
+ assert(bt == T_FLOAT, "must be float");
+ movflt(to->as_XMMRegister(), from->as_XMMRegister());
+ }
+ } else {
+ movq(to->as_Register(), from->as_Register()); // general purpose registers are always copied with movq, independent of bt
+ }
+ } else { // register -> stack slot
+ int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize; // byte offset from rsp; the extra wordSize presumably skips the return address on top of the stack - TODO confirm
+ assert(st_off != ret_off, "overwriting return address at %d", st_off);
+ Address to_addr = Address(rsp, st_off);
+ if (from->is_XMMRegister()) {
+ if (bt == T_DOUBLE) {
+ movdbl(to_addr, from->as_XMMRegister());
+ } else {
+ assert(bt == T_FLOAT, "must be float");
+ movflt(to_addr, from->as_XMMRegister());
+ }
+ } else {
+ movq(to_addr, from->as_Register());
+ }
+ }
+ } else { // source is a stack slot
+ Address from_addr = Address(rsp, from->reg2stack() * VMRegImpl::stack_slot_size + wordSize); // same offset computation as for stack destinations
+ if (to->is_reg()) { // stack slot -> register
+ if (to->is_XMMRegister()) {
+ if (bt == T_DOUBLE) {
+ movdbl(to->as_XMMRegister(), from_addr);
+ } else {
+ assert(bt == T_FLOAT, "must be float");
+ movflt(to->as_XMMRegister(), from_addr);
+ }
+ } else {
+ movq(to->as_Register(), from_addr);
+ }
+ } else { // stack slot -> stack slot
+ int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
+ assert(st_off != ret_off, "overwriting return address at %d", st_off);
+ movq(r13, from_addr); // no memory-to-memory move: go through r13, which is clobbered
+ movq(Address(rsp, st_off), r13);
+ }
+ }
+ }
+ // Update register states
+ reg_state[from->value()] = reg_writable; // the source has been consumed and may now be overwritten
+ reg_state[to->value()] = reg_written; // assigned last, so when from == to the location ends up as reg_written
+ return true;
+}
+
+// Read all fields from the value type oop held in 'from' and store the values in the registers/stack slots given by regs_to. On entry sig_index must be at the value type's end delimiter (T_VOID); sig_index and to_index are walked backwards and updated for the caller. Returns false if at least one destination was still reg_readonly and was therefore skipped, true otherwise. Uses r10 (source oop loaded from the stack) and r13 (values destined for the stack) as scratch.
+bool MacroAssembler::unpack_value_helper(const GrowableArray<SigEntry>* sig, int& sig_index, VMReg from, VMRegPair* regs_to, int& to_index, RegState reg_state[], int ret_off) {
+ Register fromReg = from->is_reg() ? from->as_Register() : noreg; // noreg: the oop lives on the stack and is loaded lazily below
+ assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter");
+
+ int vt = 1; // nesting depth: end delimiters seen minus T_VALUETYPE entries seen; the loop ends when it reaches 0
+ bool done = true; // becomes false when a destination had to be skipped
+ bool mark_done = true; // becomes false when a skipped destination is something other than 'from' itself
+ do {
+ sig_index--;
+ BasicType bt = sig->at(sig_index)._bt;
+ if (bt == T_VALUETYPE) {
+ vt--; // start of a (possibly nested) value type
+ } else if (bt == T_VOID &&
+ sig->at(sig_index-1)._bt != T_LONG &&
+ sig->at(sig_index-1)._bt != T_DOUBLE) {
+ vt++; // a T_VOID that does not follow T_LONG/T_DOUBLE is treated as the end delimiter of a nested value type
+ } else if (SigEntry::is_reserved_entry(sig, sig_index)) {
+ to_index--; // Ignore this
+ } else {
+ assert(to_index >= 0, "invalid to_index");
+ VMRegPair pair_to = regs_to[to_index--]; // every remaining entry consumes one destination, including the T_VOID after a long/double
+ VMReg to = pair_to.first();
+
+ if (bt == T_VOID) continue; // T_VOID following T_LONG/T_DOUBLE: nothing to load
+
+ int idx = (int)to->value();
+ if (reg_state[idx] == reg_readonly) {
+ if (idx != from->value()) {
+ mark_done = false; // blocked by a different source location, so 'from' must not be released yet
+ }
+ done = false; // this field is left for a later attempt
+ continue;
+ } else if (reg_state[idx] == reg_written) {
+ continue; // already stored by an earlier attempt
+ } else {
+ assert(reg_state[idx] == reg_writable, "must be writable");
+ reg_state[idx] = reg_written;
+ }
+
+ if (fromReg == noreg) {
+ int st_off = from->reg2stack() * VMRegImpl::stack_slot_size + wordSize; // byte offset of the source slot from rsp
+ movq(r10, Address(rsp, st_off)); // load the oop from its stack slot once; r10 is reused for the remaining fields
+ fromReg = r10;
+ }
+
+ int off = sig->at(sig_index)._offset; // offset of the field within the object
+ assert(off > 0, "offset in object should be positive");
+ bool is_oop = (bt == T_OBJECT || bt == T_ARRAY);
+
+ Address fromAddr = Address(fromReg, off);
+ bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN); // T_CHAR and T_BOOLEAN are loaded as unsigned values
+ if (!to->is_XMMRegister()) {
+ Register dst = to->is_stack() ? r13 : to->as_Register(); // stack destinations are staged in r13
+ if (is_oop) {
+ load_heap_oop(dst, fromAddr);
+ } else {
+ load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed);
+ }
+ if (to->is_stack()) {
+ int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
+ assert(st_off != ret_off, "overwriting return address at %d", st_off);
+ movq(Address(rsp, st_off), dst);
+ }
+ } else {
+ if (bt == T_DOUBLE) {
+ movdbl(to->as_XMMRegister(), fromAddr);
+ } else {
+ assert(bt == T_FLOAT, "must be float");
+ movflt(to->as_XMMRegister(), fromAddr);
+ }
+ }
+ }
+ } while (vt != 0);
+ if (mark_done && reg_state[from->value()] != reg_written) {
+ // This is okay because no one else will write to that slot
+ reg_state[from->value()] = reg_writable;
+ }
+ return done;
+}
+
+// Unpack all value type arguments passed as oops: computes the unscalarized and the scalarized calling convention of C's method, extends the stack if the scalarized one needs more stack slots, emits the moves/field loads into the scalarized locations and finally emits the verified entry. With receiver_only set, only the receiver is unpacked. Uses r13 as scratch and r14/xmm8 as spill registers.
+void MacroAssembler::unpack_value_args(Compile* C, bool receiver_only) {
+ assert(C->has_scalarized_args(), "value type argument scalarization is disabled");
+ Method* method = C->method()->get_Method();
+ const GrowableArray<SigEntry>* sig_cc = method->adapter()->get_sig_cc();
+ assert(sig_cc != NULL, "must have scalarized signature");
+
+ // Get unscalarized calling convention
+ BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, sig_cc->length()); // sized by the scalarized signature; the same buffer is refilled by fill_sig_bt further down
+ int args_passed = 0;
+ if (!method->is_static()) {
+ sig_bt[args_passed++] = T_OBJECT; // receiver
+ }
+ if (!receiver_only) {
+ for (SignatureStream ss(method->signature()); !ss.at_return_type(); ss.next()) {
+ BasicType bt = ss.type();
+ sig_bt[args_passed++] = bt;
+ if (type2size[bt] == 2) {
+ sig_bt[args_passed++] = T_VOID; // two-slot types get a trailing T_VOID entry
+ }
+ }
+ } else {
+ // Only unpack the receiver, all other arguments are already scalarized
+ InstanceKlass* holder = method->method_holder();
+ int rec_len = holder->is_value() ? ValueKlass::cast(holder)->extended_sig()->length() : 1; // number of non-reserved scalarized entries that belong to the receiver
+ // Copy scalarized signature but skip receiver, value type delimiters and reserved entries
+ for (int i = 0; i < sig_cc->length(); i++) {
+ if (!SigEntry::is_reserved_entry(sig_cc, i)) {
+ if (SigEntry::skip_value_delimiters(sig_cc, i) && rec_len <= 0) {
+ sig_bt[args_passed++] = sig_cc->at(i)._bt;
+ }
+ rec_len--; // counts down over the receiver's entries; <= 0 once they have all been passed
+ }
+ }
+ }
+ VMRegPair* regs = NEW_RESOURCE_ARRAY(VMRegPair, args_passed);
+ int args_on_stack = SharedRuntime::java_calling_convention(sig_bt, regs, args_passed, false);
+
+ // Get scalarized calling convention
+ int args_passed_cc = SigEntry::fill_sig_bt(sig_cc, sig_bt);
+ VMRegPair* regs_cc = NEW_RESOURCE_ARRAY(VMRegPair, sig_cc->length());
+ int args_on_stack_cc = SharedRuntime::java_calling_convention(sig_bt, regs_cc, args_passed_cc, false);
+
+ // Check if we need to extend the stack for unpacking
+ int sp_inc = (args_on_stack_cc - args_on_stack) * VMRegImpl::stack_slot_size; // additional stack bytes needed by the scalarized convention
+ if (sp_inc > 0) {
+ // Save the return address, adjust the stack (make sure it is properly
+ // 16-byte aligned) and copy the return address to the new top of the stack.
+ pop(r13);
+ sp_inc = align_up(sp_inc, StackAlignmentInBytes);
+ subptr(rsp, sp_inc);
+ push(r13);
+ } else {
+ // The scalarized calling convention needs less stack space than the unscalarized one.
+ // No need to extend the stack, the caller will take care of these adjustments.
+ sp_inc = 0;
+ }
+
+ // Initialize register/stack slot states (make all writable)
+ int max_stack = MAX2(args_on_stack + sp_inc/VMRegImpl::stack_slot_size, args_on_stack_cc); // highest stack slot index that either convention can reference after the adjustment
+ int max_reg = VMRegImpl::stack2reg(max_stack)->value();
+ RegState* reg_state = NEW_RESOURCE_ARRAY(RegState, max_reg); // indexed by VMReg::value()
+ for (int i = 0; i < max_reg; ++i) {
+ reg_state[i] = reg_writable;
+ }
+ // Set all source registers/stack slots to readonly to prevent accidental overwriting
+ for (int i = 0; i < args_passed; ++i) {
+ VMReg reg = regs[i].first();
+ if (!reg->is_valid()) continue;
+ if (reg->is_stack()) {
+ // Update source stack location by adding stack increment
+ reg = VMRegImpl::stack2reg(reg->reg2stack() + sp_inc/VMRegImpl::stack_slot_size);
+ regs[i] = reg;
+ }
+ assert(reg->value() >= 0 && reg->value() < max_reg, "reg value out of bounds");
+ reg_state[reg->value()] = reg_readonly;
+ }
+
+ // Emit code for unpacking value type arguments
+ // We try multiple times and eventually start spilling to resolve (circular) dependencies
+ bool done = false;
+ for (int i = 0; i < 2*args_passed_cc && !done; ++i) { // bounded number of passes; stops early once a pass completes everything
+ done = true;
+ bool spill = (i > args_passed_cc); // Start spilling?
+ // Iterate over all arguments (in reverse)
+ for (int from_index = args_passed-1, to_index = args_passed_cc-1, sig_index = sig_cc->length()-1; sig_index >= 0; sig_index--) {
+ if (SigEntry::is_reserved_entry(sig_cc, sig_index)) {
+ to_index--; // Skip reserved entry
+ } else {
+ assert(from_index >= 0, "index out of bounds");
+ VMReg reg = regs[from_index].first();
+ if (spill && reg->is_valid() && reg_state[reg->value()] == reg_readonly) {
+ // Spill argument to be able to write the source and resolve circular dependencies
+ VMReg spill_reg = reg->is_XMMRegister() ? xmm8->as_VMReg() : r14->as_VMReg();
+ bool res = move_helper(reg, spill_reg, T_DOUBLE, reg_state, sp_inc); // with T_DOUBLE move_helper uses movdbl for XMM sources and movq otherwise
+ assert(res, "Spilling should not fail");
+ // Set spill_reg as new source and update state
+ reg = spill_reg;
+ regs[from_index].set1(reg);
+ reg_state[reg->value()] = reg_readonly;
+ spill = false; // Do not spill again in this round
+ }
+ BasicType bt = sig_cc->at(sig_index)._bt;
+ if (SigEntry::skip_value_delimiters(sig_cc, sig_index)) { // plain argument: move it to its scalarized location
+ assert(to_index >= 0, "index out of bounds");
+ done &= move_helper(reg, regs_cc[to_index].first(), bt, reg_state, sp_inc);
+ to_index--;
+ } else if (!receiver_only || (from_index == 0 && bt == T_VOID)) { // value type end delimiter: unpack its fields (with receiver_only, only for from_index 0)
+ done &= unpack_value_helper(sig_cc, sig_index, reg, regs_cc, to_index, reg_state, sp_inc); // updates sig_index and to_index by reference
+ } else {
+ continue; // other delimiter entries with receiver_only: nothing to do, from_index stays unchanged
+ }
+ from_index--;
+ }
+ }
+ }
+ guarantee(done, "Could not resolve circular dependency when unpacking value type arguments");
+
+ // Emit code for verified entry and save increment for stack repair on return
+ verified_entry(C, sp_inc);
+}
+
+// Restores the stack on return: reloads rbp and releases the frame of compilation C. If C needs stack repair, rsp is advanced by the amount stored in the frame at C->sp_inc_offset(); otherwise the fixed frame size is removed and rbp is popped.
+void MacroAssembler::restore_stack(Compile* C) {
+ int framesize = C->frame_size_in_bytes();
+ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+ // Remove word for return addr already pushed and RBP
+ framesize -= 2*wordSize;
+
+ if (C->needs_stack_repair()) {
+ // Restore rbp and repair rsp by adding the stack increment
+ movq(rbp, Address(rsp, framesize)); // reload rbp from the slot at rsp + framesize
+ addq(rsp, Address(rsp, C->sp_inc_offset())); // the increment is read from memory; presumably the value stored by verified_entry - TODO confirm
+ } else {
+ if (framesize > 0) {
+ addq(rsp, framesize); // release the fixed-size part of the frame
+ }
+ pop(rbp);
+ }
+}
+
+void MacroAssembler::clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, bool is_large, bool word_copy_only) {
// cnt - number of qwords (8-byte words).
// base - start address, qword aligned.
// is_large - if optimizers know cnt is larger than InitArrayShortSize
assert(base==rdi, "base register must be edi for rep stos");
- assert(tmp==rax, "tmp register must be eax for rep stos");
+ assert(val==rax, "tmp register must be eax for rep stos");
assert(cnt==rcx, "cnt register must be ecx for rep stos");
assert(InitArrayShortSize % BytesPerLong == 0,
"InitArrayShortSize should be the multiple of BytesPerLong");
Label DONE;
- if (!is_large || !UseXMMForObjInit) {
- xorptr(tmp, tmp);
- }
-
if (!is_large) {
Label LOOP, LONG;
cmpptr(cnt, InitArrayShortSize/BytesPerLong);
jccb(Assembler::greater, LONG);
@@ -5603,25 +5955,24 @@
decrement(cnt);
jccb(Assembler::negative, DONE); // Zero length
// Use individual pointer-sized stores for small counts:
BIND(LOOP);
- movptr(Address(base, cnt, Address::times_ptr), tmp);
+ movptr(Address(base, cnt, Address::times_ptr), val);
decrement(cnt);
jccb(Assembler::greaterEqual, LOOP);
jmpb(DONE);
BIND(LONG);
}
// Use longer rep-prefixed ops for non-small counts:
- if (UseFastStosb) {
+ if (UseFastStosb && !word_copy_only) {
shlptr(cnt, 3); // convert to number of bytes
rep_stosb();
} else if (UseXMMForObjInit) {
- movptr(tmp, base);
- xmm_clear_mem(tmp, cnt, xtmp);
+ xmm_clear_mem(base, cnt, val, xtmp);
} else {
NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
rep_stos();
}
< prev index next >