--- old/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2019-03-11 14:24:55.798356128 +0100
+++ new/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2019-03-11 14:24:55.594356131 +0100
@@ -47,6 +47,7 @@
 #include "runtime/stubRoutines.hpp"
 #include "runtime/thread.hpp"
 #include "utilities/macros.hpp"
+#include "vmreg_x86.inline.hpp"
 #include "crc32c.h"
 #ifdef COMPILER2
 #include "opto/intrinsicnode.hpp"
@@ -2425,6 +2426,10 @@
   call_VM_leaf(entry_point, 3);
 }
 
+void MacroAssembler::super_call_VM_leaf(address entry_point) {
+  MacroAssembler::call_VM_leaf_base(entry_point, 1);
+}
+
 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
   pass_arg0(this, arg_0);
   MacroAssembler::call_VM_leaf_base(entry_point, 1);
@@ -3419,6 +3424,51 @@
   }
 }
 
+void MacroAssembler::test_klass_is_value(Register klass, Register temp_reg, Label& is_value) {
+  movl(temp_reg, Address(klass, Klass::access_flags_offset()));
+  testl(temp_reg, JVM_ACC_VALUE);
+  jcc(Assembler::notZero, is_value);
+}
+
+void MacroAssembler::test_field_is_flattenable(Register flags, Register temp_reg, Label& is_flattenable) {
+  movl(temp_reg, flags);
+  shrl(temp_reg, ConstantPoolCacheEntry::is_flattenable_field_shift);
+  andl(temp_reg, 0x1);
+  testl(temp_reg, temp_reg);
+  jcc(Assembler::notZero, is_flattenable);
+}
+
+void MacroAssembler::test_field_is_not_flattenable(Register flags, Register temp_reg, Label& notFlattenable) {
+  movl(temp_reg, flags);
+  shrl(temp_reg, ConstantPoolCacheEntry::is_flattenable_field_shift);
+  andl(temp_reg, 0x1);
+  testl(temp_reg, temp_reg);
+  jcc(Assembler::zero, notFlattenable);
+}
+
+void MacroAssembler::test_field_is_flattened(Register flags, Register temp_reg, Label& is_flattened) {
+  movl(temp_reg, flags);
+  shrl(temp_reg, ConstantPoolCacheEntry::is_flattened_field_shift);
+  andl(temp_reg, 0x1);
+  testl(temp_reg, temp_reg);
+  jcc(Assembler::notZero, is_flattened);
+}
+
+void MacroAssembler::test_flat_array_klass(Register klass, Register temp_reg,
+                                           Label& is_flat_array) {
+  movl(temp_reg, Address(klass, Klass::layout_helper_offset()));
+  sarl(temp_reg, Klass::_lh_array_tag_shift);
+  cmpl(temp_reg, Klass::_lh_array_tag_vt_value);
+  jcc(Assembler::equal, is_flat_array);
+}
+
+
+void MacroAssembler::test_flat_array_oop(Register oop, Register temp_reg,
+                                         Label& is_flat_array) {
+  load_klass(temp_reg, oop);
+  test_flat_array_klass(temp_reg, temp_reg, is_flat_array);
+}
+
 void MacroAssembler::os_breakpoint() {
   // instead of directly emitting a breakpoint, call os:breakpoint for better debugability
   // (e.g., MSVC can't call ps() otherwise)
@@ -4471,7 +4521,11 @@
 }
 
 void MacroAssembler::verify_oop(Register reg, const char* s) {
-  if (!VerifyOops) return;
+  if (!VerifyOops || VerifyAdapterSharing) {
+    // Below address of the code string confuses VerifyAdapterSharing
+    // because it may differ between otherwise equivalent adapters.
+    return;
+  }
 
   // Pass register number to verify_oop_subroutine
   const char* b = NULL;
@@ -4561,7 +4615,11 @@
 
 
 void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
-  if (!VerifyOops) return;
+  if (!VerifyOops || VerifyAdapterSharing) {
+    // Below address of the code string confuses VerifyAdapterSharing
+    // because it may differ between otherwise equivalent adapters.
+    return;
+  }
 
   // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord);
   // Pass register number to verify_oop_subroutine
@@ -5437,7 +5495,12 @@
 #endif // _LP64
 
 // C2 compiled method's prolog code.
-void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) {
+void MacroAssembler::verified_entry(Compile* C, int sp_inc) {
+  int framesize = C->frame_size_in_bytes();
+  int bangsize = C->bang_size_in_bytes();
+  bool fp_mode_24b = C->in_24_bit_fp_mode();
+  int stack_bang_size = C->need_stack_bang(bangsize) ? bangsize : 0;
+  bool is_stub = C->stub_function() != NULL;
 
   // WARNING: Initial instruction MUST be 5 bytes or longer so that
   // NativeJump::patch_verified_entry will be able to patch out the entry
@@ -5490,6 +5553,12 @@
     }
   }
 
+  if (C->needs_stack_repair()) {
+    // Save stack increment (also account for fixed framesize and rbp)
+    assert((sp_inc & (StackAlignmentInBytes-1)) == 0, "stack increment not aligned");
+    movptr(Address(rsp, C->sp_inc_offset()), sp_inc + framesize + wordSize);
+  }
+
   if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
     framesize -= wordSize;
     movptr(Address(rsp, framesize), (int32_t)0xbadb100d);
@@ -5526,14 +5595,16 @@
 }
 
 // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
-void MacroAssembler::xmm_clear_mem(Register base, Register cnt, XMMRegister xtmp) {
+void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp) {
   // cnt - number of qwords (8-byte words).
   // base - start address, qword aligned.
   Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
+  movdq(xtmp, val);
   if (UseAVX >= 2) {
-    vpxor(xtmp, xtmp, xtmp, AVX_256bit);
+    punpcklqdq(xtmp, xtmp);
+    vinserti128_high(xtmp, xtmp);
   } else {
-    pxor(xtmp, xtmp);
+    punpcklqdq(xtmp, xtmp);
   }
   jmp(L_zero_64_bytes);
 
@@ -5577,22 +5648,303 @@
   BIND(L_end);
 }
 
-void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp, bool is_large) {
+// Move a value between registers/stack slots and update the reg_state
+bool MacroAssembler::move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[], int ret_off) {
+  if (reg_state[to->value()] == reg_written) {
+    return true; // Already written
+  }
+  if (from != to && bt != T_VOID) {
+    if (reg_state[to->value()] == reg_readonly) {
+      return false; // Not yet writable
+    }
+    if (from->is_reg()) {
+      if (to->is_reg()) {
+        if (from->is_XMMRegister()) {
+          if (bt == T_DOUBLE) {
+            movdbl(to->as_XMMRegister(), from->as_XMMRegister());
+          } else {
+            assert(bt == T_FLOAT, "must be float");
+            movflt(to->as_XMMRegister(), from->as_XMMRegister());
+          }
+        } else {
+          movq(to->as_Register(), from->as_Register());
+        }
+      } else {
+        int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
+        assert(st_off != ret_off, "overwriting return address at %d", st_off);
+        Address to_addr = Address(rsp, st_off);
+        if (from->is_XMMRegister()) {
+          if (bt == T_DOUBLE) {
+            movdbl(to_addr, from->as_XMMRegister());
+          } else {
+            assert(bt == T_FLOAT, "must be float");
+            movflt(to_addr, from->as_XMMRegister());
+          }
+        } else {
+          movq(to_addr, from->as_Register());
+        }
+      }
+    } else {
+      Address from_addr = Address(rsp, from->reg2stack() * VMRegImpl::stack_slot_size + wordSize);
+      if (to->is_reg()) {
+        if (to->is_XMMRegister()) {
+          if (bt == T_DOUBLE) {
+            movdbl(to->as_XMMRegister(), from_addr);
+          } else {
+            assert(bt == T_FLOAT, "must be float");
+            movflt(to->as_XMMRegister(), from_addr);
+          }
+        } else {
+          movq(to->as_Register(), from_addr);
+        }
+      } else {
+        int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
+        assert(st_off != ret_off, "overwriting return address at %d", st_off);
+        movq(r13, from_addr);
+        movq(Address(rsp, st_off), r13);
+      }
+    }
+  }
+  // Update register states
+  reg_state[from->value()] = reg_writable;
+  reg_state[to->value()] = reg_written;
+  return true;
+}
+
+// Read all fields from a value type oop and store the values in registers/stack slots
+bool MacroAssembler::unpack_value_helper(const GrowableArray<SigEntry>* sig, int& sig_index, VMReg from, VMRegPair* regs_to, int& to_index, RegState reg_state[], int ret_off) {
+  Register fromReg = from->is_reg() ? from->as_Register() : noreg;
+  assert(sig->at(sig_index)._bt == T_VOID, "should be at end delimiter");
+
+  int vt = 1;
+  bool done = true;
+  bool mark_done = true;
+  do {
+    sig_index--;
+    BasicType bt = sig->at(sig_index)._bt;
+    if (bt == T_VALUETYPE) {
+      vt--;
+    } else if (bt == T_VOID &&
+               sig->at(sig_index-1)._bt != T_LONG &&
+               sig->at(sig_index-1)._bt != T_DOUBLE) {
+      vt++;
+    } else if (SigEntry::is_reserved_entry(sig, sig_index)) {
+      to_index--; // Ignore this
+    } else {
+      assert(to_index >= 0, "invalid to_index");
+      VMRegPair pair_to = regs_to[to_index--];
+      VMReg to = pair_to.first();
+
+      if (bt == T_VOID) continue;
+
+      int idx = (int)to->value();
+      if (reg_state[idx] == reg_readonly) {
+        if (idx != from->value()) {
+          mark_done = false;
+        }
+        done = false;
+        continue;
+      } else if (reg_state[idx] == reg_written) {
+        continue;
+      } else {
+        assert(reg_state[idx] == reg_writable, "must be writable");
+        reg_state[idx] = reg_written;
+      }
+
+      if (fromReg == noreg) {
+        int st_off = from->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
+        movq(r10, Address(rsp, st_off));
+        fromReg = r10;
+      }
+
+      int off = sig->at(sig_index)._offset;
+      assert(off > 0, "offset in object should be positive");
+      bool is_oop = (bt == T_OBJECT || bt == T_ARRAY);
+
+      Address fromAddr = Address(fromReg, off);
+      bool is_signed = (bt != T_CHAR) && (bt != T_BOOLEAN);
+      if (!to->is_XMMRegister()) {
+        Register dst = to->is_stack() ? r13 : to->as_Register();
+        if (is_oop) {
+          load_heap_oop(dst, fromAddr);
+        } else {
+          load_sized_value(dst, fromAddr, type2aelembytes(bt), is_signed);
+        }
+        if (to->is_stack()) {
+          int st_off = to->reg2stack() * VMRegImpl::stack_slot_size + wordSize;
+          assert(st_off != ret_off, "overwriting return address at %d", st_off);
+          movq(Address(rsp, st_off), dst);
+        }
+      } else {
+        if (bt == T_DOUBLE) {
+          movdbl(to->as_XMMRegister(), fromAddr);
+        } else {
+          assert(bt == T_FLOAT, "must be float");
+          movflt(to->as_XMMRegister(), fromAddr);
+        }
+      }
+    }
+  } while (vt != 0);
+  if (mark_done && reg_state[from->value()] != reg_written) {
+    // This is okay because no one else will write to that slot
+    reg_state[from->value()] = reg_writable;
+  }
+  return done;
+}
+
+// Unpack all value type arguments passed as oops
+void MacroAssembler::unpack_value_args(Compile* C, bool receiver_only) {
+  assert(C->has_scalarized_args(), "value type argument scalarization is disabled");
+  Method* method = C->method()->get_Method();
+  const GrowableArray<SigEntry>* sig_cc = method->adapter()->get_sig_cc();
+  assert(sig_cc != NULL, "must have scalarized signature");
+
+  // Get unscalarized calling convention
+  BasicType* sig_bt = NEW_RESOURCE_ARRAY(BasicType, sig_cc->length());
+  int args_passed = 0;
+  if (!method->is_static()) {
+    sig_bt[args_passed++] = T_OBJECT;
+  }
+  if (!receiver_only) {
+    for (SignatureStream ss(method->signature()); !ss.at_return_type(); ss.next()) {
+      BasicType bt = ss.type();
+      sig_bt[args_passed++] = bt;
+      if (type2size[bt] == 2) {
+        sig_bt[args_passed++] = T_VOID;
+      }
+    }
+  } else {
+    // Only unpack the receiver, all other arguments are already scalarized
+    InstanceKlass* holder = method->method_holder();
+    int rec_len = holder->is_value() ? ValueKlass::cast(holder)->extended_sig()->length() : 1;
+    // Copy scalarized signature but skip receiver, value type delimiters and reserved entries
+    for (int i = 0; i < sig_cc->length(); i++) {
+      if (!SigEntry::is_reserved_entry(sig_cc, i)) {
+        if (SigEntry::skip_value_delimiters(sig_cc, i) && rec_len <= 0) {
+          sig_bt[args_passed++] = sig_cc->at(i)._bt;
+        }
+        rec_len--;
+      }
+    }
+  }
+  VMRegPair* regs = NEW_RESOURCE_ARRAY(VMRegPair, args_passed);
+  int args_on_stack = SharedRuntime::java_calling_convention(sig_bt, regs, args_passed, false);
+
+  // Get scalarized calling convention
+  int args_passed_cc = SigEntry::fill_sig_bt(sig_cc, sig_bt);
+  VMRegPair* regs_cc = NEW_RESOURCE_ARRAY(VMRegPair, sig_cc->length());
+  int args_on_stack_cc = SharedRuntime::java_calling_convention(sig_bt, regs_cc, args_passed_cc, false);
+
+  // Check if we need to extend the stack for unpacking
+  int sp_inc = (args_on_stack_cc - args_on_stack) * VMRegImpl::stack_slot_size;
+  if (sp_inc > 0) {
+    // Save the return address, adjust the stack (make sure it is properly
+    // 16-byte aligned) and copy the return address to the new top of the stack.
+    pop(r13);
+    sp_inc = align_up(sp_inc, StackAlignmentInBytes);
+    subptr(rsp, sp_inc);
+    push(r13);
+  } else {
+    // The scalarized calling convention needs less stack space than the unscalarized one.
+    // No need to extend the stack, the caller will take care of these adjustments.
+    sp_inc = 0;
+  }
+
+  // Initialize register/stack slot states (make all writable)
+  int max_stack = MAX2(args_on_stack + sp_inc/VMRegImpl::stack_slot_size, args_on_stack_cc);
+  int max_reg = VMRegImpl::stack2reg(max_stack)->value();
+  RegState* reg_state = NEW_RESOURCE_ARRAY(RegState, max_reg);
+  for (int i = 0; i < max_reg; ++i) {
+    reg_state[i] = reg_writable;
+  }
+  // Set all source registers/stack slots to readonly to prevent accidental overwriting
+  for (int i = 0; i < args_passed; ++i) {
+    VMReg reg = regs[i].first();
+    if (!reg->is_valid()) continue;
+    if (reg->is_stack()) {
+      // Update source stack location by adding stack increment
+      reg = VMRegImpl::stack2reg(reg->reg2stack() + sp_inc/VMRegImpl::stack_slot_size);
+      regs[i] = reg;
+    }
+    assert(reg->value() >= 0 && reg->value() < max_reg, "reg value out of bounds");
+    reg_state[reg->value()] = reg_readonly;
+  }
+
+  // Emit code for unpacking value type arguments
+  // We try multiple times and eventually start spilling to resolve (circular) dependencies
+  bool done = false;
+  for (int i = 0; i < 2*args_passed_cc && !done; ++i) {
+    done = true;
+    bool spill = (i > args_passed_cc); // Start spilling?
+    // Iterate over all arguments (in reverse)
+    for (int from_index = args_passed-1, to_index = args_passed_cc-1, sig_index = sig_cc->length()-1; sig_index >= 0; sig_index--) {
+      if (SigEntry::is_reserved_entry(sig_cc, sig_index)) {
+        to_index--; // Skip reserved entry
+      } else {
+        assert(from_index >= 0, "index out of bounds");
+        VMReg reg = regs[from_index].first();
+        if (spill && reg->is_valid() && reg_state[reg->value()] == reg_readonly) {
+          // Spill argument to be able to write the source and resolve circular dependencies
+          VMReg spill_reg = reg->is_XMMRegister() ? xmm8->as_VMReg() : r14->as_VMReg();
+          bool res = move_helper(reg, spill_reg, T_DOUBLE, reg_state, sp_inc);
+          assert(res, "Spilling should not fail");
+          // Set spill_reg as new source and update state
+          reg = spill_reg;
+          regs[from_index].set1(reg);
+          reg_state[reg->value()] = reg_readonly;
+          spill = false; // Do not spill again in this round
+        }
+        BasicType bt = sig_cc->at(sig_index)._bt;
+        if (SigEntry::skip_value_delimiters(sig_cc, sig_index)) {
+          assert(to_index >= 0, "index out of bounds");
+          done &= move_helper(reg, regs_cc[to_index].first(), bt, reg_state, sp_inc);
+          to_index--;
+        } else if (!receiver_only || (from_index == 0 && bt == T_VOID)) {
+          done &= unpack_value_helper(sig_cc, sig_index, reg, regs_cc, to_index, reg_state, sp_inc);
+        } else {
+          continue;
+        }
+        from_index--;
+      }
+    }
+  }
+  guarantee(done, "Could not resolve circular dependency when unpacking value type arguments");
+
+  // Emit code for verified entry and save increment for stack repair on return
+  verified_entry(C, sp_inc);
+}
+
+// Restores the stack on return
+void MacroAssembler::restore_stack(Compile* C) {
+  int framesize = C->frame_size_in_bytes();
+  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
+  // Remove word for return addr already pushed and RBP
+  framesize -= 2*wordSize;
+
+  if (C->needs_stack_repair()) {
+    // Restore rbp and repair rsp by adding the stack increment
+    movq(rbp, Address(rsp, framesize));
+    addq(rsp, Address(rsp, C->sp_inc_offset()));
+  } else {
+    if (framesize > 0) {
+      addq(rsp, framesize);
+    }
+    pop(rbp);
+  }
+}
+
+void MacroAssembler::clear_mem(Register base, Register cnt, Register val, XMMRegister xtmp, bool is_large, bool word_copy_only) {
   // cnt - number of qwords (8-byte words).
   // base - start address, qword aligned.
   // is_large - if optimizers know cnt is larger than InitArrayShortSize
   assert(base==rdi, "base register must be edi for rep stos");
-  assert(tmp==rax, "tmp register must be eax for rep stos");
+  assert(val==rax, "tmp register must be eax for rep stos");
   assert(cnt==rcx, "cnt register must be ecx for rep stos");
   assert(InitArrayShortSize % BytesPerLong == 0,
     "InitArrayShortSize should be the multiple of BytesPerLong");
 
   Label DONE;
 
-  if (!is_large || !UseXMMForObjInit) {
-    xorptr(tmp, tmp);
-  }
-
   if (!is_large) {
     Label LOOP, LONG;
     cmpptr(cnt, InitArrayShortSize/BytesPerLong);
@@ -5605,7 +5957,7 @@
 
     // Use individual pointer-sized stores for small counts:
     BIND(LOOP);
-    movptr(Address(base, cnt, Address::times_ptr), tmp);
+    movptr(Address(base, cnt, Address::times_ptr), val);
     decrement(cnt);
     jccb(Assembler::greaterEqual, LOOP);
     jmpb(DONE);
@@ -5614,12 +5966,11 @@
   }
 
   // Use longer rep-prefixed ops for non-small counts:
-  if (UseFastStosb) {
+  if (UseFastStosb && !word_copy_only) {
     shlptr(cnt, 3); // convert to number of bytes
     rep_stosb();
   } else if (UseXMMForObjInit) {
-    movptr(tmp, base);
-    xmm_clear_mem(tmp, cnt, xtmp);
+    xmm_clear_mem(base, cnt, val, xtmp);
  } else {
     NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
     rep_stos();
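
Reviewer note (not part of the patch): the standalone C++ sketch below is a minimal model of the retry-and-spill scheme that unpack_value_args() uses above to order its argument-shuffling moves. Source locations start as reg_readonly, destinations become reg_written once moved into, and a stuck circular dependency is broken by spilling a source to a scratch slot. The names (Move, try_move, spill_slot) and the simplified pass/spill thresholds are hypothetical illustrations, not HotSpot code.

// Standalone model of the reg_state bookkeeping; compiles with any C++11 compiler.
#include <cassert>
#include <cstdio>
#include <vector>

enum RegState { reg_readonly, reg_writable, reg_written };

struct Move { int from; int to; };  // abstract register/stack-slot indices

// Mirrors the idea of move_helper(): a move may only execute once its
// destination is no longer a pending source (reg_readonly); afterwards the
// source becomes writable again and the destination is marked written.
static bool try_move(const Move& m, std::vector<RegState>& state) {
  if (state[m.to] == reg_written) return true;   // already done
  if (state[m.to] == reg_readonly) return false; // destination still needed as a source
  std::printf("move %d -> %d\n", m.from, m.to);
  state[m.from] = reg_writable;
  state[m.to]   = reg_written;
  return true;
}

int main() {
  // A circular dependency: 0 -> 1 and 1 -> 0. Neither move can go first.
  std::vector<Move> moves = { {0, 1}, {1, 0} };
  const int spill_slot = 2;
  std::vector<RegState> state(3, reg_writable);
  for (const Move& m : moves) state[m.from] = reg_readonly;  // protect sources

  bool done = false;
  for (int pass = 0; pass < 2 * (int)moves.size() && !done; ++pass) {
    done = true;
    bool spill = (pass >= (int)moves.size());  // start spilling once plain retries stall
    for (Move& m : moves) {
      if (spill && state[m.from] == reg_readonly && state[m.to] == reg_readonly) {
        // Break the cycle: move the source aside, then read it from the spill slot.
        Move to_spill = { m.from, spill_slot };
        bool ok = try_move(to_spill, state);
        assert(ok && "spilling should not fail");
        m.from = spill_slot;
        state[spill_slot] = reg_readonly;
        spill = false;  // at most one spill per pass, as in the patch
      }
      done &= try_move(m, state);
    }
  }
  assert(done && "could not resolve circular dependency");
  return 0;
}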