--- old/src/hotspot/cpu/x86/x86_64.ad	2019-03-11 14:25:00.978356057 +0100
+++ new/src/hotspot/cpu/x86/x86_64.ad	2019-03-11 14:25:00.750356060 +0100
@@ -791,10 +791,8 @@
   Compile* C = ra_->C;
   MacroAssembler _masm(&cbuf);
 
-  int framesize = C->frame_size_in_bytes();
-  int bangsize = C->bang_size_in_bytes();
-
-  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != NULL);
+  __ verified_entry(C);
+  __ bind(*_verified_entry);
 
   C->set_frame_complete(cbuf.insts_size());
 
@@ -868,29 +866,8 @@
     __ vzeroupper();
   }
 
-  int framesize = C->frame_size_in_bytes();
-  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
-  // Remove word for return adr already pushed
-  // and RBP
-  framesize -= 2*wordSize;
-
-  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
+  __ restore_stack(C);
 
-  if (framesize) {
-    emit_opcode(cbuf, Assembler::REX_W);
-    if (framesize < 0x80) {
-      emit_opcode(cbuf, 0x83); // addq rsp, #framesize
-      emit_rm(cbuf, 0x3, 0x00, RSP_enc);
-      emit_d8(cbuf, framesize);
-    } else {
-      emit_opcode(cbuf, 0x81); // addq rsp, #framesize
-      emit_rm(cbuf, 0x3, 0x00, RSP_enc);
-      emit_d32(cbuf, framesize);
-    }
-  }
-
-  // popq rbp
-  emit_opcode(cbuf, 0x58 | RBP_enc);
 
   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
     __ reserved_stack_check();
@@ -1463,6 +1440,43 @@
 
 //=============================================================================
 #ifndef PRODUCT
+// Debug listing for MachVEPNode (compiled only when PRODUCT is not defined): prints the node name.
+void MachVEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
+{
+  st->print_cr("MachVEPNode");
+}
+#endif
+
+// Emits the entry sequence selected by _verified:
+//  - false: compare rax with the klass of the object in j_rarg0 and jump to
+//    SharedRuntime::get_ic_miss_stub() when they differ;
+//  - true: unpack the value type arguments, then jump to *_verified_entry.
+void MachVEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
+{
+  MacroAssembler masm(&cbuf);
+  if (!_verified) {
+    if (UseCompressedClassPointers) {
+      masm.load_klass(rscratch1, j_rarg0);
+      masm.cmpptr(rax, rscratch1);
+    } else {
+      masm.cmpptr(rax, Address(j_rarg0, oopDesc::klass_offset_in_bytes()));
+    }
+    masm.jump_cc(Assembler::notEqual, RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+  } else {
+    // Unpack value type args passed as oop and then jump to
+    // the verified entry point (skipping the unverified entry).
+    masm.unpack_value_args(ra_->C, _receiver_only);
+    masm.jmp(*_verified_entry);
+  }
+}
+
+uint MachVEPNode::size(PhaseRegAlloc* ra_) const
+{
+  return MachNode::size(ra_); // too many variables; just compute it the hard way
+}
+
+//=============================================================================
+#ifndef PRODUCT
 void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
 {
   if (UseCompressedClassPointers) {
@@ -6601,6 +6615,21 @@
   ins_pipe(ialu_reg_reg); // XXX
 %}
 
+// CastP2X with the source in an rRegN register: a plain register move that is
+// skipped when dst and src are already the same register.
+instruct castN2X(rRegL dst, rRegN src)
+%{
+  match(Set dst (CastP2X src));
+
+  format %{ "movq $dst, $src\t# ptr -> long" %}
+  ins_encode %{
+    if ($dst$$reg != $src$$reg) {
+      __ movptr($dst$$Register, $src$$Register);
+    }
+  %}
+  ins_pipe(ialu_reg_reg); // XXX
+%}
+
 instruct castP2X(rRegL dst, rRegP src)
 %{
   match(Set dst (CastP2X src));
@@ -10836,15 +10865,14 @@
 
 // =======================================================================
 // fast clearing of an array
-instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
+instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                   Universe dummy, rFlagsReg cr)
 %{
-  predicate(!((ClearArrayNode*)n)->is_large());
-  match(Set dummy (ClearArray cnt base));
-  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
+  predicate(!((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
+  match(Set dummy (ClearArray (Binary cnt base) val));
+  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
 
   format %{ $$template
-    $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
     $$emit$$"cmp InitArrayShortSize,rcx\n\t"
     $$emit$$"jg LARGE\n\t"
     $$emit$$"dec rcx\n\t"
@@ -10858,19 +10886,20 @@
        $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
        $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
     } else if (UseXMMForObjInit) {
-       $$emit$$"mov rdi,rax\n\t"
-       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
+       $$emit$$"movdq $tmp, $val\n\t"
+       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
        $$emit$$"jmpq L_zero_64_bytes\n\t"
        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
-       $$emit$$"vmovdqu ymm0,(rax)\n\t"
-       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
        $$emit$$"add 0x40,rax\n\t"
        $$emit$$"# L_zero_64_bytes:\n\t"
        $$emit$$"sub 0x8,rcx\n\t"
        $$emit$$"jge L_loop\n\t"
        $$emit$$"add 0x4,rcx\n\t"
        $$emit$$"jl L_tail\n\t"
-       $$emit$$"vmovdqu ymm0,(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
        $$emit$$"add 0x20,rax\n\t"
        $$emit$$"sub 0x4,rcx\n\t"
        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
@@ -10889,38 +10918,96 @@
     $$emit$$"# DONE"
   %}
   ins_encode %{
-    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
-                 $tmp$$XMMRegister, false);
+    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
+                 $tmp$$XMMRegister, false, false);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
+// ClearArray variant for nodes that are not is_large() but are word_copy_only():
+// passes true as the last clear_mem argument; the listing has no UseFastStosb path.
+instruct rep_stos_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
+                  Universe dummy, rFlagsReg cr)
+%{
+  predicate(!((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
+  match(Set dummy (ClearArray (Binary cnt base) val));
+  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
+
+  format %{ $$template
+    $$emit$$"cmp InitArrayShortSize,rcx\n\t"
+    $$emit$$"jg LARGE\n\t"
+    $$emit$$"dec rcx\n\t"
+    $$emit$$"js DONE\t# Zero length\n\t"
+    $$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
+    $$emit$$"dec rcx\n\t"
+    $$emit$$"jge LOOP\n\t"
+    $$emit$$"jmp DONE\n\t"
+    $$emit$$"# LARGE:\n\t"
+    if (UseXMMForObjInit) {
+       $$emit$$"movdq $tmp, $val\n\t"
+       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
+       $$emit$$"jmpq L_zero_64_bytes\n\t"
+       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
+       $$emit$$"add 0x40,rax\n\t"
+       $$emit$$"# L_zero_64_bytes:\n\t"
+       $$emit$$"sub 0x8,rcx\n\t"
+       $$emit$$"jge L_loop\n\t"
+       $$emit$$"add 0x4,rcx\n\t"
+       $$emit$$"jl L_tail\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
+       $$emit$$"add 0x20,rax\n\t"
+       $$emit$$"sub 0x4,rcx\n\t"
+       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
+       $$emit$$"add 0x4,rcx\n\t"
+       $$emit$$"jle L_end\n\t"
+       $$emit$$"dec rcx\n\t"
+       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
+       $$emit$$"vmovq $tmp,(rax)\n\t"
+       $$emit$$"add 0x8,rax\n\t"
+       $$emit$$"dec rcx\n\t"
+       $$emit$$"jge L_sloop\n\t"
+       $$emit$$"# L_end:\n\t"
+    } else {
+       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
+    }
+    $$emit$$"# DONE"
+  %}
+  ins_encode %{
+    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
+                 $tmp$$XMMRegister, false, true);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
                         Universe dummy, rFlagsReg cr)
 %{
-  predicate(((ClearArrayNode*)n)->is_large());
-  match(Set dummy (ClearArray cnt base));
-  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
+  predicate(((ClearArrayNode*)n)->is_large() && !((ClearArrayNode*)n)->word_copy_only());
+  match(Set dummy (ClearArray (Binary cnt base) val));
+  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
 
   format %{ $$template
     if (UseFastStosb) {
-       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
        $$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
        $$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
     } else if (UseXMMForObjInit) {
-       $$emit$$"mov rdi,rax\t# ClearArray:\n\t"
-       $$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
+       $$emit$$"movdq $tmp, $val\n\t"
+       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
        $$emit$$"jmpq L_zero_64_bytes\n\t"
        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
-       $$emit$$"vmovdqu ymm0,(rax)\n\t"
-       $$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
        $$emit$$"add 0x40,rax\n\t"
        $$emit$$"# L_zero_64_bytes:\n\t"
        $$emit$$"sub 0x8,rcx\n\t"
        $$emit$$"jge L_loop\n\t"
        $$emit$$"add 0x4,rcx\n\t"
        $$emit$$"jl L_tail\n\t"
-       $$emit$$"vmovdqu ymm0,(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
        $$emit$$"add 0x20,rax\n\t"
        $$emit$$"sub 0x4,rcx\n\t"
        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
@@ -10934,13 +11021,60 @@
        $$emit$$"jge L_sloop\n\t"
        $$emit$$"# L_end:\n\t"
     } else {
-       $$emit$$"xorq rax, rax\t# ClearArray:\n\t"
        $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
     }
   %}
   ins_encode %{
-    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
-                 $tmp$$XMMRegister, true);
+    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
+                 $tmp$$XMMRegister, true, false);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// ClearArray variant for nodes that are both is_large() and word_copy_only():
+// passes true for the last two clear_mem arguments; the listing has no UseFastStosb path.
+instruct rep_stos_large_word_copy(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegL val,
+                        Universe dummy, rFlagsReg cr)
+%{
+  predicate(((ClearArrayNode*)n)->is_large() && ((ClearArrayNode*)n)->word_copy_only());
+  match(Set dummy (ClearArray (Binary cnt base) val));
+  effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL cr);
+
+  format %{ $$template
+    if (UseXMMForObjInit) {
+       $$emit$$"movdq $tmp, $val\n\t"
+       $$emit$$"punpcklqdq $tmp, $tmp\n\t"
+       $$emit$$"vinserti128_high $tmp, $tmp\n\t"
+       $$emit$$"jmpq L_zero_64_bytes\n\t"
+       $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
+       $$emit$$"vmovdqu $tmp,0x20(rax)\n\t"
+       $$emit$$"add 0x40,rax\n\t"
+       $$emit$$"# L_zero_64_bytes:\n\t"
+       $$emit$$"sub 0x8,rcx\n\t"
+       $$emit$$"jge L_loop\n\t"
+       $$emit$$"add 0x4,rcx\n\t"
+       $$emit$$"jl L_tail\n\t"
+       $$emit$$"vmovdqu $tmp,(rax)\n\t"
+       $$emit$$"add 0x20,rax\n\t"
+       $$emit$$"sub 0x4,rcx\n\t"
+       $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
+       $$emit$$"add 0x4,rcx\n\t"
+       $$emit$$"jle L_end\n\t"
+       $$emit$$"dec rcx\n\t"
+       $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
+       $$emit$$"vmovq $tmp,(rax)\n\t"
+       $$emit$$"add 0x8,rax\n\t"
+       $$emit$$"dec rcx\n\t"
+       $$emit$$"jge L_sloop\n\t"
+       $$emit$$"# L_end:\n\t"
+    } else {
+       $$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
+    }
+  %}
+  ins_encode %{
+    __ clear_mem($base$$Register, $cnt$$Register, $val$$Register,
+                 $tmp$$XMMRegister, true, true);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -12513,8 +12647,24 @@
 %}
 
 // Call runtime without safepoint
+// entry point is null, target holds the address to call
+instruct CallLeafNoFPInDirect(rRegP target)
+%{
+  predicate(n->as_Call()->entry_point() == NULL);
+  match(CallLeafNoFP target);
+
+  ins_cost(300);
+  format %{ "call_leaf_nofp,runtime indirect " %}
+  ins_encode %{
+    __ call($target$$Register);
+  %}
+
+  ins_pipe(pipe_slow);
+%}
+
 instruct CallLeafNoFPDirect(method meth)
 %{
+  predicate(n->as_Call()->entry_point() != NULL);
   match(CallLeafNoFP);
   effect(USE meth);
 