--- /dev/null 2017-03-07 11:44:12.271151064 +0100 +++ new/src/cpu/x86/vm/gc/g1/g1BSCodeGen_x86.cpp 2017-04-25 16:46:05.379172045 +0200 @@ -0,0 +1,542 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/g1/c1G1BSCodeGen.hpp" +#include "gc/g1/g1CardTable.hpp" +#include "gc/g1/g1BarrierSet.hpp" +#include "gc/g1/g1BSCodeGen.hpp" +#include "gc/g1/heapRegion.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "runtime/thread.hpp" +#include "interpreter/interp_masm.hpp" + +#define __ masm-> + +void G1BSCodeGen::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count) { + bool dest_uninitialized = (decorators & DEST_NOT_INITIALIZED) != 0; + + if (!dest_uninitialized) { + __ pusha(); // push registers + if (count == c_rarg0) { + if (addr == c_rarg1) { + // exactly backwards!! + __ xchgptr(c_rarg1, c_rarg0); + } else { + __ movptr(c_rarg1, count); + __ movptr(c_rarg0, addr); + } + } else { + __ movptr(c_rarg0, addr); + __ movptr(c_rarg1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ModRefBarrierSet::static_write_ref_array_pre), 2); + __ popa(); + } +} + +void G1BSCodeGen::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp2) { + bool on_oop = type == T_OBJECT || type == T_ARRAY; + bool on_weak = (decorators & GC_ACCESS_ON_WEAK) != 0; + bool on_phantom = (decorators & GC_ACCESS_ON_PHANTOM) != 0; + bool on_reference = on_weak || on_phantom; + ModRefBSCodeGen::load_at(masm, decorators, type, dst, src, tmp1, tmp2); + if (on_oop && on_reference) { + const Register thread = NOT_LP64(rcx) LP64_ONLY(r15_thread); + NOT_LP64(__ get_thread(thread)); + + // Generate the G1 pre-barrier code to log the value of + // the referent field or jniHandle in an SATB buffer. + g1_write_barrier_pre(masm /* masm */, + noreg /* obj */, + dst /* pre_val */, + thread /* thread */, + tmp1 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + } +} + +void G1BSCodeGen::g1_write_barrier_pre(MacroAssembler* masm, + Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + +#ifdef _LP64 + assert(thread == r15_thread, "must be"); +#endif // _LP64 + + Label done; + Label runtime; + + assert(pre_val != noreg, "check this code"); + + if (obj != noreg) { + assert_different_registers(obj, pre_val, tmp); + assert(pre_val != rax, "check this code"); + } + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_active())); + Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_buf())); + + + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ cmpl(in_progress, 0); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ cmpb(in_progress, 0); + } + __ jcc(Assembler::equal, done); + + // Do we need to load the previous value? + if (obj != noreg) { + __ load_heap_oop(pre_val, Address(obj, 0)); + } + + // Is the previous value null? + __ cmpptr(pre_val, (int32_t) NULL_WORD); + __ jcc(Assembler::equal, done); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) + + __ movptr(tmp, index); // tmp := *index_adr + __ cmpptr(tmp, 0); // tmp == 0? + __ jcc(Assembler::equal, runtime); // If yes, goto runtime + + __ subptr(tmp, wordSize); // tmp := tmp - wordSize + __ movptr(index, tmp); // *index_adr := tmp + __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr + + // Record the previous value + __ movptr(Address(tmp, 0), pre_val); + __ jmp(done); + + __ bind(runtime); + // save the live input values + if(tosca_live) __ push(rax); + + if (obj != noreg && obj != rax) + __ push(obj); + + if (pre_val != rax) + __ push(pre_val); + + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssember::call_VM_leaf_base) + // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. + // + // If we care generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then ebp might be pointing to + // the caller frame and so this check will most likely fail at runtime. + // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have have a full interpreter frame on the stack + // expand_call should be passed true. + + NOT_LP64( __ push(thread); ) + + if (expand_call) { + LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) + if (c_rarg1 != thread) { + __ mov(c_rarg1, thread); + } + if (c_rarg0 != pre_val) { + __ mov(c_rarg0, pre_val); + } + __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, G1BarrierSet::g1_wb_pre), 2); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::g1_wb_pre), pre_val, thread); + } + + NOT_LP64( __ pop(thread); ) + + // save the live input values + if (pre_val != rax) + __ pop(pre_val); + + if (obj != noreg && obj != rax) + __ pop(obj); + + if(tosca_live) __ pop(rax); + + __ bind(done); +} + +void G1BSCodeGen::g1_write_barrier_post(MacroAssembler* masm, + Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { +#ifdef _LP64 + assert(thread == r15_thread, "must be"); +#endif // _LP64 + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + DirtyCardQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + DirtyCardQueue::byte_offset_of_buf())); + + CardTableModRefBS* ct = + barrier_set_cast(Universe::heap()->barrier_set()); + assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // Does store cross heap regions? + + __ movptr(tmp, store_addr); + __ xorptr(tmp, new_val); + __ shrptr(tmp, HeapRegion::LogOfHRGrainBytes); + __ jcc(Assembler::equal, done); + + // crosses regions, storing NULL? + + __ cmpptr(new_val, (int32_t) NULL_WORD); + __ jcc(Assembler::equal, done); + + // storing region crossing non-NULL, is card already dirty? + + const Register card_addr = tmp; + const Register cardtable = tmp2; + + __ movptr(card_addr, store_addr); + __ shrptr(card_addr, CardTable::card_shift); + // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT + // a valid address and therefore is not properly handled by the relocation code. + __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base()); + __ addptr(card_addr, cardtable); + + __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val()); + __ jcc(Assembler::equal, done); + + __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); + __ cmpb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val()); + __ jcc(Assembler::equal, done); + + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. + + __ movb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val()); + + __ cmpl(queue_index, 0); + __ jcc(Assembler::equal, runtime); + __ subl(queue_index, wordSize); + __ movptr(tmp2, buffer); +#ifdef _LP64 + __ movslq(rscratch1, queue_index); + __ addq(tmp2, rscratch1); + __ movq(Address(tmp2, 0), card_addr); +#else + __ addl(tmp2, queue_index); + __ movl(Address(tmp2, 0), card_addr); +#endif + __ jmp(done); + + __ bind(runtime); + // save the live input values + __ push(store_addr); + __ push(new_val); +#ifdef _LP64 + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::g1_wb_post), card_addr, r15_thread); +#else + __ push(thread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::g1_wb_post), card_addr, thread); + __ pop(thread); +#endif + __ pop(new_val); + __ pop(store_addr); + + __ bind(done); +} + +void G1BSCodeGen::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + Register tmp3 = LP64_ONLY(r8) NOT_LP64(rsi); + Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx); + // flatten object address if needed + // We do it regardless of precise because we need the registers + if (dst.index() == noreg && dst.disp() == 0) { + if (dst.base() != tmp1) { + __ movptr(tmp1, dst.base()); + } + } else { + __ lea(tmp1, dst); + } + + NOT_LP64(__ get_thread(rcx)); + NOT_LP64(__ save_bcp()); + + g1_write_barrier_pre(masm /*masm*/, + tmp1 /* obj */, + tmp2 /* pre_val */, + rthread /* thread */, + tmp3 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + if (val == noreg) { + __ store_heap_oop_null(Address(tmp1, 0)); + } else { + // G1 barrier needs uncompressed oop for region cross check. + Register new_val = val; + if (UseCompressedOops) { + new_val = tmp2; + __ movptr(new_val, val); + } + __ store_heap_oop(Address(tmp1, 0), val); + g1_write_barrier_post(masm /*masm*/, + tmp1 /* store_adr */, + new_val /* new_val */, + rthread /* thread */, + tmp3 /* tmp */, + tmp2 /* tmp2 */); + } + NOT_LP64( __ restore_bcp()); +} + +void G1BSCodeGen::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp) { + __ pusha(); // push registers (overkill) + if (c_rarg0 == count) { // On win64 c_rarg0 == rcx + assert_different_registers(c_rarg1, addr); + __ mov(c_rarg1, count); + __ mov(c_rarg0, addr); + } else { + assert_different_registers(c_rarg0, count); + __ mov(c_rarg0, addr); + __ mov(c_rarg1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ModRefBarrierSet::static_write_ref_array_post), 2); + __ popa(); +} + +#undef __ +#define __ ce->masm()-> + +void G1BSCodeGen::gen_g1_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { + C1G1BSCodeGen* code_gen = (C1G1BSCodeGen*)Universe::heap()->barrier_set()->c1_code_gen(); + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. + + __ bind(stub->_entry); + assert(stub->pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); + } + + __ cmpptr(pre_val_reg, (int32_t)NULL_WORD); + __ jcc(Assembler::equal, stub->_continuation); + ce->store_parameter(stub->pre_val()->as_register(), 0); + __ call(RuntimeAddress(code_gen->pre_barrier_c1_runtime_code_blob()->code_begin())); + __ jmp(stub->_continuation); + +} + +void G1BSCodeGen::gen_g1_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { + C1G1BSCodeGen* code_gen = (C1G1BSCodeGen*)Universe::heap()->barrier_set()->c1_code_gen(); + __ bind(stub->_entry); + assert(stub->addr()->is_register(), "Precondition."); + assert(stub->new_val()->is_register(), "Precondition."); + Register new_val_reg = stub->new_val()->as_register(); + __ cmpptr(new_val_reg, (int32_t) NULL_WORD); + __ jcc(Assembler::equal, stub->_continuation); + ce->store_parameter(stub->addr()->as_pointer_register(), 0); + __ call(RuntimeAddress(code_gen->post_barrier_c1_runtime_code_blob()->code_begin())); + __ jmp(stub->_continuation); +} + +#undef __ + +#define __ sasm-> + +void G1BSCodeGen::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_pre_barrier", false); + // arg0 : previous value of memory + + BarrierSet* bs = Universe::heap()->barrier_set(); + __ push(rax); + __ push(rdx); + + const Register pre_val = rax; + const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); + const Register tmp = rdx; + + NOT_LP64(__ get_thread(thread);) + + Address queue_active(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_active())); + Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + SATBMarkQueue::byte_offset_of_buf())); + + Label done; + Label runtime; + + // Is marking still active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ cmpl(queue_active, 0); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ cmpb(queue_active, 0); + } + __ jcc(Assembler::equal, done); + + // Can we store original value in the thread's buffer? + + __ movptr(tmp, queue_index); + __ testptr(tmp, tmp); + __ jcc(Assembler::zero, runtime); + __ subptr(tmp, wordSize); + __ movptr(queue_index, tmp); + __ addptr(tmp, buffer); + + // prev_val (rax) + __ load_parameter(0, pre_val); + __ movptr(Address(tmp, 0), pre_val); + __ jmp(done); + + __ bind(runtime); + + __ save_live_registers_no_oop_map(3, true); + + // load the pre-value + __ load_parameter(0, rcx); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::g1_wb_pre), rcx, thread); + + __ restore_live_registers(true); + + __ bind(done); + + __ pop(rdx); + __ pop(rax); + + __ epilogue(); +} + +void G1BSCodeGen::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_post_barrier", false); + + // arg0: store_address + Address store_addr(rbp, 2*BytesPerWord); + + CardTableModRefBS* ct = + barrier_set_cast(Universe::heap()->barrier_set()); + assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label enqueued; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regions. + // Must check to see if card is already dirty + + const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + DirtyCardQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + DirtyCardQueue::byte_offset_of_buf())); + + __ push(rax); + __ push(rcx); + + const Register cardtable = rax; + const Register card_addr = rcx; + + __ load_parameter(0, card_addr); + __ shrptr(card_addr, CardTable::card_shift); + // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT + // a valid address and therefore is not properly handled by the relocation code. + __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base()); + __ addptr(card_addr, cardtable); + + NOT_LP64(__ get_thread(thread);) + + __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val()); + __ jcc(Assembler::equal, done); + + __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); + __ cmpb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val()); + __ jcc(Assembler::equal, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. + + __ movb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val()); + + const Register tmp = rdx; + __ push(rdx); + + __ movptr(tmp, queue_index); + __ testptr(tmp, tmp); + __ jcc(Assembler::zero, runtime); + __ subptr(tmp, wordSize); + __ movptr(queue_index, tmp); + __ addptr(tmp, buffer); + __ movptr(Address(tmp, 0), card_addr); + __ jmp(enqueued); + + __ bind(runtime); + + __ save_live_registers_no_oop_map(3, true); + + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSet::g1_wb_post), card_addr, thread); + + __ restore_live_registers(true); + + __ bind(enqueued); + __ pop(rdx); + + __ bind(done); + __ pop(rcx); + __ pop(rax); + + __ epilogue(); +} + +#undef __