--- /dev/null 2018-09-25 19:24:48.000000000 +0300 +++ new/src/hotspot/cpu/aarch32/gc/g1/g1BarrierSetAssembler_aarch32.cpp 2018-09-25 19:24:48.000000000 +0300 @@ -0,0 +1,471 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "c1/c1_LIRAssembler.hpp" +#include "c1/c1_MacroAssembler.hpp" +#include "gc/g1/c1/g1BarrierSetC1.hpp" +#include "gc/g1/g1BarrierSet.hpp" +#include "gc/g1/g1BarrierSetAssembler.hpp" +#include "gc/g1/g1BarrierSetRuntime.hpp" +#include "gc/g1/g1CardTable.hpp" +#include "gc/g1/g1ThreadLocalData.hpp" +#include "gc/g1/heapRegion.hpp" +#include "gc/shared/collectedHeap.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.hpp" +#include "interpreter/interp_masm.hpp" +#include "runtime/sharedRuntime.hpp" + +#define __ masm-> + +void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register addr, Register count) { + bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; + // With G1, don't generate the call if we statically know that the target in uninitialized + if (!dest_uninitialized) { + __ push(RegSet::range(r0, r3), sp); + if (count == c_rarg0) { + if (addr == c_rarg1) { + // exactly backwards!! + __ eor(c_rarg0, c_rarg0, c_rarg1); + __ eor(c_rarg1, c_rarg0, c_rarg1); + __ eor(c_rarg0, c_rarg0, c_rarg1); + } else { + __ mov(c_rarg1, count); + __ mov(c_rarg0, addr); + } + } else { + __ mov(c_rarg0, addr); + __ mov(c_rarg1, count); + } + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); + __ pop(RegSet::range(r0, r3), sp); + } +} + +void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, + Register start, Register end, Register scratch) { + // must compute element count unless barrier set interface is changed (other platforms supply count) + assert_different_registers(start, end, scratch); + __ lea(scratch, Address(end, BytesPerHeapOop)); + __ sub(scratch, scratch, start); // subtract start to get #bytes + __ lsr(scratch, scratch, LogBytesPerHeapOop); // convert to element count + __ mov(c_rarg0, start); + __ mov(c_rarg1, scratch); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); +} + + +void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, + Address obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + + assert(thread == rthread, "must be"); + + Label done; + Label runtime; + + assert(pre_val != noreg, "check this code"); + + assert_different_registers(pre_val, tmp); + if (obj.get_mode() != Address::no_mode) + assert(!obj.uses(pre_val) && !obj.uses(tmp), "destroys register"); + + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + + // Is marking active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ ldr(tmp, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ ldrb(tmp, in_progress); + } + __ cbz(tmp, done); + + // Do we need to load the previous value? + if (obj.get_mode() != Address::no_mode) { + __ load_heap_oop(pre_val, obj, noreg, noreg, AS_RAW); + } + + // Is the previous value null? + __ cbz(pre_val, done); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) + + __ ldr(tmp, index); // tmp := *index_adr + __ cbz(tmp, runtime); // tmp == 0? + // If yes, goto runtime + + __ sub(tmp, tmp, wordSize); // tmp := tmp - wordSize + __ str(tmp, index); // *index_adr := tmp + __ ldr(rscratch1, buffer); + __ add(tmp, tmp, rscratch1); // tmp := tmp + *buffer_adr + + // Record the previous value + __ str(pre_val, Address(tmp)); + __ b(done); + + __ bind(runtime); + // save the live input values + __ push(r0->bit(tosca_live) | obj.reg_bits() | pre_val->bit(true) | lr->bit(true), sp); + + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssember::call_VM_leaf_base) + // that checks that the *(rfp+frame::interpreter_frame_last_sp) == NULL. + // + // If we care generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then ebp might be pointing to + // the caller frame and so this check will most likely fail at runtime. + // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have have a full interpreter frame on the stack + // expand_call should be passed true. + + if (expand_call) { + assert(pre_val != c_rarg1, "smashed arg"); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + } + + __ pop(r0->bit(tosca_live) | obj.reg_bits() | pre_val->bit(true) | lr->bit(true), sp); + + __ bind(done); +} + +void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, + Address store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { + assert(thread == rthread, "must be"); + + Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // Does store cross heap regions? + + __ lea(tmp2, store_addr); + __ eor(tmp, tmp2, new_val); + __ lsrs(tmp, tmp, HeapRegion::LogOfHRGrainBytes); + __ b(done, Assembler::EQ); + + // crosses regions, storing NULL? + + __ cbz(new_val, done); + + // storing region crossing non-NULL, is card already dirty? + + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + const Register card_addr = tmp; + + __ lsr(card_addr, tmp2, CardTable::card_shift); + + //ExternalAddress cardtable((address) ct->byte_map_base()); + __ mov(tmp2, (unsigned)ct->byte_map_base()); + + // get the address of the card + __ add(card_addr, card_addr, tmp2); + __ ldrb(tmp2, Address(card_addr)); + __ cmp(tmp2, (int)G1CardTable::g1_young_card_val()); + __ b(done, Assembler::EQ); + + assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + + __ membar(Assembler::StoreLoad); + + __ ldrb(tmp2, Address(card_addr)); + __ cbz(tmp2, done); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. + __ mov(rscratch1, 0); + __ strb(rscratch1, Address(card_addr)); + + __ ldr(rscratch1, queue_index); + __ cbz(rscratch1, runtime); + __ sub(rscratch1, rscratch1, wordSize); + __ str(rscratch1, queue_index); + + __ ldr(tmp2, buffer); + __ str(card_addr, Address(tmp2, rscratch1)); + __ b(done); + + __ bind(runtime); + // save the live input values + __ push(store_addr.reg_bits() | new_val->bit(true), sp); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ pop(store_addr.reg_bits() | new_val->bit(true), sp); + + __ bind(done); +} + +void G1BarrierSetAssembler::load_word_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Register dst, Address src, Register tmp1, Register tmp_thread) { + bool on_oop = type == T_OBJECT || type == T_ARRAY; + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool on_reference = on_weak || on_phantom; + ModRefBarrierSetAssembler::load_word_at(masm, decorators, type, dst, src, tmp1, tmp_thread); + if (on_oop && on_reference) { + // LR is live. It must be saved around calls. + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + g1_write_barrier_pre(masm /* masm */, + Address() /* obj */, + dst /* pre_val */, + rthread /* thread */, + tmp1 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + } +} + +void G1BarrierSetAssembler::load_tos_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address src, Register tmp1, Register tmp_thread) { + bool on_oop = type == T_OBJECT || type == T_ARRAY; + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; + bool on_reference = on_weak || on_phantom; + ModRefBarrierSetAssembler::load_tos_at(masm, decorators, type, src, tmp1, tmp_thread); + if (on_oop && on_reference) { + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + g1_write_barrier_pre(masm /* masm */, + Address() /* obj */, + r0 /* pre_val */, // atos is in r0 + rthread /* thread */, + tmp1 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + } +} + +void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address dst, Register val, Register tmp1, Register tmp2) { + g1_write_barrier_pre(masm, + dst /* obj */, + tmp2 /* pre_val */, + rthread /* thread */, + tmp1 /* tmp */, + val != noreg /* tosca_live */, + false /* expand_call */); + + if (val == noreg) { + BarrierSetAssembler::store_word_at(masm, decorators, type, dst, noreg, tmp1, noreg); + } else { + BarrierSetAssembler::store_word_at(masm, decorators, type, dst, val, noreg, noreg); + g1_write_barrier_post(masm, + dst /* store_adr */, + val /* new_val */, + rthread /* thread */, + tmp1 /* tmp */, + tmp2 /* tmp2 */); + } + +} + +#ifdef COMPILER1 + +#undef __ +#define __ ce->masm()-> + +void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + // At this point we know that marking is in progress. + // If do_load() is true then we have to emit the + // load of the previous value; otherwise it has already + // been loaded into _pre_val. + + __ bind(*stub->entry()); + + assert(stub->pre_val()->is_register(), "Precondition."); + + Register pre_val_reg = stub->pre_val()->as_register(); + + if (stub->do_load()) { + ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); + } + __ cbz(pre_val_reg, *stub->continuation()); + ce->store_parameter(stub->pre_val()->as_register(), 0); + __ far_call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); + __ b(*stub->continuation()); +} + +void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { + G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); + __ bind(*stub->entry()); + assert(stub->addr()->is_register(), "Precondition."); + assert(stub->new_val()->is_register(), "Precondition."); + Register new_val_reg = stub->new_val()->as_register(); + __ cbz(new_val_reg, *stub->continuation()); + ce->store_parameter(stub->addr()->as_pointer_register(), 0); + __ far_call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); + __ b(*stub->continuation()); +} + +#undef __ + +#define __ sasm-> + +void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_pre_barrier", false); + + // arg0 : previous value of memory + + BarrierSet* bs = BarrierSet::barrier_set(); + + const Register pre_val = r0; + const Register thread = rthread; + const Register tmp = rscratch1; + + Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); + + Label done; + Label runtime; + + // Is marking still active? + if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { + __ ldr(tmp, in_progress); + } else { + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); + __ ldrb(tmp, in_progress); + } + __ cbz(tmp, done); + + // Can we store original value in the thread's buffer? + __ ldr(tmp, queue_index); + __ cbz(tmp, runtime); + + __ sub(tmp, tmp, wordSize); + __ str(tmp, queue_index); + __ ldr(rscratch2, buffer); + __ add(tmp, tmp, rscratch2); + __ load_parameter(0, rscratch2); + __ str(rscratch2, Address(tmp, 0)); + __ b(done); + + __ bind(runtime); + __ push_call_clobbered_registers(); + __ load_parameter(0, pre_val); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); + __ pop_call_clobbered_registers(); + __ bind(done); + + __ epilogue(); +} + +void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { + __ prologue("g1_post_barrier", false); + + // arg0: store_address + Address store_addr(rfp, 2*BytesPerWord); + + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); + + Label done; + Label runtime; + + // At this point we know new_value is non-NULL and the new_value crosses regions. + // Must check to see if card is already dirty + + const Register thread = rthread; + + Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); + Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); + + const Register card_addr = rscratch2; + ExternalAddress cardtable((address) ct->byte_map_base()); + + __ load_parameter(0, card_addr); + __ lsr(card_addr, card_addr, CardTable::card_shift); + __ mov(rscratch1, cardtable); + __ add(card_addr, card_addr, rscratch1); + __ ldrb(rscratch1, Address(card_addr)); + __ cmp(rscratch1, (int)G1CardTable::g1_young_card_val()); + __ b(done, Assembler::EQ); + + assert((int)CardTable::dirty_card_val() == 0, "must be 0"); + + __ membar(Assembler::StoreLoad); + __ ldrb(rscratch1, Address(card_addr)); + __ cbz(rscratch1, done); + + // storing region crossing non-NULL, card is clean. + // dirty card and log. + __ mov(rscratch1, 0); + __ strb(rscratch1, Address(card_addr)); + + __ ldr(rscratch1, queue_index); + __ cbz(rscratch1, runtime); + __ sub(rscratch1, rscratch1, wordSize); + __ str(rscratch1, queue_index); + + // Reuse LR to hold buffer_addr + const Register buffer_addr = lr; + + __ ldr(buffer_addr, buffer); + __ str(card_addr, Address(buffer_addr, rscratch1)); + __ b(done); + + __ bind(runtime); + __ push_call_clobbered_registers(); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); + __ pop_call_clobbered_registers(); + __ bind(done); + __ epilogue(); +} + +#undef __ + +#endif // COMPILER1