--- old/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp	2018-04-30 11:27:52.384963292 +0200
+++ new/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.cpp	2018-04-30 11:27:52.276909292 +0200
@@ -28,6 +28,7 @@
 #include "gc/g1/g1BarrierSetAssembler.hpp"
 #include "gc/g1/g1ThreadLocalData.hpp"
 #include "gc/g1/g1CardTable.hpp"
+#include "gc/g1/g1ThreadLocalData.hpp"
 #include "gc/g1/heapRegion.hpp"
 #include "interpreter/interp_masm.hpp"
 #include "runtime/sharedRuntime.hpp"
@@ -127,6 +128,239 @@
 #endif // !AARCH64
 }
 
+// G1 pre-barrier.
+// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+// If store_addr != noreg, then previous value is loaded from [store_addr];
+// in such case store_addr and new_val registers are preserved;
+// otherwise pre_val register is preserved.
+void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
+                                                 Register store_addr,
+                                                 Register new_val,
+                                                 Register pre_val,
+                                                 Register tmp1,
+                                                 Register tmp2) {
+  Label done;
+  Label runtime;
+
+  if (store_addr != noreg) {
+    assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg);
+  } else {
+    assert (new_val == noreg, "should be");
+    assert_different_registers(pre_val, tmp1, tmp2, noreg);
+  }
+
+  Address in_progress(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()));
+  Address index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()));
+  Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()));
+
+  // Is marking active?
+  assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code");
+  __ ldrb(tmp1, in_progress);
+  __ cbz(tmp1, done);
+
+  // Do we need to load the previous value?
+  if (store_addr != noreg) {
+    __ load_heap_oop(pre_val, Address(store_addr, 0));
+  }
+
+  // Is the previous value null?
+  __ cbz(pre_val, done);
+
+  // Can we store original value in the thread's buffer?
+  // Is index == 0?
+  // (The index field is typed as size_t.)
+
+  __ ldr(tmp1, index);           // tmp1 := *index_adr
+  __ ldr(tmp2, buffer);
+
+  __ subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize
+  __ b(runtime, lt);             // If negative, goto runtime
+
+  __ str(tmp1, index);           // *index_adr := tmp1
+
+  // Record the previous value
+  __ str(pre_val, Address(tmp2, tmp1));
+  __ b(done);
+
+  __ bind(runtime);
+
+  // save the live input values
+#ifdef AARCH64
+  if (store_addr != noreg) {
+    __ raw_push(store_addr, new_val);
+  } else {
+    __ raw_push(pre_val, ZR);
+  }
+#else
+  if (store_addr != noreg) {
+    // avoid raw_push to support any ordering of store_addr and new_val
+    __ push(RegisterSet(store_addr) | RegisterSet(new_val));
+  } else {
+    __ push(pre_val);
+  }
+#endif // AARCH64
+
+  if (pre_val != R0) {
+    __ mov(R0, pre_val);
+  }
+  __ mov(R1, Rthread);
+
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1);
+
+#ifdef AARCH64
+  if (store_addr != noreg) {
+    __ raw_pop(store_addr, new_val);
+  } else {
+    __ raw_pop(pre_val, ZR);
+  }
+#else
+  if (store_addr != noreg) {
+    __ pop(RegisterSet(store_addr) | RegisterSet(new_val));
+  } else {
+    __ pop(pre_val);
+  }
+#endif // AARCH64
+
+  __ bind(done);
+}
+
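As a reading aid only (not part of the patch), the assembly emitted above corresponds roughly to the C++ sketch below; the SATBQueue struct, satb_pre_barrier and runtime_pre names are invented stand-ins for the thread-local SATB queue fields and SharedRuntime::g1_wb_pre.

    #include <cstddef>

    // Rough C++ model of the emitted SATB pre-barrier (illustrative only).
    struct SATBQueue {          // stand-in for the G1ThreadLocalData fields used above
      bool   active;            // satb_mark_queue_active
      size_t index;             // satb_mark_queue_index, counted in bytes
      void** buffer;            // satb_mark_queue_buffer
    };

    inline void satb_pre_barrier(SATBQueue* q, void** store_addr, void* pre_val,
                                 void (*runtime_pre)(void* pre_val, void* thread)) {
      if (!q->active) return;                              // marking not active
      if (store_addr != nullptr) pre_val = *store_addr;    // load the previous value
      if (pre_val == nullptr) return;                      // null previous value: nothing to log
      if (q->index >= sizeof(void*)) {                     // room left in the buffer?
        q->index -= sizeof(void*);
        *(void**)((char*)q->buffer + q->index) = pre_val;  // record the previous value
      } else {
        runtime_pre(pre_val, /* thread */ nullptr);        // buffer full: take the slow path
      }
    }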
+// G1 post-barrier.
+// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR).
+void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm,
+                                                  Register store_addr,
+                                                  Register new_val,
+                                                  Register tmp1,
+                                                  Register tmp2,
+                                                  Register tmp3) {
+
+  Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()));
+  Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()));
+
+  BarrierSet* bs = BarrierSet::barrier_set();
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
+  CardTable* ct = ctbs->card_table();
+  Label done;
+  Label runtime;
+
+  // Does store cross heap regions?
+
+  __ eor(tmp1, store_addr, new_val);
+#ifdef AARCH64
+  __ logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes);
+  __ cbz(tmp1, done);
+#else
+  __ movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes));
+  __ b(done, eq);
+#endif
+
+  // crosses regions, storing NULL?
+
+  __ cbz(new_val, done);
+
+  // storing region crossing non-NULL, is card already dirty?
+  const Register card_addr = tmp1;
+  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code");
+
+  __ mov_address(tmp2, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference);
+  __ add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift));
+
+  __ ldrb(tmp2, Address(card_addr));
+  __ cmp(tmp2, (int)G1CardTable::g1_young_card_val());
+  __ b(done, eq);
+
+  __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2);
+
+  assert(CardTable::dirty_card_val() == 0, "adjust this code");
+  __ ldrb(tmp2, Address(card_addr));
+  __ cbz(tmp2, done);
+
+  // storing a region crossing, non-NULL oop, card is clean.
+  // dirty card and log.
+
+  __ strb(__ zero_register(tmp2), Address(card_addr));
+
+  __ ldr(tmp2, queue_index);
+  __ ldr(tmp3, buffer);
+
+  __ subs(tmp2, tmp2, wordSize);
+  __ b(runtime, lt); // go to runtime if now negative
+
+  __ str(tmp2, queue_index);
+
+  __ str(card_addr, Address(tmp3, tmp2));
+  __ b(done);
+
+  __ bind(runtime);
+
+  if (card_addr != R0) {
+    __ mov(R0, card_addr);
+  }
+  __ mov(R1, Rthread);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1);
+
+  __ bind(done);
+}
+
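Again purely as annotation (not patch content), the post-barrier above reduces to roughly the following C++; the DirtyCardQueue struct and parameter names are invented stand-ins for the thread-local dirty card queue and SharedRuntime::g1_wb_post.

    #include <cstddef>
    #include <cstdint>

    struct DirtyCardQueue {     // stand-in for the dirty card queue fields used above
      size_t index;             // counted in bytes
      void** buffer;
    };

    // Illustrative model of the emitted G1 post-barrier for a store *store_addr = new_val.
    inline void g1_post_barrier(uintptr_t store_addr, uintptr_t new_val,
                                uint8_t* card_base, unsigned log_region_size, unsigned card_shift,
                                uint8_t young_val, DirtyCardQueue* q,
                                void (*runtime_post)(uint8_t* card, void* thread)) {
      if (((store_addr ^ new_val) >> log_region_size) == 0) return;  // same region: nothing to do
      if (new_val == 0) return;                                      // storing NULL
      uint8_t* card = card_base + (store_addr >> card_shift);
      if (*card == young_val) return;                                // card for a young region
      // (the real code issues a StoreLoad membar here)
      if (*card == 0) return;                                        // dirty_card_val() == 0: already dirty
      *card = 0;                                                     // dirty the card ...
      if (q->index >= sizeof(void*)) {                               // ... and log it
        q->index -= sizeof(void*);
        *(void**)((char*)q->buffer + q->index) = card;
      } else {
        runtime_post(card, /* thread */ nullptr);                    // queue full: slow path
      }
    }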
+void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                    Register dst, Address src, Register tmp1, Register tmp2, Register tmp3) {
+  bool on_oop = type == T_OBJECT || type == T_ARRAY;
+  bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
+  bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0;
+  bool on_reference = on_weak || on_phantom;
+
+  ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp2, tmp3);
+  if (on_oop && on_reference) {
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+    g1_write_barrier_pre(masm, noreg, noreg, dst, tmp1, tmp2);
+  }
+}
+
+
+void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
+  bool in_heap = (decorators & IN_HEAP) != 0;
+  bool in_concurrent_root = (decorators & IN_CONCURRENT_ROOT) != 0;
+
+  bool needs_pre_barrier = in_heap || in_concurrent_root;
+  bool needs_post_barrier = (new_val != noreg) && in_heap;
+
+  // flatten object address if needed
+  assert (obj.mode() == basic_offset, "pre- or post-indexing is not supported here");
+
+  const Register store_addr = obj.base();
+  if (obj.index() != noreg) {
+    assert (obj.disp() == 0, "index or displacement, not both");
+#ifdef AARCH64
+    __ add(store_addr, obj.base(), obj.index(), obj.extend(), obj.shift_imm());
+#else
+    assert(obj.offset_op() == add_offset, "addition is expected");
+    __ add(store_addr, obj.base(), AsmOperand(obj.index(), obj.shift(), obj.shift_imm()));
+#endif // AARCH64
+  } else if (obj.disp() != 0) {
+    __ add(store_addr, obj.base(), obj.disp());
+  }
+
+  if (needs_pre_barrier) {
+    g1_write_barrier_pre(masm, store_addr, new_val, tmp1, tmp2, tmp3);
+  }
+
+  if (is_null) {
+    BarrierSetAssembler::store_at(masm, decorators, type, Address(store_addr), new_val, tmp1, tmp2, tmp3, true);
+  } else {
+    // G1 barrier needs uncompressed oop for region cross check.
+    Register val_to_store = new_val;
+    if (UseCompressedOops) {
+      val_to_store = tmp1;
+      __ mov(val_to_store, new_val);
+    }
+    BarrierSetAssembler::store_at(masm, decorators, type, Address(store_addr), val_to_store, tmp1, tmp2, tmp3, false);
+    if (needs_post_barrier) {
+      g1_write_barrier_post(masm, store_addr, new_val, tmp1, tmp2, tmp3);
+    }
+  }
+};
+
 #ifdef COMPILER1
 #undef __
--- old/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp	2018-04-30 11:27:52.777159293 +0200
+++ new/src/hotspot/cpu/arm/gc/g1/g1BarrierSetAssembler_arm.hpp	2018-04-30 11:27:52.669105296 +0200
@@ -41,6 +41,27 @@
   void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                         Register addr, Register count, Register tmp);
 
+  void g1_write_barrier_pre(MacroAssembler* masm,
+                            Register store_addr,
+                            Register new_val,
+                            Register pre_val,
+                            Register tmp1,
+                            Register tmp2);
+
+  void g1_write_barrier_post(MacroAssembler* masm,
+                             Register store_addr,
+                             Register new_val,
+                             Register tmp1,
+                             Register tmp2,
+                             Register tmp3);
+
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null);
+
+public:
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Register dst, Address src, Register tmp1, Register tmp2, Register tmp3);
+
 #ifdef COMPILER1
 public:
   void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub);
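A small sketch (assumed flag values, not HotSpot's actual DecoratorSet encoding) of the barrier selection that load_at and oop_store_at above derive from the decorators:

    #include <cstdint>

    typedef uint64_t DecoratorSet;                 // illustrative re-declaration
    const DecoratorSet IN_HEAP            = 1u << 0;
    const DecoratorSet IN_CONCURRENT_ROOT = 1u << 1;
    const DecoratorSet ON_WEAK_OOP_REF    = 1u << 2;
    const DecoratorSet ON_PHANTOM_OOP_REF = 1u << 3;

    struct G1Barriers { bool satb_pre; bool card_post; bool referent_pre; };

    inline G1Barriers g1_select_barriers(DecoratorSet d, bool is_oop, bool has_new_val) {
      G1Barriers b;
      b.satb_pre     = (d & (IN_HEAP | IN_CONCURRENT_ROOT)) != 0;                   // oop_store_at
      b.card_post    = has_new_val && (d & IN_HEAP) != 0;                           // oop_store_at
      b.referent_pre = is_oop && (d & (ON_WEAK_OOP_REF | ON_PHANTOM_OOP_REF)) != 0; // load_at
      return b;
    }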
--- old/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp	2018-04-30 11:27:53.173357298 +0200
+++ new/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp	2018-04-30 11:27:53.065303298 +0200
@@ -36,6 +36,11 @@
   virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                   Register addr, Register count, Register tmp) {}
 
+  virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                       Register dst, Address src, Register tmp1, Register tmp2, Register tmp3);
+  virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                        Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null);
+
   virtual void barrier_stubs_init() {}
 };
--- old/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp	2018-04-30 11:27:53.565553299 +0200
+++ new/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.cpp	2018-04-30 11:27:53.457499299 +0200
@@ -72,3 +72,107 @@
   __ b(L_cardtable_loop, ge);
   __ BIND(L_done);
 }
+
+void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                                Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
+  bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
+  bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
+  bool precise = on_array || on_anonymous;
+
+  if (is_null) {
+    BarrierSetAssembler::store_at(masm, decorators, type, obj, new_val, tmp1, tmp2, tmp3, true);
+  } else {
+    assert (!precise || (obj.index() == noreg && obj.disp() == 0),
+            "store check address should be calculated beforehand");
+
+    store_check_part1(masm, tmp1);
+    BarrierSetAssembler::store_at(masm, decorators, type, obj, new_val, tmp1, tmp2, tmp3, false);
+    new_val = noreg;
+    store_check_part2(masm, obj.base(), tmp1, tmp2);
+  }
+}
+
+// The 1st part of the store check.
+// Sets card_table_base register.
+void CardTableBarrierSetAssembler::store_check_part1(MacroAssembler* masm, Register card_table_base) {
+  // Check barrier set type (should be card table) and element size
+  BarrierSet* bs = BarrierSet::barrier_set();
+  assert(bs->kind() == BarrierSet::CardTableBarrierSet,
+         "Wrong barrier set kind");
+
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(bs);
+  CardTable* ct = ctbs->card_table();
+  assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "Adjust store check code");
+
+  // Load card table base address.
+
+  /* Performance note.
+
+   There is an alternative way of loading card table base address
+   from thread descriptor, which may look more efficient:
+
+     ldr(card_table_base, Address(Rthread, JavaThread::card_table_base_offset()));
+
+   However, performance measurements of micro benchmarks and specJVM98
+   showed that loading of card table base from thread descriptor is
+   7-18% slower compared to loading of literal embedded into the code.
+   Possible cause is a cache miss (card table base address resides in a
+   rarely accessed area of thread descriptor).
+  */
+  // TODO-AARCH64 Investigate if mov_slow is faster than ldr from Rthread on AArch64
+  __ mov_address(card_table_base, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference);
+}
+
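To make the performance note above concrete, this is the address arithmetic that store_check_part1/part2 set up, as a sketch; a card_shift of 9 (512-byte cards) is assumed here, not taken from the patch.

    #include <cstdint>

    // byte_map_base is pre-biased so that (address >> card_shift) indexes it directly;
    // loading it as a literal embedded in the code (mov_address above) avoids the
    // extra load through Rthread described in the performance note.
    inline uint8_t* card_address(uint8_t* byte_map_base, uintptr_t store_addr,
                                 unsigned card_shift = 9 /* assumed 512-byte cards */) {
      return byte_map_base + (store_addr >> card_shift);
    }

    inline void dirty_card(uint8_t* byte_map_base, uintptr_t store_addr) {
      *card_address(byte_map_base, store_addr) = 0;   // CardTable::dirty_card_val() == 0
    }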
+// The 2nd part of the store check.
+void CardTableBarrierSetAssembler::store_check_part2(MacroAssembler* masm, Register obj, Register card_table_base, Register tmp) {
+  assert_different_registers(obj, card_table_base, tmp);
+
+  assert(CardTable::dirty_card_val() == 0, "Dirty card value must be 0 due to optimizations.");
+#ifdef AARCH64
+  __ add(card_table_base, card_table_base, AsmOperand(obj, lsr, CardTable::card_shift));
+  Address card_table_addr(card_table_base);
+#else
+  Address card_table_addr(card_table_base, obj, lsr, CardTable::card_shift);
+#endif
+
+  if (UseCondCardMark) {
+#if INCLUDE_ALL_GCS
+    if (UseConcMarkSweepGC) {
+      __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), noreg);
+    }
+#endif
+    Label already_dirty;
+
+    __ ldrb(tmp, card_table_addr);
+    __ cbz(tmp, already_dirty);
+
+    set_card(masm, card_table_base, card_table_addr, tmp);
+    __ bind(already_dirty);
+
+  } else {
+#if INCLUDE_ALL_GCS
+    if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
+      __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore), noreg);
+    }
+#endif
+    set_card(masm, card_table_base, card_table_addr, tmp);
+  }
+}
+
+void CardTableBarrierSetAssembler::set_card(MacroAssembler* masm, Register card_table_base, Address card_table_addr, Register tmp) {
+#ifdef AARCH64
+  __ strb(ZR, card_table_addr);
+#else
+  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
+  CardTable* ct = ctbs->card_table();
+  if ((((uintptr_t)ct->byte_map_base() & 0xff) == 0)) {
+    // Card table is aligned so the lowest byte of the table address base is zero.
+    // This works only if the code is not saved for later use, possibly
+    // in a context where the base would no longer be aligned.
+    __ strb(card_table_base, card_table_addr);
+  } else {
+    __ mov(tmp, 0);
+    __ strb(tmp, card_table_addr);
+  }
+#endif // AARCH64
+}
--- old/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.hpp	2018-04-30 11:27:53.961751304 +0200
+++ new/src/hotspot/cpu/arm/gc/shared/cardTableBarrierSetAssembler_arm.hpp	2018-04-30 11:27:53.853697304 +0200
@@ -29,9 +29,18 @@
 #include "gc/shared/modRefBarrierSetAssembler.hpp"
 
 class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler {
+private:
+  void store_check(MacroAssembler* masm, Register obj, Address dst);
+  void store_check_part1(MacroAssembler* masm, Register card_table_base);
+  void store_check_part2(MacroAssembler* masm, Register obj, Register card_table_base, Register tmp);
+
+  void set_card(MacroAssembler* masm, Register card_table_base, Address card_table_addr, Register tmp);
+
 protected:
   virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators,
                                                 Register addr, Register count, Register tmp);
+  virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                            Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null);
 };
 
 #endif // #ifndef CPU_ARM_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_ARM_HPP
--- old/src/hotspot/cpu/arm/gc/shared/modRefBarrierSetAssembler_arm.cpp	2018-04-30 11:27:54.357949305 +0200
+++ new/src/hotspot/cpu/arm/gc/shared/modRefBarrierSetAssembler_arm.cpp	2018-04-30 11:27:54.245893306 +0200
@@ -42,3 +42,12 @@
     gen_write_ref_array_post_barrier(masm, decorators, addr, count, tmp);
   }
 }
+
+void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                         Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
+  if (type == T_OBJECT || type == T_ARRAY) {
+    oop_store_at(masm, decorators, type, obj,
new_val, tmp1, tmp2, tmp3, is_null); + } else { + BarrierSetAssembler::store_at(masm, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null); + } +} --- old/src/hotspot/cpu/arm/gc/shared/modRefBarrierSetAssembler_arm.hpp 2018-04-30 11:27:54.754147310 +0200 +++ new/src/hotspot/cpu/arm/gc/shared/modRefBarrierSetAssembler_arm.hpp 2018-04-30 11:27:54.642091310 +0200 @@ -28,6 +28,10 @@ #include "asm/macroAssembler.hpp" #include "gc/shared/barrierSetAssembler.hpp" +// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other +// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected +// accesses, which are overridden in the concrete BarrierSetAssembler. + class ModRefBarrierSetAssembler: public BarrierSetAssembler { protected: virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, @@ -35,11 +39,16 @@ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp) {} + virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address obj, Register val, Register tmp1, Register tmp2, Register tmp3, bool is_null) = 0; + public: virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, Register addr, Register count, int callee_saved_regs); virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, Register addr, Register count, Register tmp); + virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, + Address obj, Register val, Register tmp1, Register tmp2, Register tmp3, bool is_null); }; #endif // CPU_ARM_GC_SHARED_MODREFBARRIERSETASSEMBLER_ARM_HPP --- old/src/hotspot/cpu/arm/interp_masm_arm.cpp 2018-04-30 11:27:55.154347311 +0200 +++ new/src/hotspot/cpu/arm/interp_masm_arm.cpp 2018-04-30 11:27:55.042291312 +0200 @@ -406,91 +406,6 @@ } -// The 1st part of the store check. -// Sets card_table_base register. -void InterpreterMacroAssembler::store_check_part1(Register card_table_base) { - // Check barrier set type (should be card table) and element size - BarrierSet* bs = BarrierSet::barrier_set(); - assert(bs->kind() == BarrierSet::CardTableBarrierSet, - "Wrong barrier set kind"); - - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); - assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "Adjust store check code"); - - // Load card table base address. - - /* Performance note. - - There is an alternative way of loading card table base address - from thread descriptor, which may look more efficient: - - ldr(card_table_base, Address(Rthread, JavaThread::card_table_base_offset())); - - However, performance measurements of micro benchmarks and specJVM98 - showed that loading of card table base from thread descriptor is - 7-18% slower compared to loading of literal embedded into the code. - Possible cause is a cache miss (card table base address resides in a - rarely accessed area of thread descriptor). - */ - // TODO-AARCH64 Investigate if mov_slow is faster than ldr from Rthread on AArch64 - mov_address(card_table_base, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference); -} - -// The 2nd part of the store check. 
-void InterpreterMacroAssembler::store_check_part2(Register obj, Register card_table_base, Register tmp) { - assert_different_registers(obj, card_table_base, tmp); - - assert(CardTable::dirty_card_val() == 0, "Dirty card value must be 0 due to optimizations."); -#ifdef AARCH64 - add(card_table_base, card_table_base, AsmOperand(obj, lsr, CardTable::card_shift)); - Address card_table_addr(card_table_base); -#else - Address card_table_addr(card_table_base, obj, lsr, CardTable::card_shift); -#endif - - if (UseCondCardMark) { -#if INCLUDE_ALL_GCS - if (UseConcMarkSweepGC) { - membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), noreg); - } -#endif - Label already_dirty; - - ldrb(tmp, card_table_addr); - cbz(tmp, already_dirty); - - set_card(card_table_base, card_table_addr, tmp); - bind(already_dirty); - - } else { -#if INCLUDE_ALL_GCS - if (UseConcMarkSweepGC && CMSPrecleaningEnabled) { - membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreStore), noreg); - } -#endif - set_card(card_table_base, card_table_addr, tmp); - } -} - -void InterpreterMacroAssembler::set_card(Register card_table_base, Address card_table_addr, Register tmp) { -#ifdef AARCH64 - strb(ZR, card_table_addr); -#else - CardTableBarrierSet* ctbs = barrier_set_cast(BarrierSet::barrier_set()); - CardTable* ct = ctbs->card_table(); - if ((((uintptr_t)ct->byte_map_base() & 0xff) == 0)) { - // Card table is aligned so the lowest byte of the table address base is zero. - // This works only if the code is not saved for later use, possibly - // in a context where the base would no longer be aligned. - strb(card_table_base, card_table_addr); - } else { - mov(tmp, 0); - strb(tmp, card_table_addr); - } -#endif // AARCH64 -} - ////////////////////////////////////////////////////////////////////////////////// --- old/src/hotspot/cpu/arm/interp_masm_arm.hpp 2018-04-30 11:27:55.558549316 +0200 +++ new/src/hotspot/cpu/arm/interp_masm_arm.hpp 2018-04-30 11:27:55.446493313 +0200 @@ -144,11 +144,6 @@ // load cpool->resolved_klass_at(index); Rtemp is corrupted upon return void load_resolved_klass_at_offset(Register Rcpool, Register Rindex, Register Rklass); - void store_check_part1(Register card_table_base); // Sets card_table_base register. - void store_check_part2(Register obj, Register card_table_base, Register tmp); - - void set_card(Register card_table_base, Address card_table_addr, Register tmp); - void pop_ptr(Register r); void pop_i(Register r = R0_tos); #ifdef AARCH64 --- old/src/hotspot/cpu/arm/macroAssembler_arm.cpp 2018-04-30 11:27:55.954747317 +0200 +++ new/src/hotspot/cpu/arm/macroAssembler_arm.cpp 2018-04-30 11:27:55.842691318 +0200 @@ -31,6 +31,7 @@ #include "compiler/disassembler.hpp" #include "gc/shared/barrierSet.hpp" #include "gc/shared/cardTable.hpp" +#include "gc/shared/barrierSetAssembler.hpp" #include "gc/shared/cardTableBarrierSet.hpp" #include "gc/shared/collectedHeap.inline.hpp" #include "interpreter/interpreter.hpp" @@ -2126,27 +2127,22 @@ void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + assert_different_registers(value, tmp1, tmp2); Label done, not_weak; cbz(value, done); // Use NULL as-is. STATIC_ASSERT(JNIHandles::weak_tag_mask == 1u); tbz(value, 0, not_weak); // Test for jweak tag. + // Resolve jweak. 
- ldr(value, Address(value, -JNIHandles::weak_tag_value)); - verify_oop(value); -#if INCLUDE_ALL_GCS - if (UseG1GC) { - g1_write_barrier_pre(noreg, // store_addr - noreg, // new_val - value, // pre_val - tmp1, // tmp1 - tmp2); // tmp2 - } -#endif // INCLUDE_ALL_GCS + bs->load_at(this, IN_ROOT | ON_PHANTOM_OOP_REF, T_OBJECT, + value, Address(value, -JNIHandles::weak_tag_value), tmp1, tmp2, noreg); b(done); bind(not_weak); // Resolve (untagged) jobject. - ldr(value, Address(value)); + bs->load_at(this, IN_ROOT | IN_CONCURRENT_ROOT, T_OBJECT, + value, Address(value, 0), tmp1, tmp2, noreg); verify_oop(value); bind(done); } @@ -2154,183 +2150,6 @@ ////////////////////////////////////////////////////////////////////////////////// -#if INCLUDE_ALL_GCS - -// G1 pre-barrier. -// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). -// If store_addr != noreg, then previous value is loaded from [store_addr]; -// in such case store_addr and new_val registers are preserved; -// otherwise pre_val register is preserved. -void MacroAssembler::g1_write_barrier_pre(Register store_addr, - Register new_val, - Register pre_val, - Register tmp1, - Register tmp2) { - Label done; - Label runtime; - - if (store_addr != noreg) { - assert_different_registers(store_addr, new_val, pre_val, tmp1, tmp2, noreg); - } else { - assert (new_val == noreg, "should be"); - assert_different_registers(pre_val, tmp1, tmp2, noreg); - } - - Address in_progress(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); - Address index(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); - Address buffer(Rthread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); - - // Is marking active? - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "adjust this code"); - ldrb(tmp1, in_progress); - cbz(tmp1, done); - - // Do we need to load the previous value? - if (store_addr != noreg) { - load_heap_oop(pre_val, Address(store_addr, 0)); - } - - // Is the previous value null? - cbz(pre_val, done); - - // Can we store original value in the thread's buffer? - // Is index == 0? - // (The index field is typed as size_t.) - - ldr(tmp1, index); // tmp1 := *index_adr - ldr(tmp2, buffer); - - subs(tmp1, tmp1, wordSize); // tmp1 := tmp1 - wordSize - b(runtime, lt); // If negative, goto runtime - - str(tmp1, index); // *index_adr := tmp1 - - // Record the previous value - str(pre_val, Address(tmp2, tmp1)); - b(done); - - bind(runtime); - - // save the live input values -#ifdef AARCH64 - if (store_addr != noreg) { - raw_push(store_addr, new_val); - } else { - raw_push(pre_val, ZR); - } -#else - if (store_addr != noreg) { - // avoid raw_push to support any ordering of store_addr and new_val - push(RegisterSet(store_addr) | RegisterSet(new_val)); - } else { - push(pre_val); - } -#endif // AARCH64 - - if (pre_val != R0) { - mov(R0, pre_val); - } - mov(R1, Rthread); - - call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), R0, R1); - -#ifdef AARCH64 - if (store_addr != noreg) { - raw_pop(store_addr, new_val); - } else { - raw_pop(pre_val, ZR); - } -#else - if (store_addr != noreg) { - pop(RegisterSet(store_addr) | RegisterSet(new_val)); - } else { - pop(pre_val); - } -#endif // AARCH64 - - bind(done); -} - -// G1 post-barrier. -// Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). 
-void MacroAssembler::g1_write_barrier_post(Register store_addr, - Register new_val, - Register tmp1, - Register tmp2, - Register tmp3) { - - Address queue_index(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); - Address buffer(Rthread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); - - BarrierSet* bs = BarrierSet::barrier_set(); - CardTableBarrierSet* ctbs = barrier_set_cast(bs); - CardTable* ct = ctbs->card_table(); - Label done; - Label runtime; - - // Does store cross heap regions? - - eor(tmp1, store_addr, new_val); -#ifdef AARCH64 - logical_shift_right(tmp1, tmp1, HeapRegion::LogOfHRGrainBytes); - cbz(tmp1, done); -#else - movs(tmp1, AsmOperand(tmp1, lsr, HeapRegion::LogOfHRGrainBytes)); - b(done, eq); -#endif - - // crosses regions, storing NULL? - - cbz(new_val, done); - - // storing region crossing non-NULL, is card already dirty? - const Register card_addr = tmp1; - assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); - - mov_address(tmp2, (address)ct->byte_map_base(), symbolic_Relocation::card_table_reference); - add(card_addr, tmp2, AsmOperand(store_addr, lsr, CardTable::card_shift)); - - ldrb(tmp2, Address(card_addr)); - cmp(tmp2, (int)G1CardTable::g1_young_card_val()); - b(done, eq); - - membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad), tmp2); - - assert(CardTable::dirty_card_val() == 0, "adjust this code"); - ldrb(tmp2, Address(card_addr)); - cbz(tmp2, done); - - // storing a region crossing, non-NULL oop, card is clean. - // dirty card and log. - - strb(zero_register(tmp2), Address(card_addr)); - - ldr(tmp2, queue_index); - ldr(tmp3, buffer); - - subs(tmp2, tmp2, wordSize); - b(runtime, lt); // go to runtime if now negative - - str(tmp2, queue_index); - - str(card_addr, Address(tmp3, tmp2)); - b(done); - - bind(runtime); - - if (card_addr != R0) { - mov(R0, card_addr); - } - mov(R1, Rthread); - call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), R0, R1); - - bind(done); -} - -#endif // INCLUDE_ALL_GCS - -////////////////////////////////////////////////////////////////////////////////// - #ifdef AARCH64 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed) { @@ -2873,38 +2692,39 @@ #endif // AARCH64 -void MacroAssembler::load_heap_oop(Register dst, Address src) { -#ifdef AARCH64 - if (UseCompressedOops) { - ldr_w(dst, src); - decode_heap_oop(dst); - return; - } -#endif // AARCH64 - ldr(dst, src); +void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) { + access_load_at(T_OBJECT, IN_HEAP | decorators, src, dst, tmp1, tmp2, tmp3); } // Blows src and flags. 
-void MacroAssembler::store_heap_oop(Register src, Address dst) { -#ifdef AARCH64 - if (UseCompressedOops) { - assert(!dst.uses(src), "not enough registers"); - encode_heap_oop(src); - str_w(src, dst); - return; +void MacroAssembler::store_heap_oop(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) { + access_store_at(T_OBJECT, IN_HEAP | decorators, obj, new_val, tmp1, tmp2, tmp3, false); +} + +void MacroAssembler::store_heap_oop_null(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) { + access_store_at(T_OBJECT, IN_HEAP, obj, new_val, tmp1, tmp2, tmp3, true); +} + +void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, + Address src, Register dst, Register tmp1, Register tmp2, Register tmp3) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); + } else { + bs->load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3); } -#endif // AARCH64 - str(src, dst); } -void MacroAssembler::store_heap_oop_null(Register src, Address dst) { -#ifdef AARCH64 - if (UseCompressedOops) { - str_w(src, dst); - return; +void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, + Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) { + BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); + bool as_raw = (decorators & AS_RAW) != 0; + if (as_raw) { + bs->BarrierSetAssembler::store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null); + } else { + bs->store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null); } -#endif // AARCH64 - str(src, dst); } --- old/src/hotspot/cpu/arm/macroAssembler_arm.hpp 2018-04-30 11:27:56.370955319 +0200 +++ new/src/hotspot/cpu/arm/macroAssembler_arm.hpp 2018-04-30 11:27:56.262901319 +0200 @@ -401,27 +401,6 @@ void resolve_jobject(Register value, Register tmp1, Register tmp2); -#if INCLUDE_ALL_GCS - // G1 pre-barrier. - // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). - // If store_addr != noreg, then previous value is loaded from [store_addr]; - // in such case store_addr and new_val registers are preserved; - // otherwise pre_val register is preserved. - void g1_write_barrier_pre(Register store_addr, - Register new_val, - Register pre_val, - Register tmp1, - Register tmp2); - - // G1 post-barrier. - // Blows all volatile registers (R0-R3 on 32-bit ARM, R0-R18 on AArch64, Rtemp, LR). 
- void g1_write_barrier_post(Register store_addr, - Register new_val, - Register tmp1, - Register tmp2, - Register tmp3); -#endif // INCLUDE_ALL_GCS - #ifndef AARCH64 void nop() { mov(R0, R0); @@ -1072,12 +1051,12 @@ // oop manipulations - void load_heap_oop(Register dst, Address src); - void store_heap_oop(Register src, Address dst); - void store_heap_oop(Address dst, Register src) { - store_heap_oop(src, dst); - } - void store_heap_oop_null(Register src, Address dst); + void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0); + void store_heap_oop(Address obj, Register new_val, Register tmp1 = noreg, Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0); + void store_heap_oop_null(Address obj, Register new_val, Register tmp1 = noreg, Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0); + + void access_load_at(BasicType type, DecoratorSet decorators, Address src, Register dst, Register tmp1, Register tmp2, Register tmp3); + void access_store_at(BasicType type, DecoratorSet decorators, Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null); #ifdef AARCH64 void encode_heap_oop(Register dst, Register src); --- old/src/hotspot/cpu/arm/stubGenerator_arm.cpp 2018-04-30 11:27:56.779159322 +0200 +++ new/src/hotspot/cpu/arm/stubGenerator_arm.cpp 2018-04-30 11:27:56.671105321 +0200 @@ -3260,7 +3260,7 @@ __ align(OptoLoopAlignment); __ BIND(store_element); if (UseCompressedOops) { - __ store_heap_oop(R5, Address(to, BytesPerHeapOop, post_indexed)); // store the oop, changes flags + __ store_heap_oop(Address(to, BytesPerHeapOop, post_indexed), R5); // store the oop, changes flags __ subs_32(count,count,1); } else { __ subs_32(count,count,1); --- old/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp 2018-04-30 11:27:57.199369327 +0200 +++ new/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp 2018-04-30 11:27:57.087313324 +0200 @@ -852,80 +852,53 @@ // address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { -#if INCLUDE_ALL_GCS - if (UseG1GC) { - // Code: _aload_0, _getfield, _areturn - // parameter size = 1 - // - // The code that gets generated by this routine is split into 2 parts: - // 1. The "intrinsified" code for G1 (or any SATB based GC), - // 2. The slow path - which is an expansion of the regular method entry. - // - // Notes:- - // * In the G1 code we do not check whether we need to block for - // a safepoint. If G1 is enabled then we must execute the specialized - // code for Reference.get (except when the Reference object is null) - // so that we can log the value in the referent field with an SATB - // update buffer. - // If the code for the getfield template is modified so that the - // G1 pre-barrier code is executed when the current method is - // Reference.get() then going through the normal method entry - // will be fine. - // * The G1 code can, however, check the receiver object (the instance - // of java.lang.Reference) and jump to the slow path if null. If the - // Reference object is null then we obviously cannot fetch the referent - // and so we don't need to call the G1 pre-barrier. Thus we can use the - // regular method entry code to generate the NPE. - // - // This code is based on generate_accessor_enty. 
- // - // Rmethod: Method* - // Rthread: thread - // Rsender_sp: sender sp, must be preserved for slow path, set SP to it on fast path - // Rparams: parameters - - address entry = __ pc(); - Label slow_path; - const Register Rthis = R0; - const Register Rret_addr = Rtmp_save1; - assert_different_registers(Rthis, Rret_addr, Rsender_sp); - - const int referent_offset = java_lang_ref_Reference::referent_offset; - guarantee(referent_offset > 0, "referent offset not initialized"); - - // Check if local 0 != NULL - // If the receiver is null then it is OK to jump to the slow path. - __ ldr(Rthis, Address(Rparams)); - __ cbz(Rthis, slow_path); - - // Generate the G1 pre-barrier code to log the value of - // the referent field in an SATB buffer. - - // Load the value of the referent field. - __ load_heap_oop(R0, Address(Rthis, referent_offset)); - - // Preserve LR - __ mov(Rret_addr, LR); - - __ g1_write_barrier_pre(noreg, // store_addr - noreg, // new_val - R0, // pre_val - Rtemp, // tmp1 - R1_tmp); // tmp2 - - // _areturn - __ mov(SP, Rsender_sp); - __ ret(Rret_addr); - - // generate a vanilla interpreter entry as the slow path - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); - return entry; - } -#endif // INCLUDE_ALL_GCS - - // If G1 is not enabled then attempt to go through the normal entry point - return NULL; + // Code: _aload_0, _getfield, _areturn + // parameter size = 1 + // + // The code that gets generated by this routine is split into 2 parts: + // 1. The "intrinsified" code performing an ON_WEAK_OOP_REF load, + // 2. The slow path - which is an expansion of the regular method entry. + // + // Notes:- + // * An intrinsic is always executed, where an ON_WEAK_OOP_REF load is performed. + // * We may jump to the slow path iff the receiver is null. If the + // Reference object is null then we no longer perform an ON_WEAK_OOP_REF load + // Thus we can use the regular method entry code to generate the NPE. + // + // Rmethod: Method* + // Rthread: thread + // Rsender_sp: sender sp, must be preserved for slow path, set SP to it on fast path + // Rparams: parameters + + address entry = __ pc(); + Label slow_path; + const Register Rthis = R0; + const Register Rret_addr = Rtmp_save1; + assert_different_registers(Rthis, Rret_addr, Rsender_sp); + + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + // Check if local 0 != NULL + // If the receiver is null then it is OK to jump to the slow path. + __ ldr(Rthis, Address(Rparams)); + __ cbz(Rthis, slow_path); + + // Preserve LR + __ mov(Rret_addr, LR); + + // Load the value of the referent field. 
+  const Address field_address(Rthis, referent_offset);
+  __ load_heap_oop(R0, field_address, Rtemp, R1_tmp, noreg, ON_WEAK_OOP_REF);
+
+  // _areturn
+  __ mov(SP, Rsender_sp);
+  __ ret(Rret_addr);
+
+  // generate a vanilla interpreter entry as the slow path
+  __ bind(slow_path);
+  __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
+  return entry;
 }
 
 // Not supported
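For orientation only (not part of the patch), the generated Reference.get entry above behaves like this C++ fast path, where weak_oop_load stands for the ON_WEAK_OOP_REF load_heap_oop performed through the barrier set assembler:

    #include <cstddef>

    typedef void* oop;  // illustrative stand-in

    inline oop reference_get_fast_path(char* receiver, size_t referent_offset,
                                       oop (*weak_oop_load)(oop* addr)) {
      if (receiver == nullptr) {
        return nullptr;      // real code branches to the vanilla entry, which raises the NPE
      }
      oop* referent_addr = (oop*)(receiver + referent_offset);
      return weak_oop_load(referent_addr);   // G1 logs the referent in the SATB buffer here
    }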
--- old/src/hotspot/cpu/arm/templateTable_arm.cpp	2018-04-30 11:27:57.603571330 +0200
+++ new/src/hotspot/cpu/arm/templateTable_arm.cpp	2018-04-30 11:27:57.495517330 +0200
@@ -24,6 +24,7 @@
 #include "precompiled.hpp"
 #include "asm/macroAssembler.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
 #include "interpreter/interp_masm.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interpreterRuntime.hpp"
@@ -187,70 +188,18 @@
                          Register tmp1,
                          Register tmp2,
                          Register tmp3,
-                         BarrierSet::Name barrier,
-                         bool precise,
-                         bool is_null) {
+                         bool is_null,
+                         DecoratorSet decorators = 0) {
   assert_different_registers(obj.base(), new_val, tmp1, tmp2, tmp3, noreg);
-  switch (barrier) {
-#if INCLUDE_ALL_GCS
-    case BarrierSet::G1BarrierSet:
-      {
-        // flatten object address if needed
-        assert (obj.mode() == basic_offset, "pre- or post-indexing is not supported here");
-
-        const Register store_addr = obj.base();
-        if (obj.index() != noreg) {
-          assert (obj.disp() == 0, "index or displacement, not both");
-#ifdef AARCH64
-          __ add(store_addr, obj.base(), obj.index(), obj.extend(), obj.shift_imm());
-#else
-          assert(obj.offset_op() == add_offset, "addition is expected");
-          __ add(store_addr, obj.base(), AsmOperand(obj.index(), obj.shift(), obj.shift_imm()));
-#endif // AARCH64
-        } else if (obj.disp() != 0) {
-          __ add(store_addr, obj.base(), obj.disp());
-        }
+  if (is_null) {
+    __ store_heap_oop_null(obj, new_val, tmp1, tmp2, tmp3, decorators);
+  } else {
+    __ store_heap_oop(obj, new_val, tmp1, tmp2, tmp3, decorators);
+  }
+}
-        __ g1_write_barrier_pre(store_addr, new_val, tmp1, tmp2, tmp3);
-        if (is_null) {
-          __ store_heap_oop_null(new_val, Address(store_addr));
-        } else {
-          // G1 barrier needs uncompressed oop for region cross check.
-          Register val_to_store = new_val;
-          if (UseCompressedOops) {
-            val_to_store = tmp1;
-            __ mov(val_to_store, new_val);
-          }
-          __ store_heap_oop(val_to_store, Address(store_addr)); // blows val_to_store:
-          val_to_store = noreg;
-          __ g1_write_barrier_post(store_addr, new_val, tmp1, tmp2, tmp3);
-        }
-      }
-      break;
-#endif // INCLUDE_ALL_GCS
-    case BarrierSet::CardTableBarrierSet:
-      {
-        if (is_null) {
-          __ store_heap_oop_null(new_val, obj);
-        } else {
-          assert (!precise || (obj.index() == noreg && obj.disp() == 0),
-                  "store check address should be calculated beforehand");
-
-          __ store_check_part1(tmp1);
-          __ store_heap_oop(new_val, obj); // blows new_val:
-          new_val = noreg;
-          __ store_check_part2(obj.base(), tmp1, tmp2);
-        }
-      }
-      break;
-    case BarrierSet::ModRef:
-      ShouldNotReachHere();
-      break;
-    default:
-      ShouldNotReachHere();
-      break;
-  }
+static void do_oop_load(InterpreterMacroAssembler* _masm,
+                        Register dst,
+                        Address obj,
+                        DecoratorSet decorators = 0) {
+  __ load_heap_oop(dst, obj, noreg, noreg, noreg, decorators);
 }
 
 Address TemplateTable::at_bcp(int offset) {
@@ -863,7 +812,7 @@
   const Register Rindex = R0_tos;
   index_check(Rarray, Rindex);
 
-  __ load_heap_oop(R0_tos, get_array_elem_addr(T_OBJECT, Rarray, Rindex, Rtemp));
+  do_oop_load(_masm, R0_tos, get_array_elem_addr(T_OBJECT, Rarray, Rindex, Rtemp), IN_HEAP_ARRAY);
 }
 
@@ -1248,7 +1197,7 @@
   __ add(Raddr_1, Raddr_1, AsmOperand(Rindex_4, lsl, LogBytesPerHeapOop));
 
   // Now store using the appropriate barrier
-  do_oop_store(_masm, Raddr_1, Rvalue_2, Rtemp, R0_tmp, R3_tmp, _bs->kind(), true, false);
+  do_oop_store(_masm, Raddr_1, Rvalue_2, Rtemp, R0_tmp, R3_tmp, false, IN_HEAP_ARRAY);
   __ b(done);
 
   __ bind(throw_array_store);
@@ -1264,7 +1213,7 @@
   __ profile_null_seen(R0_tmp);
 
   // Store a NULL
-  do_oop_store(_masm, Address::indexed_oop(Raddr_1, Rindex_4), Rvalue_2, Rtemp, R0_tmp, R3_tmp, _bs->kind(), true, true);
+  do_oop_store(_masm, Address::indexed_oop(Raddr_1, Rindex_4), Rvalue_2, Rtemp, R0_tmp, R3_tmp, true, IN_HEAP_ARRAY);
 
   // Pop stack arguments
   __ bind(done);
@@ -3286,7 +3235,7 @@
   // atos case for AArch64 and slow version on 32-bit ARM
   if(!atos_merged_with_itos) {
     __ bind(Latos);
-    __ load_heap_oop(R0_tos, Address(Robj, Roffset));
+    do_oop_load(_masm, R0_tos, Address(Robj, Roffset));
     __ push(atos);
     // Rewrite bytecode to be faster
     if (!is_static && rc == may_rewrite) {
@@ -3638,7 +3587,7 @@
   __ pop(atos);
   if (!is_static) pop_and_check_object(Robj);
   // Store into the field
-  do_oop_store(_masm, Address(Robj, Roffset), R0_tos, Rtemp, R1_tmp, R5_tmp, _bs->kind(), false, false);
+  do_oop_store(_masm, Address(Robj, Roffset), R0_tos, Rtemp, R1_tmp, R5_tmp, false);
   if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_aputfield, R0_tmp, Rtemp, true, byte_no);
   }
@@ -3816,7 +3765,7 @@
 #endif // AARCH64
 
     case Bytecodes::_fast_aputfield:
-      do_oop_store(_masm, Address(Robj, Roffset), R0_tos, Rtemp, R1_tmp, R2_tmp, _bs->kind(), false, false);
+      do_oop_store(_masm, Address(Robj, Roffset), R0_tos, Rtemp, R1_tmp, R2_tmp, false);
       break;
 
     default:
@@ -3912,7 +3861,7 @@
     case Bytecodes::_fast_dgetfield: __ add(Roffset, Robj, Roffset); __ fldd(D0_tos, Address(Roffset)); break;
 #endif // __SOFTFP__
 #endif // AARCH64
-    case Bytecodes::_fast_agetfield: __ load_heap_oop(R0_tos, Address(Robj, Roffset)); __ verify_oop(R0_tos); break;
+    case Bytecodes::_fast_agetfield: do_oop_load(_masm, R0_tos, Address(Robj, Roffset)); __ verify_oop(R0_tos); break;
     default:
       ShouldNotReachHere();
   }
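The do_oop_store/do_oop_load helpers above now lean entirely on the store_at/load_at dispatch instead of switching on BarrierSet::Name. A minimal model of that layering follows (class and method names mirror the patch; the bodies are simplified placeholders, not the real emitters):

    #include <cstdio>

    enum BasicType { T_INT, T_OBJECT, T_ARRAY };

    struct BarrierSetAsm {                                          // BarrierSetAssembler
      virtual ~BarrierSetAsm() {}
      virtual void store_at(BasicType) { std::puts("raw store"); }
    };

    struct ModRefBarrierSetAsm : BarrierSetAsm {                    // ModRefBarrierSetAssembler
      virtual void oop_store_at(BasicType) = 0;                     // collector-specific
      void store_at(BasicType t) override {                         // filters non-oop types
        if (t == T_OBJECT || t == T_ARRAY) oop_store_at(t);
        else BarrierSetAsm::store_at(t);
      }
    };

    struct G1BarrierSetAsm : ModRefBarrierSetAsm {                  // G1BarrierSetAssembler
      void oop_store_at(BasicType) override { std::puts("pre-barrier, store, post-barrier"); }
    };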
@@ -3992,7 +3941,7 @@
   if (state == itos) {
     __ ldr_s32(R0_tos, Address(Robj, Roffset));
   } else if (state == atos) {
-    __ load_heap_oop(R0_tos, Address(Robj, Roffset));
+    do_oop_load(_masm, R0_tos, Address(Robj, Roffset));
     __ verify_oop(R0_tos);
   } else if (state == ftos) {
 #ifdef AARCH64
--- /dev/null	2018-04-05 19:09:31.849588610 +0200
+++ new/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp	2018-04-30 11:27:57.923731331 +0200
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc/shared/barrierSetAssembler.hpp"
+
+#define __ masm->
+
+void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                  Register dst, Address src, Register tmp1, Register tmp2, Register tmp3) {
+  bool on_heap = (decorators & IN_HEAP) != 0;
+  bool on_root = (decorators & IN_ROOT) != 0;
+  switch (type) {
+  case T_OBJECT:
+  case T_ARRAY: {
+    if (on_heap) {
+#ifdef AARCH64
+      if (UseCompressedOops) {
+        __ ldr_w(dst, src);
+        __ decode_heap_oop(dst);
+      } else
+#endif // AARCH64
+      {
+        __ ldr(dst, src);
+      }
+    } else {
+      assert(on_root, "why else?");
+      __ ldr(dst, src);
+    }
+    break;
+  }
+  default: Unimplemented();
+  }
+
+}
+
+void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
+                                   Address obj, Register val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
+  bool on_heap = (decorators & IN_HEAP) != 0;
+  bool on_root = (decorators & IN_ROOT) != 0;
+  switch (type) {
+  case T_OBJECT:
+  case T_ARRAY: {
+    if (on_heap) {
+#ifdef AARCH64
+      if (UseCompressedOops) {
+        assert(!obj.uses(val), "not enough registers");
+        if (!is_null) {
+          __ encode_heap_oop(val);
+        }
+        __ str_w(val, obj);
+      } else
+#endif // AARCH64
+      {
+        __ str(val, obj);
+      }
+    } else {
+      assert(on_root, "why else?");
+      __ str(val, obj);
+    }
+    break;
+  }
+  default: Unimplemented();
+  }
+}
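Finally, a sketch of the compressed-oop handling that the new BarrierSetAssembler::load_at/store_at wrap on AArch64; a zero-based, shifted encoding is assumed here for simplicity, while the real encode_heap_oop/decode_heap_oop also handle a non-zero heap base and unscaled mode.

    #include <cstdint>

    inline uint64_t decode_heap_oop(uint32_t narrow, uint64_t heap_base, unsigned shift) {
      return narrow == 0 ? 0 : heap_base + ((uint64_t)narrow << shift);  // ldr_w + decode
    }

    inline uint32_t encode_heap_oop(uint64_t obj, uint64_t heap_base, unsigned shift) {
      return obj == 0 ? 0 : (uint32_t)((obj - heap_base) >> shift);      // encode + str_w
    }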