diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp new file mode 100644 index 0000000..564a1ce --- /dev/null +++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp @@ -0,0 +1,771 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#include "gc/g1/g1BarrierSet.hpp" +#include "gc/g1/g1CardTable.hpp" +#include "gc/g1/g1ThreadLocalData.hpp" +#include "gc/g1/heapRegion.hpp" +#include "opto/arraycopynode.hpp" +#include "opto/graphKit.hpp" +#include "opto/idealKit.hpp" +#include "opto/macro.hpp" +#include "opto/type.hpp" +#include "runtime/sharedRuntime.hpp" +#include "utilities/macros.hpp" + +const TypeFunc *G1BarrierSetC2::g1_wb_pre_Type() { + const Type **fields = TypeTuple::fields(2); + fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value + fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread + const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); + + // create result type (range) + fields = TypeTuple::fields(0); + const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields); + + return TypeFunc::make(domain, range); +} + +const TypeFunc *G1BarrierSetC2::g1_wb_post_Type() { + const Type **fields = TypeTuple::fields(2); + fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Card addr + fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread + const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); + + // create result type (range) + fields = TypeTuple::fields(0); + const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields); + + return TypeFunc::make(domain, range); +} + +#define __ ideal. +/* + * Determine if the G1 pre-barrier can be removed. The pre-barrier is + * required by SATB to make sure all objects live at the start of the + * marking are kept alive, all reference updates need to any previous + * reference stored before writing. + * + * If the previous value is NULL there is no need to save the old value. + * References that are NULL are filtered during runtime by the barrier + * code to avoid unnecessary queuing. + * + * However in the case of newly allocated objects it might be possible to + * prove that the reference about to be overwritten is NULL during compile + * time and avoid adding the barrier code completely. 
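+ *
+ * For example (illustrative sketch):
+ *
+ *   obj = new Foo();     // AllocateNode visible to this compilation
+ *   ...                  // no intervening store to obj.f
+ *   obj.f = v;           // previous value of obj.f is provably NULL,
+ *                        // so the SATB pre-barrier can be elided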
+ * + * The compiler needs to determine that the object in which a field is about + * to be written is newly allocated, and that no prior store to the same field + * has happened since the allocation. + * + * Returns true if the pre-barrier can be removed + */ +bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit, + PhaseTransform* phase, + Node* adr, + BasicType bt, + uint adr_idx) const { + intptr_t offset = 0; + Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); + AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase); + + if (offset == Type::OffsetBot) { + return false; // cannot unalias unless there are precise offsets + } + + if (alloc == NULL) { + return false; // No allocation found + } + + intptr_t size_in_bytes = type2aelembytes(bt); + + Node* mem = kit->memory(adr_idx); // start searching here... + + for (int cnt = 0; cnt < 50; cnt++) { + + if (mem->is_Store()) { + + Node* st_adr = mem->in(MemNode::Address); + intptr_t st_offset = 0; + Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset); + + if (st_base == NULL) { + break; // inscrutable pointer + } + + // Break we have found a store with same base and offset as ours so break + if (st_base == base && st_offset == offset) { + break; + } + + if (st_offset != offset && st_offset != Type::OffsetBot) { + const int MAX_STORE = BytesPerLong; + if (st_offset >= offset + size_in_bytes || + st_offset <= offset - MAX_STORE || + st_offset <= offset - mem->as_Store()->memory_size()) { + // Success: The offsets are provably independent. + // (You may ask, why not just test st_offset != offset and be done? + // The answer is that stores of different sizes can co-exist + // in the same sequence of RawMem effects. We sometimes initialize + // a whole 'tile' of array elements with a single jint or jlong.) + mem = mem->in(MemNode::Memory); + continue; // advance through independent store memory + } + } + + if (st_base != base + && MemNode::detect_ptr_independence(base, alloc, st_base, + AllocateNode::Ideal_allocation(st_base, phase), + phase)) { + // Success: The bases are provably independent. + mem = mem->in(MemNode::Memory); + continue; // advance through independent store memory + } + } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) { + + InitializeNode* st_init = mem->in(0)->as_Initialize(); + AllocateNode* st_alloc = st_init->allocation(); + + // Make sure that we are looking at the same allocation site. + // The alloc variable is guaranteed to not be null here from earlier check. + if (alloc == st_alloc) { + // Check that the initialization is storing NULL so that no previous store + // has been moved up and directly write a reference + Node* captured_store = st_init->find_captured_store(offset, + type2aelembytes(T_OBJECT), + phase); + if (captured_store == NULL || captured_store == st_init->zero_memory()) { + return true; + } + } + } + + // Unless there is an explicit 'continue', we must bail out here, + // because 'mem' is an inscrutable memory state (e.g., a call). + break; + } + + return false; +} + +// G1 pre/post barriers +void G1BarrierSetC2::pre_barrier(GraphKit* kit, + bool do_load, + Node* ctl, + Node* obj, + Node* adr, + uint alias_idx, + Node* val, + const TypeOopPtr* val_type, + Node* pre_val, + BasicType bt) const { + // Some sanity checks + // Note: val is unused in this routine. 
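+
+  // The IdealKit code below expands to, in pseudocode (illustrative sketch):
+  //
+  //   if (thread->satb_mark_queue_active()) {
+  //     pre_val = *adr;                        // only when do_load is true
+  //     if (pre_val != NULL) {
+  //       if (index != 0) {                    // room left in the SATB buffer
+  //         index -= sizeof(intptr_t);         // index is a byte offset
+  //         *(buffer + index) = pre_val;       // log_addr = buffer + index
+  //       } else {
+  //         g1_wb_pre(pre_val, thread);        // leaf call; buffer is full
+  //       }
+  //     }
+  //   }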
+ + if (do_load) { + // We need to generate the load of the previous value + assert(obj != NULL, "must have a base"); + assert(adr != NULL, "where are loading from?"); + assert(pre_val == NULL, "loaded already?"); + assert(val_type != NULL, "need a type"); + + if (use_ReduceInitialCardMarks() + && g1_can_remove_pre_barrier(kit, &kit->gvn(), adr, bt, alias_idx)) { + return; + } + + } else { + // In this case both val_type and alias_idx are unused. + assert(pre_val != NULL, "must be loaded already"); + // Nothing to be done if pre_val is null. + if (pre_val->bottom_type() == TypePtr::NULL_PTR) return; + assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here"); + } + assert(bt == T_OBJECT, "or we shouldn't be here"); + + IdealKit ideal(kit, true); + + Node* tls = __ thread(); // ThreadLocalStorage + + Node* no_base = __ top(); + Node* zero = __ ConI(0); + Node* zeroX = __ ConX(0); + + float likely = PROB_LIKELY(0.999); + float unlikely = PROB_UNLIKELY(0.999); + + BasicType active_type = in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE; + assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 || in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "flag width"); + + // Offsets into the thread + const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); + const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()); + const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()); + + // Now the actual pointers into the thread + Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset)); + Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset)); + Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset)); + + // Now some of the values + Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw); + + // if (!marking) + __ if_then(marking, BoolTest::ne, zero, unlikely); { + BasicType index_bt = TypeX_X->basic_type(); + assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading G1 SATBMarkQueue::_index with wrong size."); + Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw); + + if (do_load) { + // load original value + // alias_idx correct?? + pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx); + } + + // if (pre_val != NULL) + __ if_then(pre_val, BoolTest::ne, kit->null()); { + Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); + + // is the queue for this thread full? + __ if_then(index, BoolTest::ne, zeroX, likely); { + + // decrement the index + Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t)))); + + // Now get the buffer location we will log the previous value into and store it + Node *log_addr = __ AddP(no_base, buffer, next_index); + __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered); + // update the index + __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered); + + } __ else_(); { + + // logging buffer is full, call the runtime + const TypeFunc *tf = g1_wb_pre_Type(); + __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", pre_val, tls); + } __ end_if(); // (!index) + } __ end_if(); // (pre_val != NULL) + } __ end_if(); // (!marking) + + // Final sync IdealKit and GraphKit. 
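+  // (final_sync transfers the control and memory state accumulated in the
+  // IdealKit back to the GraphKit, so subsequent parsing continues from the
+  // barrier's side effects.)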
+ kit->final_sync(ideal); +} + +/* + * G1 similar to any GC with a Young Generation requires a way to keep track of + * references from Old Generation to Young Generation to make sure all live + * objects are found. G1 also requires to keep track of object references + * between different regions to enable evacuation of old regions, which is done + * as part of mixed collections. References are tracked in remembered sets and + * is continuously updated as reference are written to with the help of the + * post-barrier. + * + * To reduce the number of updates to the remembered set the post-barrier + * filters updates to fields in objects located in the Young Generation, + * the same region as the reference, when the NULL is being written or + * if the card is already marked as dirty by an earlier write. + * + * Under certain circumstances it is possible to avoid generating the + * post-barrier completely if it is possible during compile time to prove + * the object is newly allocated and that no safepoint exists between the + * allocation and the store. + * + * In the case of slow allocation the allocation code must handle the barrier + * as part of the allocation in the case the allocated object is not located + * in the nursery, this would happen for humongous objects. This is similar to + * how CMS is required to handle this case, see the comments for the method + * CollectedHeap::new_deferred_store_barrier and OptoRuntime::new_deferred_store_barrier. + * A deferred card mark is required for these objects and handled in the above + * mentioned methods. + * + * Returns true if the post barrier can be removed + */ +bool G1BarrierSetC2::g1_can_remove_post_barrier(GraphKit* kit, + PhaseTransform* phase, Node* store, + Node* adr) const { + intptr_t offset = 0; + Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); + AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase); + + if (offset == Type::OffsetBot) { + return false; // cannot unalias unless there are precise offsets + } + + if (alloc == NULL) { + return false; // No allocation found + } + + // Start search from Store node + Node* mem = store->in(MemNode::Control); + if (mem->is_Proj() && mem->in(0)->is_Initialize()) { + + InitializeNode* st_init = mem->in(0)->as_Initialize(); + AllocateNode* st_alloc = st_init->allocation(); + + // Make sure we are looking at the same allocation + if (alloc == st_alloc) { + return true; + } + } + + return false; +} + +// +// Update the card table and add card address to the queue +// +void G1BarrierSetC2::g1_mark_card(GraphKit* kit, + IdealKit& ideal, + Node* card_adr, + Node* oop_store, + uint oop_alias_idx, + Node* index, + Node* index_adr, + Node* buffer, + const TypeFunc* tf) const { + Node* zero = __ ConI(0); + Node* zeroX = __ ConX(0); + Node* no_base = __ top(); + BasicType card_bt = T_BYTE; + // Smash zero into card. MUST BE ORDERED WRT TO STORE + __ storeCM(__ ctrl(), card_adr, zero, oop_store, oop_alias_idx, card_bt, Compile::AliasIdxRaw); + + // Now do the queue work + __ if_then(index, BoolTest::ne, zeroX); { + + Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t)))); + Node* log_addr = __ AddP(no_base, buffer, next_index); + + // Order, see storeCM. 
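+    // Fast path of the dirty card queue: store the card address into the next
+    // free slot and publish the decremented byte index. The else-branch below
+    // falls back to the g1_wb_post leaf call once the buffer is exhausted.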
+ __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered); + __ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw, MemNode::unordered); + + } __ else_(); { + __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread()); + } __ end_if(); + +} + +void G1BarrierSetC2::post_barrier(GraphKit* kit, + Node* ctl, + Node* oop_store, + Node* obj, + Node* adr, + uint alias_idx, + Node* val, + BasicType bt, + bool use_precise) const { + // If we are writing a NULL then we need no post barrier + + if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) { + // Must be NULL + const Type* t = val->bottom_type(); + assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL"); + // No post barrier if writing NULLx + return; + } + + if (use_ReduceInitialCardMarks() && obj == kit->just_allocated_object(kit->control())) { + // We can skip marks on a freshly-allocated object in Eden. + // Keep this code in sync with new_deferred_store_barrier() in runtime.cpp. + // That routine informs GC to take appropriate compensating steps, + // upon a slow-path allocation, so as to make this card-mark + // elision safe. + return; + } + + if (use_ReduceInitialCardMarks() + && g1_can_remove_post_barrier(kit, &kit->gvn(), oop_store, adr)) { + return; + } + + if (!use_precise) { + // All card marks for a (non-array) instance are in one place: + adr = obj; + } + // (Else it's an array (or unknown), and we want more precise card marks.) + assert(adr != NULL, ""); + + IdealKit ideal(kit, true); + + Node* tls = __ thread(); // ThreadLocalStorage + + Node* no_base = __ top(); + float unlikely = PROB_UNLIKELY(0.999); + Node* young_card = __ ConI((jint)G1CardTable::g1_young_card_val()); + Node* dirty_card = __ ConI((jint)G1CardTable::dirty_card_val()); + Node* zeroX = __ ConX(0); + + const TypeFunc *tf = g1_wb_post_Type(); + + // Offsets into the thread + const int index_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()); + const int buffer_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()); + + // Pointers into the thread + + Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset)); + Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset)); + + // Now some values + // Use ctrl to avoid hoisting these values past a safepoint, which could + // potentially reset these fields in the JavaThread. + Node* index = __ load(__ ctrl(), index_adr, TypeX_X, TypeX_X->basic_type(), Compile::AliasIdxRaw); + Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); + + // Convert the store obj pointer to an int prior to doing math on it + // Must use ctrl to prevent "integerized oop" existing across safepoint + Node* cast = __ CastPX(__ ctrl(), adr); + + // Divide pointer by card size + Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift) ); + + // Combine card table base and card offset + Node* card_adr = __ AddP(no_base, byte_map_base_node(kit), card_offset ); + + // If we know the value being stored does it cross regions? + + if (val != NULL) { + // Does the store cause us to cross regions? + + // Should be able to do an unsigned compare of region_size instead of + // and extra shift. Do we have an unsigned compare?? 
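+    // The filters emitted below are, in pseudocode (illustrative sketch):
+    //
+    //   if (((adr ^ (uintptr_t)val) >> LogOfHRGrainBytes) != 0) {  // crosses regions?
+    //     if (val != NULL) {
+    //       if (*card_adr != g1_young_card_val) {
+    //         StoreLoad membar;
+    //         if (*card_adr != dirty_card_val) {
+    //           g1_mark_card(...);             // dirty the card and enqueue it
+    //         }
+    //       }
+    //     }
+    //   }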
+ // Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes); + Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes)); + + // if (xor_res == 0) same region so skip + __ if_then(xor_res, BoolTest::ne, zeroX); { + + // No barrier if we are storing a NULL + __ if_then(val, BoolTest::ne, kit->null(), unlikely); { + + // Ok must mark the card if not already dirty + + // load the original value of the card + Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); + + __ if_then(card_val, BoolTest::ne, young_card); { + kit->sync_kit(ideal); + kit->insert_mem_bar(Op_MemBarVolatile, oop_store); + __ sync_kit(kit); + + Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); + __ if_then(card_val_reload, BoolTest::ne, dirty_card); { + g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); + } __ end_if(); + } __ end_if(); + } __ end_if(); + } __ end_if(); + } else { + // The Object.clone() intrinsic uses this path if !ReduceInitialCardMarks. + // We don't need a barrier here if the destination is a newly allocated object + // in Eden. Otherwise, GC verification breaks because we assume that cards in Eden + // are set to 'g1_young_gen' (see G1CardTable::verify_g1_young_region()). + assert(!use_ReduceInitialCardMarks(), "can only happen with card marking"); + Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); + __ if_then(card_val, BoolTest::ne, young_card); { + g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); + } __ end_if(); + } + + // Final sync IdealKit and GraphKit. + kit->final_sync(ideal); +} + +// Helper that guards and inserts a pre-barrier. +void G1BarrierSetC2::insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset, + Node* pre_val, bool need_mem_bar) const { + // We could be accessing the referent field of a reference object. If so, when G1 + // is enabled, we need to log the value in the referent field in an SATB buffer. + // This routine performs some compile time filters and generates suitable + // runtime filters that guard the pre-barrier code. + // Also add memory barrier for non volatile load from the referent field + // to prevent commoning of loads across safepoint. + + // Some compile time checks. + + // If offset is a constant, is it java_lang_ref_Reference::_reference_offset? + const TypeX* otype = offset->find_intptr_t_type(); + if (otype != NULL && otype->is_con() && + otype->get_con() != java_lang_ref_Reference::referent_offset) { + // Constant offset but not the reference_offset so just return + return; + } + + // We only need to generate the runtime guards for instances. + const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr(); + if (btype != NULL) { + if (btype->isa_aryptr()) { + // Array type so nothing to do + return; + } + + const TypeInstPtr* itype = btype->isa_instptr(); + if (itype != NULL) { + // Can the klass of base_oop be statically determined to be + // _not_ a sub-class of Reference and _not_ Object? 
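+      // A loaded klass that is neither java.lang.Object itself nor a subtype
+      // of Reference can never deliver the referent field, so both the
+      // runtime guard and the SATB logging can be skipped for such bases.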
+ ciKlass* klass = itype->klass(); + if ( klass->is_loaded() && + !klass->is_subtype_of(kit->env()->Reference_klass()) && + !kit->env()->Object_klass()->is_subtype_of(klass)) { + return; + } + } + } + + // The compile time filters did not reject base_oop/offset so + // we need to generate the following runtime filters + // + // if (offset == java_lang_ref_Reference::_reference_offset) { + // if (instance_of(base, java.lang.ref.Reference)) { + // pre_barrier(_, pre_val, ...); + // } + // } + + float likely = PROB_LIKELY( 0.999); + float unlikely = PROB_UNLIKELY(0.999); + + IdealKit ideal(kit); + + Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset); + + __ if_then(offset, BoolTest::eq, referent_off, unlikely); { + // Update graphKit memory and control from IdealKit. + kit->sync_kit(ideal); + + Node* ref_klass_con = kit->makecon(TypeKlassPtr::make(kit->env()->Reference_klass())); + Node* is_instof = kit->gen_instanceof(base_oop, ref_klass_con); + + // Update IdealKit memory and control from graphKit. + __ sync_kit(kit); + + Node* one = __ ConI(1); + // is_instof == 0 if base_oop == NULL + __ if_then(is_instof, BoolTest::eq, one, unlikely); { + + // Update graphKit from IdeakKit. + kit->sync_kit(ideal); + + // Use the pre-barrier to record the value in the referent field + pre_barrier(kit, false /* do_load */, + __ ctrl(), + NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */, + pre_val /* pre_val */, + T_OBJECT); + if (need_mem_bar) { + // Add memory barrier to prevent commoning reads from this field + // across safepoint since GC can change its value. + kit->insert_mem_bar(Op_MemBarCPUOrder); + } + // Update IdealKit from graphKit. + __ sync_kit(kit); + + } __ end_if(); // _ref_type != ref_none + } __ end_if(); // offset == referent_offset + + // Final sync IdealKit and GraphKit. + kit->final_sync(ideal); +} + +#undef __ + +Node* G1BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { + DecoratorSet decorators = access.decorators(); + GraphKit* kit = access.kit(); + + Node* adr = access.addr().node(); + Node* obj = access.base(); + + bool mismatched = (decorators & C2_MISMATCHED) != 0; + bool unknown = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool on_heap = (decorators & IN_HEAP) != 0; + bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; + bool is_unordered = (decorators & MO_UNORDERED) != 0; + bool need_cpu_mem_bar = !is_unordered || mismatched || !on_heap; + + Node* offset = adr->is_AddP() ? adr->in(AddPNode::Offset) : kit->top(); + Node* load = CardTableBarrierSetC2::load_at_resolved(access, val_type); + + // If we are reading the value of the referent field of a Reference + // object (either by using Unsafe directly or through reflection) + // then, if G1 is enabled, we need to record the referent in an + // SATB log buffer using the pre-barrier mechanism. + // Also we need to add memory barrier to prevent commoning reads + // from this field across safepoint since GC can change its value. 
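+  // In short (sketch): a read barrier is required for in-heap oop loads that
+  // are either statically known to be weak (Reference.get style) or anonymous
+  // Unsafe/reflective loads for which a base and offset are available to test
+  // against the referent field at runtime; the expression below encodes this.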
+ bool need_read_barrier = on_heap && (on_weak || + (unknown && offset != kit->top() && obj != kit->top())); + + if (!access.is_oop() || !need_read_barrier) { + return load; + } + + if (on_weak) { + // Use the pre-barrier to record the value in the referent field + pre_barrier(kit, false /* do_load */, + kit->control(), + NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */, + load /* pre_val */, T_OBJECT); + // Add memory barrier to prevent commoning reads from this field + // across safepoint since GC can change its value. + kit->insert_mem_bar(Op_MemBarCPUOrder); + } else if (unknown) { + // We do not require a mem bar inside pre_barrier if need_mem_bar + // is set: the barriers would be emitted by us. + insert_pre_barrier(kit, obj, offset, load, !need_cpu_mem_bar); + } + + return load; +} + +bool G1BarrierSetC2::is_gc_barrier_node(Node* node) const { + if (CardTableBarrierSetC2::is_gc_barrier_node(node)) { + return true; + } + if (node->Opcode() != Op_CallLeaf) { + return false; + } + CallLeafNode *call = node->as_CallLeaf(); + if (call->_name == NULL) { + return false; + } + + return strcmp(call->_name, "g1_wb_pre") == 0 || strcmp(call->_name, "g1_wb_post") == 0; +} + +void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { + assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required"); + assert(node->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes"); + // It could be only one user, URShift node, in Object.clone() intrinsic + // but the new allocation is passed to arraycopy stub and it could not + // be scalar replaced. So we don't check the case. + + // An other case of only one user (Xor) is when the value check for NULL + // in G1 post barrier is folded after CCP so the code which used URShift + // is removed. + + // Take Region node before eliminating post barrier since it also + // eliminates CastP2X node when it has only one user. + Node* this_region = node->in(0); + assert(this_region != NULL, ""); + + // Remove G1 post barrier. + + // Search for CastP2X->Xor->URShift->Cmp path which + // checks if the store done to a different from the value's region. + // And replace Cmp with #0 (false) to collapse G1 post barrier. + Node* xorx = node->find_out_with(Op_XorX); + if (xorx != NULL) { + Node* shift = xorx->unique_out(); + Node* cmpx = shift->unique_out(); + assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() && + cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne, + "missing region check in G1 post barrier"); + macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ)); + + // Remove G1 pre barrier. + + // Search "if (marking != 0)" check and set it to "false". + // There is no G1 pre barrier if previous stored value is NULL + // (for example, after initialization). 
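+    // The graph shapes matched and collapsed here are (illustrative sketch):
+    //
+    //   post barrier: CastP2X -> XorX -> URShiftX -> CmpX -> Bool(ne) -> If
+    //   pre barrier : LoadB(satb_mark_queue_active) -> CmpI -> Bool(ne) -> If
+    //
+    // Replacing each Cmp with a constant "equal" condition lets IGVN fold the
+    // guarded barrier code away.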
+ if (this_region->is_Region() && this_region->req() == 3) { + int ind = 1; + if (!this_region->in(ind)->is_IfFalse()) { + ind = 2; + } + if (this_region->in(ind)->is_IfFalse()) { + Node* bol = this_region->in(ind)->in(0)->in(1); + assert(bol->is_Bool(), ""); + cmpx = bol->in(1); + if (bol->as_Bool()->_test._test == BoolTest::ne && + cmpx->is_Cmp() && cmpx->in(2) == macro->intcon(0) && + cmpx->in(1)->is_Load()) { + Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address); + const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); + if (adr->is_AddP() && adr->in(AddPNode::Base) == macro->top() && + adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal && + adr->in(AddPNode::Offset) == macro->MakeConX(marking_offset)) { + macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ)); + } + } + } + } + } else { + assert(!use_ReduceInitialCardMarks(), "can only happen with card marking"); + // This is a G1 post barrier emitted by the Object.clone() intrinsic. + // Search for the CastP2X->URShiftX->AddP->LoadB->Cmp path which checks if the card + // is marked as young_gen and replace the Cmp with 0 (false) to collapse the barrier. + Node* shift = node->find_out_with(Op_URShiftX); + assert(shift != NULL, "missing G1 post barrier"); + Node* addp = shift->unique_out(); + Node* load = addp->find_out_with(Op_LoadB); + assert(load != NULL, "missing G1 post barrier"); + Node* cmpx = load->unique_out(); + assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() && + cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne, + "missing card value check in G1 post barrier"); + macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ)); + // There is no G1 pre barrier in this case + } + // Now CastP2X can be removed since it is used only on dead path + // which currently still alive until igvn optimize it. + assert(node->outcnt() == 0 || node->unique_out()->Opcode() == Op_URShiftX, ""); + macro->replace_node(node, macro->top()); +} + +Node* G1BarrierSetC2::step_over_gc_barrier(Node* c) const { + if (!use_ReduceInitialCardMarks() && + c != NULL && c->is_Region() && c->req() == 3) { + for (uint i = 1; i < c->req(); i++) { + if (c->in(i) != NULL && c->in(i)->is_Region() && + c->in(i)->req() == 3) { + Node* r = c->in(i); + for (uint j = 1; j < r->req(); j++) { + if (r->in(j) != NULL && r->in(j)->is_Proj() && + r->in(j)->in(0) != NULL && + r->in(j)->in(0)->Opcode() == Op_CallLeaf && + r->in(j)->in(0)->as_Call()->entry_point() == CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post)) { + Node* call = r->in(j)->in(0); + c = c->in(i == 1 ? 2 : 1); + if (c != NULL) { + c = c->in(0); + if (c != NULL) { + c = c->in(0); + assert(call->in(0) == NULL || + call->in(0)->in(0) == NULL || + call->in(0)->in(0)->in(0) == NULL || + call->in(0)->in(0)->in(0)->in(0) == NULL || + call->in(0)->in(0)->in(0)->in(0)->in(0) == NULL || + c == call->in(0)->in(0)->in(0)->in(0)->in(0), "bad barrier shape"); + return c; + } + } + } + } + } + } + } + return c; +} diff --git a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp new file mode 100644 index 0000000..6a3f1cc --- /dev/null +++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_GC_SHARED_C2_G1BARRIERSETC2_HPP +#define SHARE_GC_SHARED_C2_G1BARRIERSETC2_HPP + +#include "gc/shared/c2/cardTableBarrierSetC2.hpp" + +class PhaseTransform; +class Type; +class TypeFunc; + +class G1BarrierSetC2: public CardTableBarrierSetC2 { +protected: + virtual void pre_barrier(GraphKit* kit, + bool do_load, + Node* ctl, + Node* obj, + Node* adr, + uint adr_idx, + Node* val, + const TypeOopPtr* val_type, + Node* pre_val, + BasicType bt) const; + + virtual void post_barrier(GraphKit* kit, + Node* ctl, + Node* store, + Node* obj, + Node* adr, + uint adr_idx, + Node* val, + BasicType bt, + bool use_precise) const; + + bool g1_can_remove_pre_barrier(GraphKit* kit, + PhaseTransform* phase, + Node* adr, + BasicType bt, + uint adr_idx) const; + + bool g1_can_remove_post_barrier(GraphKit* kit, + PhaseTransform* phase, Node* store, + Node* adr) const; + + void g1_mark_card(GraphKit* kit, + IdealKit& ideal, + Node* card_adr, + Node* oop_store, + uint oop_alias_idx, + Node* index, + Node* index_adr, + Node* buffer, + const TypeFunc* tf) const; + + // Helper for unsafe accesses, that may or may not be on the referent field. + // Generates the guards that check whether the result of + // Unsafe.getObject should be recorded in an SATB log buffer. 
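+  // The emitted guard is roughly (sketch):
+  //
+  //   if (offset == java_lang_ref_Reference::referent_offset &&
+  //       base_oop instanceof java.lang.ref.Reference) {
+  //     pre_barrier(..., pre_val, ...);   // record pre_val in the SATB buffer
+  //   }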
+ void insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar) const; + + static const TypeFunc* g1_wb_pre_Type(); + static const TypeFunc* g1_wb_post_Type(); + + virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const; + + public: + virtual bool is_gc_barrier_node(Node* node) const; + virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const; + virtual Node* step_over_gc_barrier(Node* c) const; +}; + +#endif // SHARE_GC_SHARED_C2_G1BARRIERSETC2_HPP diff --git a/src/hotspot/share/gc/g1/g1BarrierSet.cpp b/src/hotspot/share/gc/g1/g1BarrierSet.cpp index f5b90a4..ecd1d17 100644 --- a/src/hotspot/share/gc/g1/g1BarrierSet.cpp +++ b/src/hotspot/share/gc/g1/g1BarrierSet.cpp @@ -34,14 +34,19 @@ #include "oops/access.inline.hpp" #include "oops/compressedOops.inline.hpp" #include "oops/oop.inline.hpp" +#include "runtime/interfaceSupport.inline.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/thread.inline.hpp" #include "utilities/macros.hpp" #ifdef COMPILER1 #include "gc/g1/c1/g1BarrierSetC1.hpp" #endif +#ifdef COMPILER2 +#include "gc/g1/c2/g1BarrierSetC2.hpp" +#endif class G1BarrierSetC1; +class G1BarrierSetC2; SATBMarkQueueSet G1BarrierSet::_satb_mark_queue_set; DirtyCardQueueSet G1BarrierSet::_dirty_card_queue_set; @@ -49,6 +54,7 @@ DirtyCardQueueSet G1BarrierSet::_dirty_card_queue_set; G1BarrierSet::G1BarrierSet(G1CardTable* card_table) : CardTableBarrierSet(make_barrier_set_assembler(), make_barrier_set_c1(), + make_barrier_set_c2(), card_table, BarrierSet::FakeRtti(BarrierSet::G1BarrierSet)) {} diff --git a/src/hotspot/share/gc/shared/barrierSet.hpp b/src/hotspot/share/gc/shared/barrierSet.hpp index 3972ddc..748f33b 100644 --- a/src/hotspot/share/gc/shared/barrierSet.hpp +++ b/src/hotspot/share/gc/shared/barrierSet.hpp @@ -35,6 +35,7 @@ class BarrierSetAssembler; class BarrierSetC1; +class BarrierSetC2; class JavaThread; // This class provides the interface between a barrier implementation and @@ -70,6 +71,7 @@ private: FakeRtti _fake_rtti; BarrierSetAssembler* _barrier_set_assembler; BarrierSetC1* _barrier_set_c1; + BarrierSetC2* _barrier_set_c2; public: // Metafunction mapping a class derived from BarrierSet to the @@ -92,10 +94,12 @@ public: protected: BarrierSet(BarrierSetAssembler* barrier_set_assembler, BarrierSetC1* barrier_set_c1, + BarrierSetC2* barrier_set_c2, const FakeRtti& fake_rtti) : _fake_rtti(fake_rtti), _barrier_set_assembler(barrier_set_assembler), - _barrier_set_c1(barrier_set_c1) {} + _barrier_set_c1(barrier_set_c1), + _barrier_set_c2(barrier_set_c2) {} ~BarrierSet() { } template @@ -108,6 +112,11 @@ protected: return COMPILER1_PRESENT(new BarrierSetC1T()) NOT_COMPILER1(NULL); } + template + BarrierSetC2* make_barrier_set_c2() { + return COMPILER2_PRESENT(new BarrierSetC2T()) NOT_COMPILER2(NULL); + } + public: // Support for optimizing compilers to call the barrier set on slow path allocations // that did not enter a TLAB. Used for e.g. ReduceInitialCardMarks. @@ -138,6 +147,11 @@ public: return _barrier_set_c1; } + BarrierSetC2* barrier_set_c2() { + assert(_barrier_set_c2 != NULL, "should be set"); + return _barrier_set_c2; + } + // The AccessBarrier of a BarrierSet subclass is called by the Access API // (cf. oops/access.hpp) to perform decorated accesses. 
GC implementations // may override these default access operations by declaring an diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp new file mode 100644 index 0000000..43bbe8d --- /dev/null +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp @@ -0,0 +1,588 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "gc/shared/c2/barrierSetC2.hpp" +#include "opto/arraycopynode.hpp" +#include "opto/graphKit.hpp" +#include "opto/idealKit.hpp" +#include "opto/narrowptrnode.hpp" +#include "utilities/macros.hpp" + +// By default this is a no-op. +void BarrierSetC2::resolve_address(C2Access& access) const { } + +void* C2Access::barrier_set_state() const { + return _kit->barrier_set_state(); +} + +bool C2Access::needs_cpu_membar() const { + bool mismatched = (_decorators & C2_MISMATCHED) != 0; + bool is_unordered = (_decorators & MO_UNORDERED) != 0; + bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0; + bool on_heap = (_decorators & IN_HEAP) != 0; + + bool is_write = (_decorators & C2_WRITE_ACCESS) != 0; + bool is_read = (_decorators & C2_READ_ACCESS) != 0; + bool is_atomic = is_read && is_write; + + if (is_atomic) { + // Atomics always need to be wrapped in CPU membars + return true; + } + + if (anonymous) { + // We will need memory barriers unless we can determine a unique + // alias category for this reference. (Note: If for some reason + // the barriers get omitted and the unsafe reference begins to "pollute" + // the alias analysis of the rest of the graph, either Compile::can_alias + // or Compile::must_alias will throw a diagnostic assert.) 
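+    // In short (sketch): atomic accesses always get CPU membars; anonymous
+    // (Unsafe) accesses get them when they are off-heap, carry an ordering
+    // constraint, or are mismatched on something other than an array; plain
+    // field and array accesses do not.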
+ if (!on_heap || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) { + return true; + } + } + + return false; +} + +Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const { + DecoratorSet decorators = access.decorators(); + GraphKit* kit = access.kit(); + + bool mismatched = (decorators & C2_MISMATCHED) != 0; + bool unaligned = (decorators & C2_UNALIGNED) != 0; + bool requires_atomic_access = (decorators & MO_UNORDERED) == 0; + + bool in_root = (decorators & IN_ROOT) != 0; + assert(!in_root, "not supported yet"); + + if (access.type() == T_DOUBLE) { + Node* new_val = kit->dstore_rounding(val.node()); + val.set_node(new_val); + } + + MemNode::MemOrd mo = access.mem_node_mo(); + + Node* store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), access.type(), + access.addr().type(), mo, requires_atomic_access, unaligned, mismatched); + access.set_raw_access(store); + return store; +} + +Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const { + DecoratorSet decorators = access.decorators(); + GraphKit* kit = access.kit(); + + Node* adr = access.addr().node(); + const TypePtr* adr_type = access.addr().type(); + + bool mismatched = (decorators & C2_MISMATCHED) != 0; + bool requires_atomic_access = (decorators & MO_UNORDERED) == 0; + bool unaligned = (decorators & C2_UNALIGNED) != 0; + bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0; + bool pinned = (decorators & C2_PINNED_LOAD) != 0; + + bool in_root = (decorators & IN_ROOT) != 0; + assert(!in_root, "not supported yet"); + + MemNode::MemOrd mo = access.mem_node_mo(); + LoadNode::ControlDependency dep = pinned ? LoadNode::Pinned : LoadNode::DependsOnlyOnTest; + Node* control = control_dependent ? kit->control() : NULL; + + Node* load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo, + dep, requires_atomic_access, unaligned, mismatched); + access.set_raw_access(load); + + return load; +} + +class C2AccessFence: public StackObj { + C2Access& _access; + +public: + C2AccessFence(C2Access& access) : + _access(access) { + GraphKit* kit = access.kit(); + DecoratorSet decorators = access.decorators(); + + bool is_write = (decorators & C2_WRITE_ACCESS) != 0; + bool is_read = (decorators & C2_READ_ACCESS) != 0; + bool is_atomic = is_read && is_write; + + bool is_volatile = (decorators & MO_SEQ_CST) != 0; + bool is_release = (decorators & MO_RELEASE) != 0; + + if (is_atomic) { + // Memory-model-wise, a LoadStore acts like a little synchronized + // block, so needs barriers on each side. These don't translate + // into actual barriers on most machines, but we still need rest of + // compiler to respect ordering. + if (is_release) { + kit->insert_mem_bar(Op_MemBarRelease); + } else if (is_volatile) { + if (support_IRIW_for_not_multiple_copy_atomic_cpu) { + kit->insert_mem_bar(Op_MemBarVolatile); + } else { + kit->insert_mem_bar(Op_MemBarRelease); + } + } + } else if (is_write) { + // If reference is volatile, prevent following memory ops from + // floating down past the volatile write. Also prevents commoning + // another volatile read. + if (is_volatile || is_release) { + kit->insert_mem_bar(Op_MemBarRelease); + } + } else { + // Memory barrier to prevent normal and 'unsafe' accesses from + // bypassing each other. Happens after null checks, so the + // exception paths do not take memory state from the memory barrier, + // so there's no problems making a strong assert about mixing users + // of safe & unsafe memory. 
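+    // Overall fence placement around an access (illustrative sketch; the
+    // trailing fences are emitted by the destructor below):
+    //
+    //   volatile store : MemBarRelease    <store>  [MemBarVolatile]
+    //   volatile load  : [MemBarVolatile] <load>    MemBarAcquire
+    //   atomic RMW     : MemBarRelease or MemBarVolatile  <op>  MemBarAcquire
+    //
+    // where the bracketed fences depend on
+    // support_IRIW_for_not_multiple_copy_atomic_cpu.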
+ if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) { + kit->insert_mem_bar(Op_MemBarVolatile); + } + } + + if (access.needs_cpu_membar()) { + kit->insert_mem_bar(Op_MemBarCPUOrder); + } + + if (is_atomic) { + // 4984716: MemBars must be inserted before this + // memory node in order to avoid a false + // dependency which will confuse the scheduler. + access.set_memory(); + } + } + + ~C2AccessFence() { + GraphKit* kit = _access.kit(); + DecoratorSet decorators = _access.decorators(); + + bool is_write = (decorators & C2_WRITE_ACCESS) != 0; + bool is_read = (decorators & C2_READ_ACCESS) != 0; + bool is_atomic = is_read && is_write; + + bool is_volatile = (decorators & MO_SEQ_CST) != 0; + bool is_acquire = (decorators & MO_ACQUIRE) != 0; + + // If reference is volatile, prevent following volatiles ops from + // floating up before the volatile access. + if (_access.needs_cpu_membar()) { + kit->insert_mem_bar(Op_MemBarCPUOrder); + } + + if (is_atomic) { + if (is_acquire || is_volatile) { + kit->insert_mem_bar(Op_MemBarAcquire); + } + } else if (is_write) { + // If not multiple copy atomic, we do the MemBarVolatile before the load. + if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) { + kit->insert_mem_bar(Op_MemBarVolatile); // Use fat membar + } + } else { + if (is_volatile || is_acquire) { + kit->insert_mem_bar(Op_MemBarAcquire, _access.raw_access()); + } + } + } + +}; + +Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const { + C2AccessFence fence(access); + resolve_address(access); + return store_at_resolved(access, val); +} + +Node* BarrierSetC2::load_at(C2Access& access, const Type* val_type) const { + C2AccessFence fence(access); + resolve_address(access); + return load_at_resolved(access, val_type); +} + +MemNode::MemOrd C2Access::mem_node_mo() const { + bool is_write = (_decorators & C2_WRITE_ACCESS) != 0; + bool is_read = (_decorators & C2_READ_ACCESS) != 0; + if ((_decorators & MO_SEQ_CST) != 0) { + if (is_write && is_read) { + // For atomic operations + return MemNode::seqcst; + } else if (is_write) { + return MemNode::release; + } else { + assert(is_read, "what else?"); + return MemNode::acquire; + } + } else if ((_decorators & MO_RELEASE) != 0) { + return MemNode::release; + } else if ((_decorators & MO_ACQUIRE) != 0) { + return MemNode::acquire; + } else if (is_write) { + // Volatile fields need releasing stores. + // Non-volatile fields also need releasing stores if they hold an + // object reference, because the object reference might point to + // a freshly created object. + // Conservatively release stores of object references. 
+ return StoreNode::release_if_reference(_type); + } else { + return MemNode::unordered; + } +} + +void C2Access::fixup_decorators() { + bool default_mo = (_decorators & MO_DECORATOR_MASK) == 0; + bool is_unordered = (_decorators & MO_UNORDERED) != 0 || default_mo; + bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0; + + bool is_read = (_decorators & C2_READ_ACCESS) != 0; + bool is_write = (_decorators & C2_WRITE_ACCESS) != 0; + + if (AlwaysAtomicAccesses && is_unordered) { + _decorators &= ~MO_DECORATOR_MASK; // clear the MO bits + _decorators |= MO_RELAXED; // Force the MO_RELAXED decorator with AlwaysAtomicAccess + } + + _decorators = AccessInternal::decorator_fixup(_decorators); + + if (is_read && !is_write && anonymous) { + // To be valid, unsafe loads may depend on other conditions than + // the one that guards them: pin the Load node + _decorators |= C2_CONTROL_DEPENDENT_LOAD; + _decorators |= C2_PINNED_LOAD; + const TypePtr* adr_type = _addr.type(); + Node* adr = _addr.node(); + if (!needs_cpu_membar() && adr_type->isa_instptr()) { + assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null"); + intptr_t offset = Type::OffsetBot; + AddPNode::Ideal_base_and_offset(adr, &_kit->gvn(), offset); + if (offset >= 0) { + int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->klass()->layout_helper()); + if (offset < s) { + // Guaranteed to be a valid access, no need to pin it + _decorators ^= C2_CONTROL_DEPENDENT_LOAD; + _decorators ^= C2_PINNED_LOAD; + } + } + } + } +} + +//--------------------------- atomic operations--------------------------------- + +static void pin_atomic_op(C2AtomicAccess& access) { + if (!access.needs_pinning()) { + return; + } + // SCMemProjNodes represent the memory state of a LoadStore. Their + // main role is to prevent LoadStore nodes from being optimized away + // when their results aren't used. 
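+  // Resulting shape (sketch):
+  //
+  //   CompareAndSwapX / GetAndSetX / GetAndAddX    (the LoadStore node)
+  //                      |
+  //                  SCMemProj   -> installed as the memory state for alias_idx
+  //
+  // which keeps the LoadStore anchored in the memory graph even when its
+  // value result is otherwise unused.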
+ GraphKit* kit = access.kit(); + Node* load_store = access.raw_access(); + assert(load_store != NULL, "must pin atomic op"); + Node* proj = kit->gvn().transform(new SCMemProjNode(load_store)); + kit->set_memory(proj, access.alias_idx()); +} + +void C2AtomicAccess::set_memory() { + Node *mem = _kit->memory(_alias_idx); + _memory = mem; +} + +Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const { + GraphKit* kit = access.kit(); + MemNode::MemOrd mo = access.mem_node_mo(); + Node* mem = access.memory(); + + Node* adr = access.addr().node(); + const TypePtr* adr_type = access.addr().type(); + + Node* load_store = NULL; + + if (access.is_oop()) { +#ifdef _LP64 + if (adr->bottom_type()->is_ptr_to_narrowoop()) { + Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop())); + Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop())); + load_store = kit->gvn().transform(new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo)); + } else +#endif + { + load_store = kit->gvn().transform(new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo)); + } + } else { + switch (access.type()) { + case T_BYTE: { + load_store = kit->gvn().transform(new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); + break; + } + case T_SHORT: { + load_store = kit->gvn().transform(new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); + break; + } + case T_INT: { + load_store = kit->gvn().transform(new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); + break; + } + case T_LONG: { + load_store = kit->gvn().transform(new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo)); + break; + } + default: + ShouldNotReachHere(); + } + } + + access.set_raw_access(load_store); + pin_atomic_op(access); + +#ifdef _LP64 + if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) { + return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type())); + } +#endif + + return load_store; +} + +Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const { + GraphKit* kit = access.kit(); + DecoratorSet decorators = access.decorators(); + MemNode::MemOrd mo = access.mem_node_mo(); + Node* mem = access.memory(); + bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0; + Node* load_store = NULL; + Node* adr = access.addr().node(); + + if (access.is_oop()) { +#ifdef _LP64 + if (adr->bottom_type()->is_ptr_to_narrowoop()) { + Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop())); + Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop())); + if (is_weak_cas) { + load_store = kit->gvn().transform(new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); + } else { + load_store = kit->gvn().transform(new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo)); + } + } else +#endif + { + if (is_weak_cas) { + load_store = kit->gvn().transform(new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); + 
} else { + load_store = kit->gvn().transform(new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo)); + } + } + } else { + switch(access.type()) { + case T_BYTE: { + if (is_weak_cas) { + load_store = kit->gvn().transform(new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo)); + } else { + load_store = kit->gvn().transform(new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo)); + } + break; + } + case T_SHORT: { + if (is_weak_cas) { + load_store = kit->gvn().transform(new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo)); + } else { + load_store = kit->gvn().transform(new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo)); + } + break; + } + case T_INT: { + if (is_weak_cas) { + load_store = kit->gvn().transform(new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo)); + } else { + load_store = kit->gvn().transform(new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo)); + } + break; + } + case T_LONG: { + if (is_weak_cas) { + load_store = kit->gvn().transform(new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo)); + } else { + load_store = kit->gvn().transform(new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo)); + } + break; + } + default: + ShouldNotReachHere(); + } + } + + access.set_raw_access(load_store); + pin_atomic_op(access); + + return load_store; +} + +Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const { + GraphKit* kit = access.kit(); + Node* mem = access.memory(); + Node* adr = access.addr().node(); + const TypePtr* adr_type = access.addr().type(); + Node* load_store = NULL; + + if (access.is_oop()) { +#ifdef _LP64 + if (adr->bottom_type()->is_ptr_to_narrowoop()) { + Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop())); + load_store = kit->gvn().transform(new GetAndSetNNode(kit->control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop())); + } else +#endif + { + load_store = kit->gvn().transform(new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr())); + } + } else { + switch (access.type()) { + case T_BYTE: + load_store = kit->gvn().transform(new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type)); + break; + case T_SHORT: + load_store = kit->gvn().transform(new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type)); + break; + case T_INT: + load_store = kit->gvn().transform(new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type)); + break; + case T_LONG: + load_store = kit->gvn().transform(new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type)); + break; + default: + ShouldNotReachHere(); + } + } + + access.set_raw_access(load_store); + pin_atomic_op(access); + +#ifdef _LP64 + if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) { + return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type())); + } +#endif + + return load_store; +} + +Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const { + Node* load_store = NULL; + GraphKit* kit = access.kit(); + Node* adr = access.addr().node(); + const TypePtr* adr_type = access.addr().type(); + Node* mem = access.memory(); + + switch(access.type()) { + case T_BYTE: + load_store = kit->gvn().transform(new GetAndAddBNode(kit->control(), 
mem, adr, new_val, adr_type)); + break; + case T_SHORT: + load_store = kit->gvn().transform(new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type)); + break; + case T_INT: + load_store = kit->gvn().transform(new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type)); + break; + case T_LONG: + load_store = kit->gvn().transform(new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type)); + break; + default: + ShouldNotReachHere(); + } + + access.set_raw_access(load_store); + pin_atomic_op(access); + + return load_store; +} + +Node* BarrierSetC2::atomic_cmpxchg_val_at(C2AtomicAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const { + C2AccessFence fence(access); + resolve_address(access); + return atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type); +} + +Node* BarrierSetC2::atomic_cmpxchg_bool_at(C2AtomicAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const { + C2AccessFence fence(access); + resolve_address(access); + return atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); +} + +Node* BarrierSetC2::atomic_xchg_at(C2AtomicAccess& access, Node* new_val, const Type* value_type) const { + C2AccessFence fence(access); + resolve_address(access); + return atomic_xchg_at_resolved(access, new_val, value_type); +} + +Node* BarrierSetC2::atomic_add_at(C2AtomicAccess& access, Node* new_val, const Type* value_type) const { + C2AccessFence fence(access); + resolve_address(access); + return atomic_add_at_resolved(access, new_val, value_type); +} + +void BarrierSetC2::clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const { + // Exclude the header but include array length to copy by 8 bytes words. + // Can't use base_offset_in_bytes(bt) since basic type is unknown. + int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() : + instanceOopDesc::base_offset_in_bytes(); + // base_off: + // 8 - 32-bit VM + // 12 - 64-bit VM, compressed klass + // 16 - 64-bit VM, normal klass + if (base_off % BytesPerLong != 0) { + assert(UseCompressedClassPointers, ""); + if (is_array) { + // Exclude length to copy by 8 bytes words. + base_off += sizeof(int); + } else { + // Include klass to copy by 8 bytes words. + base_off = instanceOopDesc::klass_offset_in_bytes(); + } + assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment"); + } + Node* src_base = kit->basic_plus_adr(src, base_off); + Node* dst_base = kit->basic_plus_adr(dst, base_off); + + // Compute the length also, if needed: + Node* countx = size; + countx = kit->gvn().transform(new SubXNode(countx, kit->MakeConX(base_off))); + countx = kit->gvn().transform(new URShiftXNode(countx, kit->intcon(LogBytesPerLong) )); + + const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM; + + ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, NULL, dst_base, NULL, countx, false, false); + ac->set_clonebasic(); + Node* n = kit->gvn().transform(ac); + if (n == ac) { + kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type); + } else { + kit->set_all_memory(n); + } +} diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp new file mode 100644 index 0000000..37eda34 --- /dev/null +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_GC_SHARED_C2_BARRIERSETC2_HPP +#define SHARE_GC_SHARED_C2_BARRIERSETC2_HPP + +#include "memory/allocation.hpp" +#include "oops/accessDecorators.hpp" +#include "opto/memnode.hpp" +#include "utilities/globalDefinitions.hpp" + +// This means the access is mismatched. This means the value of an access +// is not equivalent to the value pointed to by the address. +const DecoratorSet C2_MISMATCHED = DECORATOR_LAST << 1; +// The access may not be aligned to its natural size. +const DecoratorSet C2_UNALIGNED = DECORATOR_LAST << 2; +// The atomic cmpxchg is weak, meaning that spurious false negatives are allowed, +// but never false positives. +const DecoratorSet C2_WEAK_CMPXCHG = DECORATOR_LAST << 3; +// This denotes that a load has control dependency. +const DecoratorSet C2_CONTROL_DEPENDENT_LOAD = DECORATOR_LAST << 4; +// This denotes that a load that must be pinned. +const DecoratorSet C2_PINNED_LOAD = DECORATOR_LAST << 5; +// This denotes that the access is produced from the sun.misc.Unsafe intrinsics. +const DecoratorSet C2_UNSAFE_ACCESS = DECORATOR_LAST << 6; +// This denotes that the access mutates state. +const DecoratorSet C2_WRITE_ACCESS = DECORATOR_LAST << 7; +// This denotes that the access reads state. +const DecoratorSet C2_READ_ACCESS = DECORATOR_LAST << 8; + +class GraphKit; +class IdealKit; +class Node; +class Type; +class TypePtr; +class PhaseMacroExpand; + +// This class wraps a node and a type. +class C2AccessValue: public StackObj { +protected: + Node* _node; + const Type* _type; + +public: + C2AccessValue(Node* node, const Type* type) : + _node(node), + _type(type) {} + + Node* node() const { return _node; } + const Type* type() const { return _type; } + + void set_node(Node* node) { _node = node; } +}; + +// This class wraps a node and a pointer type. +class C2AccessValuePtr: public C2AccessValue { + int _alias_idx; + +public: + C2AccessValuePtr(Node* node, const TypePtr* type) : + C2AccessValue(node, reinterpret_cast(type)) {} + + const TypePtr* type() const { return reinterpret_cast(_type); } + int alias_idx() const { return _alias_idx; } +}; + +// This class wraps a bunch of context parameters thare are passed around in the +// BarrierSetC2 backend hierarchy, for loads and stores, to reduce boiler plate. 
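+//
+// A typical construction by the compiler front end looks roughly like this
+// (illustrative sketch; the decorator choice depends on the access being parsed):
+//
+//   C2AccessValuePtr addr(adr, adr_type);
+//   C2AccessValue value(val, val_type);
+//   C2Access access(kit, IN_HEAP | MO_UNORDERED | C2_WRITE_ACCESS, T_OBJECT, base, addr);
+//   BarrierSet::barrier_set()->barrier_set_c2()->store_at(access, value);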
+class C2Access: public StackObj { +protected: + GraphKit* _kit; + DecoratorSet _decorators; + BasicType _type; + Node* _base; + C2AccessValuePtr& _addr; + Node* _raw_access; + + void fixup_decorators(); + void* barrier_set_state() const; + +public: + C2Access(GraphKit* kit, DecoratorSet decorators, + BasicType type, Node* base, C2AccessValuePtr& addr) : + _kit(kit), + _decorators(decorators), + _type(type), + _base(base), + _addr(addr), + _raw_access(NULL) + { + fixup_decorators(); + } + + GraphKit* kit() const { return _kit; } + DecoratorSet decorators() const { return _decorators; } + Node* base() const { return _base; } + C2AccessValuePtr& addr() const { return _addr; } + BasicType type() const { return _type; } + bool is_oop() const { return _type == T_OBJECT || _type == T_ARRAY; } + bool is_raw() const { return (_decorators & AS_RAW) != 0; } + Node* raw_access() const { return _raw_access; } + + void set_raw_access(Node* raw_access) { _raw_access = raw_access; } + virtual void set_memory() {} // no-op for normal accesses, but not for atomic accesses. + + MemNode::MemOrd mem_node_mo() const; + bool needs_cpu_membar() const; + + template <typename T> + T barrier_set_state_as() const { + return reinterpret_cast<T>(barrier_set_state()); + } +}; + +// This class wraps a bunch of context parameters that are passed around in the +// BarrierSetC2 backend hierarchy, for atomic accesses, to reduce boilerplate. +class C2AtomicAccess: public C2Access { + Node* _memory; + uint _alias_idx; + bool _needs_pinning; + +public: + C2AtomicAccess(GraphKit* kit, DecoratorSet decorators, BasicType type, + Node* base, C2AccessValuePtr& addr, uint alias_idx) : + C2Access(kit, decorators, type, base, addr), + _memory(NULL), + _alias_idx(alias_idx), + _needs_pinning(true) {} + + // Set the memory node based on the current memory slice. + virtual void set_memory(); + + Node* memory() const { return _memory; } + uint alias_idx() const { return _alias_idx; } + bool needs_pinning() const { return _needs_pinning; } + + void set_needs_pinning(bool value) { _needs_pinning = value; } +}; + +// This is the top-level class for the backend of the Access API in C2. +// The top-level class is responsible for performing raw accesses. The +// various GC barrier sets inherit from the BarrierSetC2 class to sprinkle +// barriers into the accesses. +class BarrierSetC2: public CHeapObj<mtGC> { +protected: + virtual void resolve_address(C2Access& access) const; + virtual Node* store_at_resolved(C2Access& access, C2AccessValue& val) const; + virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const; + + virtual Node* atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val, + Node* new_val, const Type* val_type) const; + virtual Node* atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const; + virtual Node* atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* val_type) const; + virtual Node* atomic_add_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* val_type) const; + +public: + // This is the entry-point for the backend to perform accesses through the Access API.
+ virtual Node* store_at(C2Access& access, C2AccessValue& val) const; + virtual Node* load_at(C2Access& access, const Type* val_type) const; + + virtual Node* atomic_cmpxchg_val_at(C2AtomicAccess& access, Node* expected_val, + Node* new_val, const Type* val_type) const; + virtual Node* atomic_cmpxchg_bool_at(C2AtomicAccess& access, Node* expected_val, + Node* new_val, const Type* val_type) const; + virtual Node* atomic_xchg_at(C2AtomicAccess& access, Node* new_val, const Type* value_type) const; + virtual Node* atomic_add_at(C2AtomicAccess& access, Node* new_val, const Type* value_type) const; + + virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const; + + // These are general helper methods used by C2 + virtual bool is_gc_barrier_node(Node* node) const { return false; } + virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { } + virtual bool array_copy_requires_gc_barriers(BasicType type) const { return false; } + virtual Node* step_over_gc_barrier(Node* c) const { return c; } + + virtual void loop_optimize_gc_barrier(Node* node) const { } + + // Allow barrier sets to have shared state that is preserved across a compilation unit. + // This could for example comprise macro nodes to be expanded during macro expansion. + virtual void* create_barrier_state() const { return NULL; } + // If the BarrierSetC2 state has kept macro nodes in its compilation unit state to be + // expanded later, then now is the time to do so. + virtual bool expand_macro_nodes(PhaseMacroExpand* macro) const { return false; } +}; + +#endif // SHARE_GC_SHARED_C2_BARRIERSETC2_HPP diff --git a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp new file mode 100644 index 0000000..181b365 --- /dev/null +++ b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "ci/ciUtilities.hpp" +#include "gc/shared/cardTable.hpp" +#include "gc/shared/cardTableBarrierSet.hpp" +#include "gc/shared/c2/cardTableBarrierSetC2.hpp" +#include "opto/arraycopynode.hpp" +#include "opto/graphKit.hpp" +#include "opto/idealKit.hpp" +#include "opto/macro.hpp" +#include "utilities/macros.hpp" + +#define __ ideal. 
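+// For orientation, the card mark that CardTableBarrierSetC2::post_barrier() below
+// emits as Ideal IR corresponds roughly to the following C-like sketch. This is
+// only a sketch: field_adr is a placeholder for the address being stored to, while
+// byte_map_base, card_shift and dirty_card_val() come from CardTable.
+//
+//   jbyte* card_adr = byte_map_base + ((uintptr_t)field_adr >> card_shift);
+//   if (!UseCondCardMark || *card_adr != dirty_card_val()) {
+//     *card_adr = dirty_card_val();  // 0; a plain byte store, or storeCM when scanned concurrently
+//   }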
+ +Node* CardTableBarrierSetC2::byte_map_base_node(GraphKit* kit) const { + // Get base of card map + jbyte* card_table_base = ci_card_table_address(); + if (card_table_base != NULL) { + return kit->makecon(TypeRawPtr::make((address)card_table_base)); + } else { + return kit->null(); + } +} + +// vanilla/CMS post barrier +// Insert a write-barrier store. This is to let generational GC work; we have +// to flag all oop-stores before the next GC point. +void CardTableBarrierSetC2::post_barrier(GraphKit* kit, + Node* ctl, + Node* oop_store, + Node* obj, + Node* adr, + uint adr_idx, + Node* val, + BasicType bt, + bool use_precise) const { + CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set()); + CardTable* ct = ctbs->card_table(); + // No store check needed if we're storing a NULL or an old object + // (latter case is probably a string constant). The concurrent + // mark sweep garbage collector, however, needs to have all nonNull + // oop updates flagged via card-marks. + if (val != NULL && val->is_Con()) { + // must be either an oop or NULL + const Type* t = val->bottom_type(); + if (t == TypePtr::NULL_PTR || t == Type::TOP) + // stores of null never (?) need barriers + return; + } + + if (use_ReduceInitialCardMarks() + && obj == kit->just_allocated_object(kit->control())) { + // We can skip marks on a freshly-allocated object in Eden. + // Keep this code in sync with new_deferred_store_barrier() in runtime.cpp. + // That routine informs GC to take appropriate compensating steps, + // upon a slow-path allocation, so as to make this card-mark + // elision safe. + return; + } + + if (!use_precise) { + // All card marks for a (non-array) instance are in one place: + adr = obj; + } + // (Else it's an array (or unknown), and we want more precise card marks.) + assert(adr != NULL, ""); + + IdealKit ideal(kit, true); + + // Convert the pointer to an int prior to doing math on it + Node* cast = __ CastPX(__ ctrl(), adr); + + // Divide by card size + Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift) ); + + // Combine card table base and card offset + Node* card_adr = __ AddP(__ top(), byte_map_base_node(kit), card_offset ); + + // Get the alias_index for raw card-mark memory + int adr_type = Compile::AliasIdxRaw; + Node* zero = __ ConI(0); // Dirty card value + + if (UseCondCardMark) { + if (ct->scanned_concurrently()) { + kit->insert_mem_bar(Op_MemBarVolatile, oop_store); + __ sync_kit(kit); + } + // The classic GC reference write barrier is typically implemented + // as a store into the global card mark table. Unfortunately + // unconditional stores can result in false sharing and excessive + // coherence traffic as well as false transactional aborts. + // UseCondCardMark enables MP "polite" conditional card mark + // stores. In theory we could relax the load from ctrl() to + // no_ctrl, but that doesn't buy much latitude. + Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, T_BYTE, adr_type); + __ if_then(card_val, BoolTest::ne, zero); + } + + // Smash zero into card + if (!ct->scanned_concurrently()) { + __ store(__ ctrl(), card_adr, zero, T_BYTE, adr_type, MemNode::unordered); + } else { + // Specialized path for CM store barrier + __ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, T_BYTE, adr_type); + } + + if (UseCondCardMark) { + __ end_if(); + } + + // Final sync IdealKit and GraphKit.
+ kit->final_sync(ideal); +} + +void CardTableBarrierSetC2::clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const { + BarrierSetC2::clone(kit, src, dst, size, is_array); + const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM; + + // If necessary, emit some card marks afterwards. (Non-arrays only.) + bool card_mark = !is_array && !use_ReduceInitialCardMarks(); + if (card_mark) { + assert(!is_array, ""); + // Put in store barrier for any and all oops we are sticking + // into this object. (We could avoid this if we could prove + // that the object type contains no oop fields at all.) + Node* no_particular_value = NULL; + Node* no_particular_field = NULL; + int raw_adr_idx = Compile::AliasIdxRaw; + post_barrier(kit, kit->control(), + kit->memory(raw_adr_type), + dst, + no_particular_field, + raw_adr_idx, + no_particular_value, + T_OBJECT, + false); + } +} + +bool CardTableBarrierSetC2::use_ReduceInitialCardMarks() const { + return ReduceInitialCardMarks; +} + +bool CardTableBarrierSetC2::is_gc_barrier_node(Node* node) const { + return ModRefBarrierSetC2::is_gc_barrier_node(node) || node->Opcode() == Op_StoreCM; +} + +void CardTableBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { + assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required"); + Node *shift = node->unique_out(); + Node *addp = shift->unique_out(); + for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) { + Node *mem = addp->last_out(j); + if (UseCondCardMark && mem->is_Load()) { + assert(mem->Opcode() == Op_LoadB, "unexpected code shape"); + // The load is checking if the card has been written so + // replace it with zero to fold the test. + macro->replace_node(mem, macro->intcon(0)); + continue; + } + assert(mem->is_Store(), "store required"); + macro->replace_node(mem, mem->in(MemNode::Memory)); + } +} + +bool CardTableBarrierSetC2::array_copy_requires_gc_barriers(BasicType type) const { + return !use_ReduceInitialCardMarks(); +} diff --git a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp new file mode 100644 index 0000000..cd2f1b5 --- /dev/null +++ b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_GC_SHARED_C2_CARDTABLEBARRIERSETC2_HPP +#define SHARE_GC_SHARED_C2_CARDTABLEBARRIERSETC2_HPP + +#include "gc/shared/c2/modRefBarrierSetC2.hpp" + +class CardTableBarrierSetC2: public ModRefBarrierSetC2 { +protected: + virtual void post_barrier(GraphKit* kit, + Node* ctl, + Node* store, + Node* obj, + Node* adr, + uint adr_idx, + Node* val, + BasicType bt, + bool use_precise) const; + + Node* byte_map_base_node(GraphKit* kit) const; + +public: + virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const; + virtual bool is_gc_barrier_node(Node* node) const; + virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const; + virtual bool array_copy_requires_gc_barriers(BasicType type) const; + + bool use_ReduceInitialCardMarks() const; +}; + +#endif // SHARE_GC_SHARED_C2_CARDTABLEBARRIERSETC2_HPP diff --git a/src/hotspot/share/gc/shared/c2/modRefBarrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/modRefBarrierSetC2.cpp new file mode 100644 index 0000000..5b3b42a --- /dev/null +++ b/src/hotspot/share/gc/shared/c2/modRefBarrierSetC2.cpp @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "opto/arraycopynode.hpp" +#include "opto/graphKit.hpp" +#include "opto/idealKit.hpp" +#include "opto/narrowptrnode.hpp" +#include "gc/shared/c2/modRefBarrierSetC2.hpp" +#include "utilities/macros.hpp" + +Node* ModRefBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const { + DecoratorSet decorators = access.decorators(); + GraphKit* kit = access.kit(); + + const TypePtr* adr_type = access.addr().type(); + Node* adr = access.addr().node(); + + bool on_array = (decorators & IN_HEAP_ARRAY) != 0; + bool anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; + bool on_heap = (decorators & IN_HEAP) != 0; + bool use_precise = on_array || anonymous; + + if (!access.is_oop() || (!on_heap && !anonymous)) { + return BarrierSetC2::store_at_resolved(access, val); + } + + uint adr_idx = kit->C->get_alias_index(adr_type); + assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" ); + + pre_barrier(kit, true /* do_load */, kit->control(), access.base(), adr, adr_idx, val.node(), + static_cast<const TypeOopPtr*>(val.type()), NULL /* pre_val */, access.type()); + Node* store = BarrierSetC2::store_at_resolved(access, val); + post_barrier(kit, kit->control(), access.raw_access(), access.base(), adr, adr_idx, val.node(), + access.type(), use_precise); + + return store; +} + +Node* ModRefBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const { + GraphKit* kit = access.kit(); + + if (!access.is_oop()) { + return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type); + } + + pre_barrier(kit, false /* do_load */, + kit->control(), NULL, NULL, max_juint, NULL, NULL, + expected_val /* pre_val */, T_OBJECT); + + Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type); + + post_barrier(kit, kit->control(), access.raw_access(), access.base(), + access.addr().node(), access.alias_idx(), new_val, T_OBJECT, true); + + return result; +} + +Node* ModRefBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const { + GraphKit* kit = access.kit(); + + if (!access.is_oop()) { + return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); + } + + pre_barrier(kit, false /* do_load */, + kit->control(), NULL, NULL, max_juint, NULL, NULL, + expected_val /* pre_val */, T_OBJECT); + + Node* load_store = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type); + + // Emit the post barrier only when the actual store happened. This makes sense + // to check only for LS_cmp_* that can fail to set the value. + // LS_cmp_exchange does not produce any branches by default, so there is no + // boolean result to piggyback on. TODO: When we merge CompareAndSwap with + // CompareAndExchange and move branches here, it would make sense to conditionalize + // post_barriers for LS_cmp_exchange as well. + // + // CAS success path is marked more likely since we anticipate this is a performance + // critical path, while CAS failure path can use the penalty for going through unlikely + // path as backoff. Which is still better than doing a store barrier there.
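+ // In C-like pseudo code, the shape emitted below is roughly the following
+ // (a sketch only; "ok" is a placeholder for the boolean CAS result):
+ //
+ //   bool ok = CompareAndSwap(adr, expected_val, new_val);
+ //   if (ok) {                      // success path, marked PROB_STATIC_FREQUENT
+ //     post_barrier(adr, new_val);  // card mark / remembered set update
+ //   }
+ //   return ok;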
+ IdealKit ideal(kit); + ideal.if_then(load_store, BoolTest::ne, ideal.ConI(0), PROB_STATIC_FREQUENT); { + kit->sync_kit(ideal); + post_barrier(kit, ideal.ctrl(), access.raw_access(), access.base(), + access.addr().node(), access.alias_idx(), new_val, T_OBJECT, true); + ideal.sync_kit(kit); + } ideal.end_if(); + kit->final_sync(ideal); + + return load_store; +} + +Node* ModRefBarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const { + GraphKit* kit = access.kit(); + + Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, value_type); + if (!access.is_oop()) { + return result; + } + + // Don't need to load pre_val. The old value is returned by load_store. + // The pre_barrier can execute after the xchg as long as no safepoint + // gets inserted between them. + pre_barrier(kit, false /* do_load */, + kit->control(), NULL, NULL, max_juint, NULL, NULL, + result /* pre_val */, T_OBJECT); + post_barrier(kit, kit->control(), access.raw_access(), access.base(), access.addr().node(), + access.alias_idx(), new_val, T_OBJECT, true); + + return result; +} diff --git a/src/hotspot/share/gc/shared/c2/modRefBarrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/modRefBarrierSetC2.hpp new file mode 100644 index 0000000..faa356a --- /dev/null +++ b/src/hotspot/share/gc/shared/c2/modRefBarrierSetC2.hpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_GC_SHARED_C2_MODREFBARRIERSETC2_HPP +#define SHARE_GC_SHARED_C2_MODREFBARRIERSETC2_HPP + +#include "gc/shared/c2/barrierSetC2.hpp" + +class TypeOopPtr; + +class ModRefBarrierSetC2: public BarrierSetC2 { +protected: + virtual void pre_barrier(GraphKit* kit, + bool do_load, + Node* ctl, + Node* obj, + Node* adr, + uint adr_idx, + Node* val, + const TypeOopPtr* val_type, + Node* pre_val, + BasicType bt) const {} + + virtual void post_barrier(GraphKit* kit, + Node* ctl, + Node* store, + Node* obj, + Node* adr, + uint adr_idx, + Node* val, + BasicType bt, + bool use_precise) const {} + + virtual Node* store_at_resolved(C2Access& access, C2AccessValue& val) const; + + virtual Node* atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const; + virtual Node* atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val, + Node* new_val, const Type* value_type) const; + virtual Node* atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const; +}; + +#endif // SHARE_GC_SHARED_C2_MODREFBARRIERSETC2_HPP diff --git a/src/hotspot/share/gc/shared/cardTableBarrierSet.cpp b/src/hotspot/share/gc/shared/cardTableBarrierSet.cpp index 69e177b..fe0ba4d 100644 --- a/src/hotspot/share/gc/shared/cardTableBarrierSet.cpp +++ b/src/hotspot/share/gc/shared/cardTableBarrierSet.cpp @@ -37,8 +37,12 @@ #ifdef COMPILER1 #include "gc/shared/c1/cardTableBarrierSetC1.hpp" #endif +#ifdef COMPILER2 +#include "gc/shared/c2/cardTableBarrierSetC2.hpp" +#endif class CardTableBarrierSetC1; +class CardTableBarrierSetC2; // This kind of "BarrierSet" allows a "CollectedHeap" to detect and // enumerate ref fields that have been modified (since the last @@ -46,10 +50,12 @@ class CardTableBarrierSetC1; CardTableBarrierSet::CardTableBarrierSet(BarrierSetAssembler* barrier_set_assembler, BarrierSetC1* barrier_set_c1, + BarrierSetC2* barrier_set_c2, CardTable* card_table, const BarrierSet::FakeRtti& fake_rtti) : ModRefBarrierSet(barrier_set_assembler, barrier_set_c1, + barrier_set_c2, fake_rtti.add_tag(BarrierSet::CardTableBarrierSet)), _defer_initial_card_mark(false), _card_table(card_table) @@ -58,6 +64,7 @@ CardTableBarrierSet::CardTableBarrierSet(BarrierSetAssembler* barrier_set_assemb CardTableBarrierSet::CardTableBarrierSet(CardTable* card_table) : ModRefBarrierSet(make_barrier_set_assembler(), make_barrier_set_c1(), + make_barrier_set_c2(), BarrierSet::FakeRtti(BarrierSet::CardTableBarrierSet)), _defer_initial_card_mark(false), _card_table(card_table) @@ -155,7 +162,7 @@ void CardTableBarrierSet::initialize_deferred_card_mark_barriers() { // Used for ReduceInitialCardMarks (when COMPILER2 or JVMCI is used); // otherwise remains unused. 
#if defined(COMPILER2) || INCLUDE_JVMCI - _defer_initial_card_mark = is_server_compilation_mode_vm() && ReduceInitialCardMarks && can_elide_tlab_store_barriers() + _defer_initial_card_mark = is_server_compilation_mode_vm() && ReduceInitialCardMarks && (DeferInitialCardMark || card_mark_must_follow_store()); #else assert(_defer_initial_card_mark == false, "Who would set it?"); diff --git a/src/hotspot/share/gc/shared/cardTableBarrierSet.hpp b/src/hotspot/share/gc/shared/cardTableBarrierSet.hpp index 647a380..dfdbfc6 100644 --- a/src/hotspot/share/gc/shared/cardTableBarrierSet.hpp +++ b/src/hotspot/share/gc/shared/cardTableBarrierSet.hpp @@ -54,6 +54,7 @@ class CardTableBarrierSet: public ModRefBarrierSet { CardTableBarrierSet(BarrierSetAssembler* barrier_set_assembler, BarrierSetC1* barrier_set_c1, + BarrierSetC2* barrier_set_c2, CardTable* card_table, const BarrierSet::FakeRtti& fake_rtti); @@ -89,23 +90,6 @@ class CardTableBarrierSet: public ModRefBarrierSet { // remembered set. void flush_deferred_card_mark_barrier(JavaThread* thread); - // Can a compiler initialize a new object without store barriers? - // This permission only extends from the creation of a new object - // via a TLAB up to the first subsequent safepoint. If such permission - // is granted for this heap type, the compiler promises to call - // defer_store_barrier() below on any slow path allocation of - // a new object for which such initializing store barriers will - // have been elided. G1, like CMS, allows this, but should be - // ready to provide a compensating write barrier as necessary - // if that storage came out of a non-young region. The efficiency - // of this implementation depends crucially on being able to - // answer very efficiently in constant time whether a piece of - // storage in the heap comes from a young region or not. - // See ReduceInitialCardMarks. - virtual bool can_elide_tlab_store_barriers() const { - return true; - } - // If a compiler is eliding store barriers for TLAB-allocated objects, // we will be informed of a slow-path allocation by a call // to on_slowpath_allocation_exit() below. Such a call precedes the diff --git a/src/hotspot/share/gc/shared/modRefBarrierSet.hpp b/src/hotspot/share/gc/shared/modRefBarrierSet.hpp index ec4399d..fd86292 100644 --- a/src/hotspot/share/gc/shared/modRefBarrierSet.hpp +++ b/src/hotspot/share/gc/shared/modRefBarrierSet.hpp @@ -34,9 +34,11 @@ class ModRefBarrierSet: public BarrierSet { protected: ModRefBarrierSet(BarrierSetAssembler* barrier_set_assembler, BarrierSetC1* barrier_set_c1, + BarrierSetC2* barrier_set_c2, const BarrierSet::FakeRtti& fake_rtti) : BarrierSet(barrier_set_assembler, barrier_set_c1, + barrier_set_c2, fake_rtti.add_tag(BarrierSet::ModRef)) { } ~ModRefBarrierSet() { } diff --git a/src/hotspot/share/opto/arraycopynode.cpp b/src/hotspot/share/opto/arraycopynode.cpp index e61a053..5e4828e 100644 --- a/src/hotspot/share/opto/arraycopynode.cpp +++ b/src/hotspot/share/opto/arraycopynode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -23,9 +23,13 @@ */ #include "precompiled.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/c2/barrierSetC2.hpp" +#include "gc/shared/c2/cardTableBarrierSetC2.hpp" #include "opto/arraycopynode.hpp" #include "opto/graphKit.hpp" #include "runtime/sharedRuntime.hpp" +#include "utilities/macros.hpp" ArrayCopyNode::ArrayCopyNode(Compile* C, bool alloc_tightly_coupled, bool has_negative_length_guard) : CallNode(arraycopy_type(), NULL, TypeRawPtr::BOTTOM), @@ -252,7 +256,9 @@ bool ArrayCopyNode::prepare_array_copy(PhaseGVN *phase, bool can_reshape, return false; } - if (dest_elem == T_OBJECT && (!is_alloc_tightly_coupled() || !GraphKit::use_ReduceInitialCardMarks())) { + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); + if (dest_elem == T_OBJECT && (!is_alloc_tightly_coupled() || + bs->array_copy_requires_gc_barriers(T_OBJECT))) { // It's an object array copy but we can't emit the card marking // that is needed return false; @@ -434,9 +440,10 @@ bool ArrayCopyNode::finish_transform(PhaseGVN *phase, bool can_reshape, if (is_clonebasic()) { Node* out_mem = proj_out(TypeFunc::Memory); + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); if (out_mem->outcnt() != 1 || !out_mem->raw_out(0)->is_MergeMem() || out_mem->raw_out(0)->outcnt() != 1 || !out_mem->raw_out(0)->raw_out(0)->is_MemBar()) { - assert(!GraphKit::use_ReduceInitialCardMarks(), "can only happen with card marking"); + assert(bs->array_copy_requires_gc_barriers(T_OBJECT), "can only happen with card marking"); return false; } @@ -643,47 +650,13 @@ bool ArrayCopyNode::may_modify_helper(const TypeOopPtr *t_oop, Node* n, PhaseTra return false; } -static Node* step_over_gc_barrier(Node* c) { - if (UseG1GC && !GraphKit::use_ReduceInitialCardMarks() && - c != NULL && c->is_Region() && c->req() == 3) { - for (uint i = 1; i < c->req(); i++) { - if (c->in(i) != NULL && c->in(i)->is_Region() && - c->in(i)->req() == 3) { - Node* r = c->in(i); - for (uint j = 1; j < r->req(); j++) { - if (r->in(j) != NULL && r->in(j)->is_Proj() && - r->in(j)->in(0) != NULL && - r->in(j)->in(0)->Opcode() == Op_CallLeaf && - r->in(j)->in(0)->as_Call()->entry_point() == CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post)) { - Node* call = r->in(j)->in(0); - c = c->in(i == 1 ? 2 : 1); - if (c != NULL) { - c = c->in(0); - if (c != NULL) { - c = c->in(0); - assert(call->in(0) == NULL || - call->in(0)->in(0) == NULL || - call->in(0)->in(0)->in(0) == NULL || - call->in(0)->in(0)->in(0)->in(0) == NULL || - call->in(0)->in(0)->in(0)->in(0)->in(0) == NULL || - c == call->in(0)->in(0)->in(0)->in(0)->in(0), "bad barrier shape"); - return c; - } - } - } - } - } - } - } - return c; -} - bool ArrayCopyNode::may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTransform *phase, ArrayCopyNode*& ac) { Node* c = mb->in(0); - // step over g1 gc barrier if we're at a clone with ReduceInitialCardMarks off - c = step_over_gc_barrier(c); + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); + // step over g1 gc barrier if we're at e.g. 
a clone with ReduceInitialCardMarks off + c = bs->step_over_gc_barrier(c); CallNode* call = NULL; if (c != NULL && c->is_Region()) { @@ -699,7 +672,11 @@ bool ArrayCopyNode::may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTra } } else if (may_modify_helper(t_oop, c->in(0), phase, call)) { ac = call->isa_ArrayCopy(); +#ifdef ASSERT + bool use_ReduceInitialCardMarks = BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) && + static_cast<CardTableBarrierSetC2*>(bs)->use_ReduceInitialCardMarks(); + assert(c == mb->in(0) || (ac != NULL && ac->is_clonebasic() && !use_ReduceInitialCardMarks), "only for clone"); +#endif return true; } @@ -747,4 +724,3 @@ bool ArrayCopyNode::modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseTransf } return false; } - diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp index 7c1ab6d..f6a8c5f 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -33,6 +33,8 @@ #include "compiler/compileLog.hpp" #include "compiler/disassembler.hpp" #include "compiler/oopMap.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/c2/barrierSetC2.hpp" #include "memory/resourceArea.hpp" #include "opto/addnode.hpp" #include "opto/block.hpp" @@ -637,6 +639,7 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr _stub_function(NULL), _stub_entry_point(NULL), _method(target), + _barrier_set_state(BarrierSet::barrier_set()->barrier_set_c2()->create_barrier_state()), _entry_bci(osr_bci), _initial_gvn(NULL), _for_igvn(NULL), @@ -772,17 +775,12 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr StartNode* s = new StartNode(root(), tf()->domain()); initial_gvn()->set_type_bottom(s); init_start(s); - if (method()->intrinsic_id() == vmIntrinsics::_Reference_get && UseG1GC) { + if (method()->intrinsic_id() == vmIntrinsics::_Reference_get) { // With java.lang.ref.reference.get() we must go through the - // intrinsic when G1 is enabled - even when get() is the root + // intrinsic - even when get() is the root // method of the compile - so that, if necessary, the value in // the referent field of the reference object gets recorded by // the pre-barrier code. - // Specifically, if G1 is enabled, the value in the referent - // field is recorded by the G1 SATB pre barrier. This will - // result in the referent being marked live and the reference - // object removed from the list of discovered references during - // reference processing. cg = find_intrinsic(method(), false); } if (cg == NULL) { diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp index ad98e6b..de6adc9 100644 --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp @@ -359,6 +359,9 @@ class Compile : public Phase { const char* _stub_name; // Name of stub or adapter being compiled, or NULL address _stub_entry_point; // Compile code entry for generated stub, or NULL + // For GC + void* _barrier_set_state; + // Control of this compilation.
int _num_loop_opts; // Number of iterations for doing loop optimizations int _max_inline_size; // Max inline size for this compilation @@ -530,6 +533,8 @@ class Compile : public Phase { public: + void* barrier_set_state() const { return _barrier_set_state; } + outputStream* print_inlining_stream() const { assert(print_inlining() || print_intrinsics(), "PrintInlining off?"); return _print_inlining_stream; diff --git a/src/hotspot/share/opto/escape.cpp b/src/hotspot/share/opto/escape.cpp index e9198d3..bf92c9a 100644 --- a/src/hotspot/share/opto/escape.cpp +++ b/src/hotspot/share/opto/escape.cpp @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "ci/bcEscapeAnalyzer.hpp" #include "compiler/compileLog.hpp" +#include "gc/shared/c2/barrierSetC2.hpp" #include "libadt/vectset.hpp" #include "memory/allocation.hpp" #include "memory/resourceArea.hpp" @@ -978,10 +979,9 @@ void ConnectionGraph::process_call_arguments(CallNode *call) { arg_has_oops && (i > TypeFunc::Parms); #ifdef ASSERT if (!(is_arraycopy || + BarrierSet::barrier_set()->barrier_set_c2()->is_gc_barrier_node(call) || (call->as_CallLeaf()->_name != NULL && - (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 || - strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 || - strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 || + (strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 || strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32C") == 0 || strcmp(call->as_CallLeaf()->_name, "updateBytesAdler32") == 0 || strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 || @@ -3283,9 +3283,7 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node*> &alloc_worklist, (op == Op_StrCompressedCopy || op == Op_StrInflatedCopy)) { // They overwrite memory edge corresponding to destination array, memnode_worklist.append_if_missing(use); - } else if (!(op == Op_StoreCM || - (op == Op_CallLeaf && use->as_CallLeaf()->_name != NULL && - strcmp(use->as_CallLeaf()->_name, "g1_wb_pre") == 0) || + } else if (!(BarrierSet::barrier_set()->barrier_set_c2()->is_gc_barrier_node(use) || op == Op_AryEq || op == Op_StrComp || op == Op_HasNegatives || op == Op_StrCompressedCopy || op == Op_StrInflatedCopy || op == Op_StrEquals || op == Op_StrIndexOf || op == Op_StrIndexOfChar)) { diff --git a/src/hotspot/share/opto/graphKit.cpp b/src/hotspot/share/opto/graphKit.cpp index fbebfdf..871e4ac 100644 --- a/src/hotspot/share/opto/graphKit.cpp +++ b/src/hotspot/share/opto/graphKit.cpp @@ -25,13 +25,9 @@ #include "precompiled.hpp" #include "ci/ciUtilities.hpp" #include "compiler/compileLog.hpp" -#include "gc/g1/g1BarrierSet.hpp" -#include "gc/g1/g1CardTable.hpp" #include "gc/g1/heapRegion.hpp" #include "gc/shared/barrierSet.hpp" -#include "gc/shared/cardTable.hpp" -#include "gc/shared/cardTableBarrierSet.hpp" -#include "gc/shared/collectedHeap.hpp" +#include "gc/shared/c2/barrierSetC2.hpp" #include "interpreter/interpreter.hpp" #include "memory/resourceArea.hpp" #include "opto/addnode.hpp" @@ -48,16 +44,14 @@ #include "opto/runtime.hpp" #include "runtime/deoptimization.hpp" #include "runtime/sharedRuntime.hpp" -#if INCLUDE_ALL_GCS -#include "gc/g1/g1ThreadLocalData.hpp" -#endif // INCLUDE_ALL_GCS //----------------------------GraphKit----------------------------------------- // Main utility constructor.
GraphKit::GraphKit(JVMState* jvms) : Phase(Phase::Parser), _env(C->env()), - _gvn(*C->initial_gvn()) + _gvn(*C->initial_gvn()), + _barrier_set(BarrierSet::barrier_set()->barrier_set_c2()) { _exceptions = jvms->map()->next_exception(); if (_exceptions != NULL) jvms->map()->set_next_exception(NULL); @@ -68,7 +62,8 @@ GraphKit::GraphKit(JVMState* jvms) GraphKit::GraphKit() : Phase(Phase::Parser), _env(C->env()), - _gvn(*C->initial_gvn()) + _gvn(*C->initial_gvn()), + _barrier_set(BarrierSet::barrier_set()->barrier_set_c2()) { _exceptions = NULL; set_map(NULL); @@ -611,8 +606,7 @@ void GraphKit::builtin_throw(Deoptimization::DeoptReason reason, Node* arg) { Node *adr = basic_plus_adr(ex_node, ex_node, offset); const TypeOopPtr* val_type = TypeOopPtr::make_from_klass(env()->String_klass()); - // Conservatively release stores of object references. - Node *store = store_oop_to_object(control(), ex_node, adr, adr_typ, null(), val_type, T_OBJECT, MemNode::release); + Node *store = access_store_at(control(), ex_node, adr, adr_typ, null(), val_type, T_OBJECT, IN_HEAP); add_exception_state(make_exception_state(ex_node)); return; @@ -1551,137 +1545,142 @@ Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt, return st; } +Node* GraphKit::access_store_at(Node* ctl, + Node* obj, + Node* adr, + const TypePtr* adr_type, + Node* val, + const Type* val_type, + BasicType bt, + DecoratorSet decorators) { + // Transformation of a value which could be NULL pointer (CastPP #NULL) + // could be delayed during Parse (for example, in adjust_map_after_if()). + // Execute transformation here to avoid barrier generation in such case. + if (_gvn.type(val) == TypePtr::NULL_PTR) { + val = _gvn.makecon(TypePtr::NULL_PTR); + } -void GraphKit::pre_barrier(bool do_load, - Node* ctl, - Node* obj, - Node* adr, - uint adr_idx, - Node* val, - const TypeOopPtr* val_type, - Node* pre_val, - BasicType bt) { - - BarrierSet* bs = BarrierSet::barrier_set(); set_control(ctl); - switch (bs->kind()) { - case BarrierSet::G1BarrierSet: - g1_write_barrier_pre(do_load, obj, adr, adr_idx, val, val_type, pre_val, bt); - break; - - case BarrierSet::CardTableBarrierSet: - break; + if (stopped()) { + return top(); // Dead path ? + } - default : - ShouldNotReachHere(); + assert(val != NULL, "not dead path"); + C2AccessValuePtr addr(adr, adr_type); + C2AccessValue value(val, val_type); + C2Access access(this, decorators | C2_WRITE_ACCESS, bt, obj, addr); + if (access.is_raw()) { + return _barrier_set->BarrierSetC2::store_at(access, value); + } else { + return _barrier_set->store_at(access, value); } } -bool GraphKit::can_move_pre_barrier() const { - BarrierSet* bs = BarrierSet::barrier_set(); - switch (bs->kind()) { - case BarrierSet::G1BarrierSet: - return true; // Can move it if no safepoint - - case BarrierSet::CardTableBarrierSet: - return true; // There is no pre-barrier +Node* GraphKit::access_load_at(Node* obj, // containing obj + Node* adr, // actual adress to store val at + const TypePtr* adr_type, + const Type* val_type, + BasicType bt, + DecoratorSet decorators) { + if (stopped()) { + return top(); // Dead path ? 
+ } - default : - ShouldNotReachHere(); + C2AccessValuePtr addr(adr, adr_type); + C2Access access(this, decorators | C2_READ_ACCESS, bt, obj, addr); + if (access.is_raw()) { + return _barrier_set->BarrierSetC2::load_at(access, val_type); + } else { + return _barrier_set->load_at(access, val_type); } - return false; } -void GraphKit::post_barrier(Node* ctl, - Node* store, - Node* obj, - Node* adr, - uint adr_idx, - Node* val, - BasicType bt, - bool use_precise) { - BarrierSet* bs = BarrierSet::barrier_set(); +Node* GraphKit::access_atomic_cmpxchg_val_at(Node* ctl, + Node* obj, + Node* adr, + const TypePtr* adr_type, + int alias_idx, + Node* expected_val, + Node* new_val, + const Type* value_type, + BasicType bt, + DecoratorSet decorators) { set_control(ctl); - switch (bs->kind()) { - case BarrierSet::G1BarrierSet: - g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise); - break; - - case BarrierSet::CardTableBarrierSet: - write_barrier_post(store, obj, adr, adr_idx, val, use_precise); - break; - - default : - ShouldNotReachHere(); - + C2AccessValuePtr addr(adr, adr_type); + C2AtomicAccess access(this, decorators | C2_READ_ACCESS | C2_WRITE_ACCESS, + bt, obj, addr, alias_idx); + if (access.is_raw()) { + return _barrier_set->BarrierSetC2::atomic_cmpxchg_val_at(access, expected_val, new_val, value_type); + } else { + return _barrier_set->atomic_cmpxchg_val_at(access, expected_val, new_val, value_type); } } -Node* GraphKit::store_oop(Node* ctl, - Node* obj, - Node* adr, - const TypePtr* adr_type, - Node* val, - const TypeOopPtr* val_type, - BasicType bt, - bool use_precise, - MemNode::MemOrd mo, - bool mismatched) { - // Transformation of a value which could be NULL pointer (CastPP #NULL) - // could be delayed during Parse (for example, in adjust_map_after_if()). - // Execute transformation here to avoid barrier generation in such case. - if (_gvn.type(val) == TypePtr::NULL_PTR) - val = _gvn.makecon(TypePtr::NULL_PTR); - +Node* GraphKit::access_atomic_cmpxchg_bool_at(Node* ctl, + Node* obj, + Node* adr, + const TypePtr* adr_type, + int alias_idx, + Node* expected_val, + Node* new_val, + const Type* value_type, + BasicType bt, + DecoratorSet decorators) { set_control(ctl); - if (stopped()) return top(); // Dead path ? - - assert(bt == T_OBJECT, "sanity"); - assert(val != NULL, "not dead path"); - uint adr_idx = C->get_alias_index(adr_type); - assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" ); + C2AccessValuePtr addr(adr, adr_type); + C2AtomicAccess access(this, decorators | C2_READ_ACCESS | C2_WRITE_ACCESS, + bt, obj, addr, alias_idx); + if (access.is_raw()) { + return _barrier_set->BarrierSetC2::atomic_cmpxchg_bool_at(access, expected_val, new_val, value_type); + } else { + return _barrier_set->atomic_cmpxchg_bool_at(access, expected_val, new_val, value_type); + } +} - pre_barrier(true /* do_load */, - control(), obj, adr, adr_idx, val, val_type, - NULL /* pre_val */, - bt); - - Node* store = store_to_memory(control(), adr, val, bt, adr_idx, mo, mismatched); - post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise); - return store; -} - -// Could be an array or object we don't know at compile time (unsafe ref.) 
-Node* GraphKit::store_oop_to_unknown(Node* ctl, - Node* obj, // containing obj - Node* adr, // actual adress to store val at - const TypePtr* adr_type, - Node* val, - BasicType bt, - MemNode::MemOrd mo, - bool mismatched) { - Compile::AliasType* at = C->alias_type(adr_type); - const TypeOopPtr* val_type = NULL; - if (adr_type->isa_instptr()) { - if (at->field() != NULL) { - // known field. This code is a copy of the do_put_xxx logic. - ciField* field = at->field(); - if (!field->type()->is_loaded()) { - val_type = TypeInstPtr::BOTTOM; - } else { - val_type = TypeOopPtr::make_from_klass(field->type()->as_klass()); - } - } - } else if (adr_type->isa_aryptr()) { - val_type = adr_type->is_aryptr()->elem()->make_oopptr(); +Node* GraphKit::access_atomic_xchg_at(Node* ctl, + Node* obj, + Node* adr, + const TypePtr* adr_type, + int alias_idx, + Node* new_val, + const Type* value_type, + BasicType bt, + DecoratorSet decorators) { + set_control(ctl); + C2AccessValuePtr addr(adr, adr_type); + C2AtomicAccess access(this, decorators | C2_READ_ACCESS | C2_WRITE_ACCESS, + bt, obj, addr, alias_idx); + if (access.is_raw()) { + return _barrier_set->BarrierSetC2::atomic_xchg_at(access, new_val, value_type); + } else { + return _barrier_set->atomic_xchg_at(access, new_val, value_type); } - if (val_type == NULL) { - val_type = TypeInstPtr::BOTTOM; +} + +Node* GraphKit::access_atomic_add_at(Node* ctl, + Node* obj, + Node* adr, + const TypePtr* adr_type, + int alias_idx, + Node* new_val, + const Type* value_type, + BasicType bt, + DecoratorSet decorators) { + set_control(ctl); + C2AccessValuePtr addr(adr, adr_type); + C2AtomicAccess access(this, decorators | C2_READ_ACCESS | C2_WRITE_ACCESS, bt, obj, addr, alias_idx); + if (access.is_raw()) { + return _barrier_set->BarrierSetC2::atomic_add_at(access, new_val, value_type); + } else { + return _barrier_set->atomic_add_at(access, new_val, value_type); } - return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo, mismatched); } +void GraphKit::access_clone(Node* ctl, Node* src, Node* dst, Node* size, bool is_array) { + set_control(ctl); + return _barrier_set->clone(this, src, dst, size, is_array); +} //-------------------------array_element_address------------------------- Node* GraphKit::array_element_address(Node* ary, Node* idx, BasicType elembt, @@ -3810,20 +3809,10 @@ void GraphKit::add_predicate(int nargs) { add_predicate_impl(Deoptimization::Reason_loop_limit_check, nargs); } -//----------------------------- store barriers ---------------------------- -#define __ ideal. - -bool GraphKit::use_ReduceInitialCardMarks() { - BarrierSet *bs = BarrierSet::barrier_set(); - return bs->is_a(BarrierSet::CardTableBarrierSet) - && barrier_set_cast(bs)->can_elide_tlab_store_barriers() - && ReduceInitialCardMarks; -} - void GraphKit::sync_kit(IdealKit& ideal) { - set_all_memory(__ merged_memory()); - set_i_o(__ i_o()); - set_control(__ ctrl()); + set_all_memory(ideal.merged_memory()); + set_i_o(ideal.i_o()); + set_control(ideal.ctrl()); } void GraphKit::final_sync(IdealKit& ideal) { @@ -3831,537 +3820,6 @@ void GraphKit::final_sync(IdealKit& ideal) { sync_kit(ideal); } -Node* GraphKit::byte_map_base_node() { - // Get base of card map - jbyte* card_table_base = ci_card_table_address(); - if (card_table_base != NULL) { - return makecon(TypeRawPtr::make((address)card_table_base)); - } else { - return null(); - } -} - -// vanilla/CMS post barrier -// Insert a write-barrier store. 
This is to let generational GC work; we have -// to flag all oop-stores before the next GC point. -void GraphKit::write_barrier_post(Node* oop_store, - Node* obj, - Node* adr, - uint adr_idx, - Node* val, - bool use_precise) { - // No store check needed if we're storing a NULL or an old object - // (latter case is probably a string constant). The concurrent - // mark sweep garbage collector, however, needs to have all nonNull - // oop updates flagged via card-marks. - if (val != NULL && val->is_Con()) { - // must be either an oop or NULL - const Type* t = val->bottom_type(); - if (t == TypePtr::NULL_PTR || t == Type::TOP) - // stores of null never (?) need barriers - return; - } - - if (use_ReduceInitialCardMarks() - && obj == just_allocated_object(control())) { - // We can skip marks on a freshly-allocated object in Eden. - // Keep this code in sync with new_deferred_store_barrier() in runtime.cpp. - // That routine informs GC to take appropriate compensating steps, - // upon a slow-path allocation, so as to make this card-mark - // elision safe. - return; - } - - if (!use_precise) { - // All card marks for a (non-array) instance are in one place: - adr = obj; - } - // (Else it's an array (or unknown), and we want more precise card marks.) - assert(adr != NULL, ""); - - IdealKit ideal(this, true); - - // Convert the pointer to an int prior to doing math on it - Node* cast = __ CastPX(__ ctrl(), adr); - - // Divide by card size - assert(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet), - "Only one we handle so far."); - Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift) ); - - // Combine card table base and card offset - Node* card_adr = __ AddP(__ top(), byte_map_base_node(), card_offset ); - - // Get the alias_index for raw card-mark memory - int adr_type = Compile::AliasIdxRaw; - Node* zero = __ ConI(0); // Dirty card value - BasicType bt = T_BYTE; - - if (UseConcMarkSweepGC && UseCondCardMark) { - insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier - __ sync_kit(this); - } - - if (UseCondCardMark) { - // The classic GC reference write barrier is typically implemented - // as a store into the global card mark table. Unfortunately - // unconditional stores can result in false sharing and excessive - // coherence traffic as well as false transactional aborts. - // UseCondCardMark enables MP "polite" conditional card mark - // stores. In theory we could relax the load from ctrl() to - // no_ctrl, but that doesn't buy much latitude. - Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, bt, adr_type); - __ if_then(card_val, BoolTest::ne, zero); - } - - // Smash zero into card - if( !UseConcMarkSweepGC ) { - __ store(__ ctrl(), card_adr, zero, bt, adr_type, MemNode::unordered); - } else { - // Specialized path for CM store barrier - __ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, bt, adr_type); - } - - if (UseCondCardMark) { - __ end_if(); - } - - // Final sync IdealKit and GraphKit. - final_sync(ideal); -} -/* - * Determine if the G1 pre-barrier can be removed. The pre-barrier is - * required by SATB to make sure all objects live at the start of the - * marking are kept alive, all reference updates need to any previous - * reference stored before writing. - * - * If the previous value is NULL there is no need to save the old value. - * References that are NULL are filtered during runtime by the barrier - * code to avoid unnecessary queuing. 
- * - * However in the case of newly allocated objects it might be possible to - * prove that the reference about to be overwritten is NULL during compile - * time and avoid adding the barrier code completely. - * - * The compiler needs to determine that the object in which a field is about - * to be written is newly allocated, and that no prior store to the same field - * has happened since the allocation. - * - * Returns true if the pre-barrier can be removed - */ -bool GraphKit::g1_can_remove_pre_barrier(PhaseTransform* phase, Node* adr, - BasicType bt, uint adr_idx) { - intptr_t offset = 0; - Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); - AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase); - - if (offset == Type::OffsetBot) { - return false; // cannot unalias unless there are precise offsets - } - - if (alloc == NULL) { - return false; // No allocation found - } - - intptr_t size_in_bytes = type2aelembytes(bt); - - Node* mem = memory(adr_idx); // start searching here... - - for (int cnt = 0; cnt < 50; cnt++) { - - if (mem->is_Store()) { - - Node* st_adr = mem->in(MemNode::Address); - intptr_t st_offset = 0; - Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset); - - if (st_base == NULL) { - break; // inscrutable pointer - } - - // Break we have found a store with same base and offset as ours so break - if (st_base == base && st_offset == offset) { - break; - } - - if (st_offset != offset && st_offset != Type::OffsetBot) { - const int MAX_STORE = BytesPerLong; - if (st_offset >= offset + size_in_bytes || - st_offset <= offset - MAX_STORE || - st_offset <= offset - mem->as_Store()->memory_size()) { - // Success: The offsets are provably independent. - // (You may ask, why not just test st_offset != offset and be done? - // The answer is that stores of different sizes can co-exist - // in the same sequence of RawMem effects. We sometimes initialize - // a whole 'tile' of array elements with a single jint or jlong.) - mem = mem->in(MemNode::Memory); - continue; // advance through independent store memory - } - } - - if (st_base != base - && MemNode::detect_ptr_independence(base, alloc, st_base, - AllocateNode::Ideal_allocation(st_base, phase), - phase)) { - // Success: The bases are provably independent. - mem = mem->in(MemNode::Memory); - continue; // advance through independent store memory - } - } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) { - - InitializeNode* st_init = mem->in(0)->as_Initialize(); - AllocateNode* st_alloc = st_init->allocation(); - - // Make sure that we are looking at the same allocation site. - // The alloc variable is guaranteed to not be null here from earlier check. - if (alloc == st_alloc) { - // Check that the initialization is storing NULL so that no previous store - // has been moved up and directly write a reference - Node* captured_store = st_init->find_captured_store(offset, - type2aelembytes(T_OBJECT), - phase); - if (captured_store == NULL || captured_store == st_init->zero_memory()) { - return true; - } - } - } - - // Unless there is an explicit 'continue', we must bail out here, - // because 'mem' is an inscrutable memory state (e.g., a call). - break; - } - - return false; -} - -// G1 pre/post barriers -void GraphKit::g1_write_barrier_pre(bool do_load, - Node* obj, - Node* adr, - uint alias_idx, - Node* val, - const TypeOopPtr* val_type, - Node* pre_val, - BasicType bt) { - - // Some sanity checks - // Note: val is unused in this routine. 
- - if (do_load) { - // We need to generate the load of the previous value - assert(obj != NULL, "must have a base"); - assert(adr != NULL, "where are loading from?"); - assert(pre_val == NULL, "loaded already?"); - assert(val_type != NULL, "need a type"); - - if (use_ReduceInitialCardMarks() - && g1_can_remove_pre_barrier(&_gvn, adr, bt, alias_idx)) { - return; - } - - } else { - // In this case both val_type and alias_idx are unused. - assert(pre_val != NULL, "must be loaded already"); - // Nothing to be done if pre_val is null. - if (pre_val->bottom_type() == TypePtr::NULL_PTR) return; - assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here"); - } - assert(bt == T_OBJECT, "or we shouldn't be here"); - - IdealKit ideal(this, true); - - Node* tls = __ thread(); // ThreadLocalStorage - - Node* no_ctrl = NULL; - Node* no_base = __ top(); - Node* zero = __ ConI(0); - Node* zeroX = __ ConX(0); - - float likely = PROB_LIKELY(0.999); - float unlikely = PROB_UNLIKELY(0.999); - - BasicType active_type = in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE; - assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 || in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "flag width"); - - // Offsets into the thread - const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); - const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset()); - const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset()); - - // Now the actual pointers into the thread - Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset)); - Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset)); - Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset)); - - // Now some of the values - Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw); - - // if (!marking) - __ if_then(marking, BoolTest::ne, zero, unlikely); { - BasicType index_bt = TypeX_X->basic_type(); - assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading G1 SATBMarkQueue::_index with wrong size."); - Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw); - - if (do_load) { - // load original value - // alias_idx correct?? - pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx); - } - - // if (pre_val != NULL) - __ if_then(pre_val, BoolTest::ne, null()); { - Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); - - // is the queue for this thread full? - __ if_then(index, BoolTest::ne, zeroX, likely); { - - // decrement the index - Node* next_index = _gvn.transform(new SubXNode(index, __ ConX(sizeof(intptr_t)))); - - // Now get the buffer location we will log the previous value into and store it - Node *log_addr = __ AddP(no_base, buffer, next_index); - __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered); - // update the index - __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered); - - } __ else_(); { - - // logging buffer is full, call the runtime - const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type(); - __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", pre_val, tls); - } __ end_if(); // (!index) - } __ end_if(); // (pre_val != NULL) - } __ end_if(); // (!marking) - - // Final sync IdealKit and GraphKit. 
- final_sync(ideal); -} - -/* - * G1 similar to any GC with a Young Generation requires a way to keep track of - * references from Old Generation to Young Generation to make sure all live - * objects are found. G1 also requires to keep track of object references - * between different regions to enable evacuation of old regions, which is done - * as part of mixed collections. References are tracked in remembered sets and - * is continuously updated as reference are written to with the help of the - * post-barrier. - * - * To reduce the number of updates to the remembered set the post-barrier - * filters updates to fields in objects located in the Young Generation, - * the same region as the reference, when the NULL is being written or - * if the card is already marked as dirty by an earlier write. - * - * Under certain circumstances it is possible to avoid generating the - * post-barrier completely if it is possible during compile time to prove - * the object is newly allocated and that no safepoint exists between the - * allocation and the store. - * - * In the case of slow allocation the allocation code must handle the barrier - * as part of the allocation in the case the allocated object is not located - * in the nursery, this would happen for humongous objects. This is similar to - * how CMS is required to handle this case, see the comments for the method - * CardTableBarrierSet::on_allocation_slowpath_exit and OptoRuntime::new_deferred_store_barrier. - * A deferred card mark is required for these objects and handled in the above - * mentioned methods. - * - * Returns true if the post barrier can be removed - */ -bool GraphKit::g1_can_remove_post_barrier(PhaseTransform* phase, Node* store, - Node* adr) { - intptr_t offset = 0; - Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset); - AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase); - - if (offset == Type::OffsetBot) { - return false; // cannot unalias unless there are precise offsets - } - - if (alloc == NULL) { - return false; // No allocation found - } - - // Start search from Store node - Node* mem = store->in(MemNode::Control); - if (mem->is_Proj() && mem->in(0)->is_Initialize()) { - - InitializeNode* st_init = mem->in(0)->as_Initialize(); - AllocateNode* st_alloc = st_init->allocation(); - - // Make sure we are looking at the same allocation - if (alloc == st_alloc) { - return true; - } - } - - return false; -} - -// -// Update the card table and add card address to the queue -// -void GraphKit::g1_mark_card(IdealKit& ideal, - Node* card_adr, - Node* oop_store, - uint oop_alias_idx, - Node* index, - Node* index_adr, - Node* buffer, - const TypeFunc* tf) { - - Node* zero = __ ConI(0); - Node* zeroX = __ ConX(0); - Node* no_base = __ top(); - BasicType card_bt = T_BYTE; - // Smash zero into card. MUST BE ORDERED WRT TO STORE - __ storeCM(__ ctrl(), card_adr, zero, oop_store, oop_alias_idx, card_bt, Compile::AliasIdxRaw); - - // Now do the queue work - __ if_then(index, BoolTest::ne, zeroX); { - - Node* next_index = _gvn.transform(new SubXNode(index, __ ConX(sizeof(intptr_t)))); - Node* log_addr = __ AddP(no_base, buffer, next_index); - - // Order, see storeCM. 
- __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered); - __ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw, MemNode::unordered); - - } __ else_(); { - __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread()); - } __ end_if(); - -} - -void GraphKit::g1_write_barrier_post(Node* oop_store, - Node* obj, - Node* adr, - uint alias_idx, - Node* val, - BasicType bt, - bool use_precise) { - // If we are writing a NULL then we need no post barrier - - if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) { - // Must be NULL - const Type* t = val->bottom_type(); - assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL"); - // No post barrier if writing NULLx - return; - } - - if (use_ReduceInitialCardMarks() && obj == just_allocated_object(control())) { - // We can skip marks on a freshly-allocated object in Eden. - // Keep this code in sync with new_deferred_store_barrier() in runtime.cpp. - // That routine informs GC to take appropriate compensating steps, - // upon a slow-path allocation, so as to make this card-mark - // elision safe. - return; - } - - if (use_ReduceInitialCardMarks() - && g1_can_remove_post_barrier(&_gvn, oop_store, adr)) { - return; - } - - if (!use_precise) { - // All card marks for a (non-array) instance are in one place: - adr = obj; - } - // (Else it's an array (or unknown), and we want more precise card marks.) - assert(adr != NULL, ""); - - IdealKit ideal(this, true); - - Node* tls = __ thread(); // ThreadLocalStorage - - Node* no_base = __ top(); - float likely = PROB_LIKELY(0.999); - float unlikely = PROB_UNLIKELY(0.999); - Node* young_card = __ ConI((jint)G1CardTable::g1_young_card_val()); - Node* dirty_card = __ ConI((jint)CardTable::dirty_card_val()); - Node* zeroX = __ ConX(0); - - // Get the alias_index for raw card-mark memory - const TypePtr* card_type = TypeRawPtr::BOTTOM; - - const TypeFunc *tf = OptoRuntime::g1_wb_post_Type(); - - // Offsets into the thread - const int index_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset()); - const int buffer_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset()); - - // Pointers into the thread - - Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset)); - Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset)); - - // Now some values - // Use ctrl to avoid hoisting these values past a safepoint, which could - // potentially reset these fields in the JavaThread. - Node* index = __ load(__ ctrl(), index_adr, TypeX_X, TypeX_X->basic_type(), Compile::AliasIdxRaw); - Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw); - - // Convert the store obj pointer to an int prior to doing math on it - // Must use ctrl to prevent "integerized oop" existing across safepoint - Node* cast = __ CastPX(__ ctrl(), adr); - - // Divide pointer by card size - Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift) ); - - // Combine card table base and card offset - Node* card_adr = __ AddP(no_base, byte_map_base_node(), card_offset ); - - // If we know the value being stored does it cross regions? - - if (val != NULL) { - // Does the store cause us to cross regions? - - // Should be able to do an unsigned compare of region_size instead of - // and extra shift. Do we have an unsigned compare?? 
- // Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes); - Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes)); - - // if (xor_res == 0) same region so skip - __ if_then(xor_res, BoolTest::ne, zeroX); { - - // No barrier if we are storing a NULL - __ if_then(val, BoolTest::ne, null(), unlikely); { - - // Ok must mark the card if not already dirty - - // load the original value of the card - Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); - - __ if_then(card_val, BoolTest::ne, young_card); { - sync_kit(ideal); - // Use Op_MemBarVolatile to achieve the effect of a StoreLoad barrier. - insert_mem_bar(Op_MemBarVolatile, oop_store); - __ sync_kit(this); - - Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); - __ if_then(card_val_reload, BoolTest::ne, dirty_card); { - g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); - } __ end_if(); - } __ end_if(); - } __ end_if(); - } __ end_if(); - } else { - // The Object.clone() intrinsic uses this path if !ReduceInitialCardMarks. - // We don't need a barrier here if the destination is a newly allocated object - // in Eden. Otherwise, GC verification breaks because we assume that cards in Eden - // are set to 'g1_young_gen' (see G1CardTable::verify_g1_young_region()). - assert(!use_ReduceInitialCardMarks(), "can only happen with card marking"); - Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); - __ if_then(card_val, BoolTest::ne, young_card); { - g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); - } __ end_if(); - } - - // Final sync IdealKit and GraphKit. - final_sync(ideal); -} -#undef __ - - Node* GraphKit::load_String_length(Node* ctrl, Node* str) { Node* len = load_array_length(load_String_value(ctrl, str)); Node* coder = load_String_coder(ctrl, str); @@ -4377,9 +3835,9 @@ Node* GraphKit::load_String_value(Node* ctrl, Node* str) { const TypeAryPtr* value_type = TypeAryPtr::make(TypePtr::NotNull, TypeAry::make(TypeInt::BYTE, TypeInt::POS), ciTypeArrayKlass::make(T_BYTE), true, 0); - int value_field_idx = C->get_alias_index(value_field_type); - Node* load = make_load(ctrl, basic_plus_adr(str, str, value_offset), - value_type, T_OBJECT, value_field_idx, MemNode::unordered); + Node* p = basic_plus_adr(str, str, value_offset); + Node* load = access_load_at(str, p, value_field_type, value_type, T_OBJECT, + IN_HEAP | C2_CONTROL_DEPENDENT_LOAD); // String.value field is known to be @Stable. 
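// A standalone illustration, not HotSpot code, tying together the removed
// g1_write_barrier_post/g1_mark_card logic above: at run time the emitted graph behaves
// roughly like the function below.  MockThread, the constants and the slow-path declaration
// are invented stand-ins; the filter order follows the removed IdealKit code (same-region
// check, NULL check, young-card check, StoreLoad, dirty re-check, then mark and enqueue).
#include <atomic>
#include <cstddef>
#include <cstdint>

struct MockThread {
  size_t dcq_index;   // dirty_card_queue_index: byte offset, counts down
  char*  dcq_buffer;  // dirty_card_queue_buffer
};

const int     kLogHRGrainBytes = 20;  // stands in for HeapRegion::LogOfHRGrainBytes
const int     kCardShift       = 9;   // stands in for CardTable::card_shift
const uint8_t kYoungCard       = 2;   // stands in for G1CardTable::g1_young_card_val()
const uint8_t kDirtyCard       = 0;   // stands in for CardTable::dirty_card_val()
extern uint8_t* g_byte_map_base;      // card table biased base, declaration only
void g1_wb_post_slowpath(uint8_t* card, MockThread* t);  // declaration only

inline void g1_post_barrier(MockThread* t, void* field_addr, void* new_val) {
  uintptr_t f = reinterpret_cast<uintptr_t>(field_addr);
  uintptr_t v = reinterpret_cast<uintptr_t>(new_val);
  if (((f ^ v) >> kLogHRGrainBytes) == 0) return;  // store stays within one region
  if (new_val == nullptr) return;                  // no barrier when storing NULL
  uint8_t* card = g_byte_map_base + (f >> kCardShift);
  if (*card == kYoungCard) return;                 // young-region cards never need refinement
  std::atomic_thread_fence(std::memory_order_seq_cst);  // the StoreLoad from MemBarVolatile
  if (*card == kDirtyCard) return;                 // already dirtied by someone else
  *card = kDirtyCard;                              // "smash zero into card"
  if (t->dcq_index != 0) {                         // enqueue the card address
    t->dcq_index -= sizeof(intptr_t);
    *reinterpret_cast<uint8_t**>(t->dcq_buffer + t->dcq_index) = card;
  } else {
    g1_wb_post_slowpath(card, t);                  // queue full: call the runtime
  }
}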
if (UseImplicitStableValues) { load = cast_array_to_stable(load, value_type); @@ -4405,8 +3863,8 @@ void GraphKit::store_String_value(Node* ctrl, Node* str, Node* value) { const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::NotNull, C->env()->String_klass(), false, NULL, 0); const TypePtr* value_field_type = string_type->add_offset(value_offset); - store_oop_to_object(ctrl, str, basic_plus_adr(str, value_offset), value_field_type, - value, TypeAryPtr::BYTES, T_OBJECT, MemNode::unordered); + access_store_at(ctrl, str, basic_plus_adr(str, value_offset), value_field_type, + value, TypeAryPtr::BYTES, T_OBJECT, IN_HEAP); } void GraphKit::store_String_coder(Node* ctrl, Node* str, Node* value) { diff --git a/src/hotspot/share/opto/graphKit.hpp b/src/hotspot/share/opto/graphKit.hpp index bf72444..8517202 100644 --- a/src/hotspot/share/opto/graphKit.hpp +++ b/src/hotspot/share/opto/graphKit.hpp @@ -27,6 +27,7 @@ #include "ci/ciEnv.hpp" #include "ci/ciMethodData.hpp" +#include "gc/shared/c2/barrierSetC2.hpp" #include "opto/addnode.hpp" #include "opto/callnode.hpp" #include "opto/cfgnode.hpp" @@ -38,6 +39,7 @@ #include "opto/type.hpp" #include "runtime/deoptimization.hpp" +class BarrierSetC2; class FastLockNode; class FastUnlockNode; class IdealKit; @@ -63,6 +65,7 @@ class GraphKit : public Phase { SafePointNode* _exceptions;// Parser map(s) for exception state(s) int _bci; // JVM Bytecode Pointer ciMethod* _method; // JVM Current Method + BarrierSetC2* _barrier_set; private: int _sp; // JVM Expression Stack Pointer; don't modify directly! @@ -86,8 +89,9 @@ class GraphKit : public Phase { virtual Parse* is_Parse() const { return NULL; } virtual LibraryCallKit* is_LibraryCallKit() const { return NULL; } - ciEnv* env() const { return _env; } - PhaseGVN& gvn() const { return _gvn; } + ciEnv* env() const { return _env; } + PhaseGVN& gvn() const { return _gvn; } + void* barrier_set_state() const { return C->barrier_set_state(); } void record_for_igvn(Node* n) const { C->record_for_igvn(n); } // delegate to Compile @@ -103,9 +107,6 @@ class GraphKit : public Phase { Node* zerocon(BasicType bt) const { return _gvn.zerocon(bt); } // (See also macro MakeConX in type.hpp, which uses intcon or longcon.) - // Helper for byte_map_base - Node* byte_map_base_node(); - jint find_int_con(Node* n, jint value_if_unknown) { return _gvn.find_int_con(n, value_if_unknown); } @@ -569,70 +570,67 @@ class GraphKit : public Phase { bool unaligned = false, bool mismatched = false); + // Perform decorated accesses - // All in one pre-barrier, store, post_barrier - // Insert a write-barrier'd store. This is to let generational GC - // work; we have to flag all oop-stores before the next GC point. - // - // It comes in 3 flavors of store to an object, array, or unknown. - // We use precise card marks for arrays to avoid scanning the entire - // array. We use imprecise for object. We use precise for unknown - // since we don't know if we have an array or and object or even - // where the object starts. - // - // If val==NULL, it is taken to be a completely unknown value. 
QQQ - - Node* store_oop(Node* ctl, - Node* obj, // containing obj - Node* adr, // actual adress to store val at - const TypePtr* adr_type, - Node* val, - const TypeOopPtr* val_type, - BasicType bt, - bool use_precise, - MemNode::MemOrd mo, - bool mismatched = false); - - Node* store_oop_to_object(Node* ctl, - Node* obj, // containing obj - Node* adr, // actual adress to store val at - const TypePtr* adr_type, - Node* val, - const TypeOopPtr* val_type, - BasicType bt, - MemNode::MemOrd mo) { - return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, false, mo); - } - - Node* store_oop_to_array(Node* ctl, - Node* obj, // containing obj - Node* adr, // actual adress to store val at - const TypePtr* adr_type, - Node* val, - const TypeOopPtr* val_type, - BasicType bt, - MemNode::MemOrd mo) { - return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo); - } - - // Could be an array or object we don't know at compile time (unsafe ref.) - Node* store_oop_to_unknown(Node* ctl, - Node* obj, // containing obj - Node* adr, // actual adress to store val at + Node* access_store_at(Node* ctl, + Node* obj, // containing obj + Node* adr, // actual adress to store val at + const TypePtr* adr_type, + Node* val, + const Type* val_type, + BasicType bt, + DecoratorSet decorators); + + Node* access_load_at(Node* obj, // containing obj + Node* adr, // actual adress to store val at + const TypePtr* adr_type, + const Type* val_type, + BasicType bt, + DecoratorSet decorators); + + Node* access_atomic_cmpxchg_val_at(Node* ctl, + Node* obj, + Node* adr, + const TypePtr* adr_type, + int alias_idx, + Node* expected_val, + Node* new_val, + const Type* value_type, + BasicType bt, + DecoratorSet decorators); + + Node* access_atomic_cmpxchg_bool_at(Node* ctl, + Node* obj, + Node* adr, + const TypePtr* adr_type, + int alias_idx, + Node* expected_val, + Node* new_val, + const Type* value_type, + BasicType bt, + DecoratorSet decorators); + + Node* access_atomic_xchg_at(Node* ctl, + Node* obj, + Node* adr, + const TypePtr* adr_type, + int alias_idx, + Node* new_val, + const Type* value_type, + BasicType bt, + DecoratorSet decorators); + + Node* access_atomic_add_at(Node* ctl, + Node* obj, + Node* adr, const TypePtr* adr_type, - Node* val, + int alias_idx, + Node* new_val, + const Type* value_type, BasicType bt, - MemNode::MemOrd mo, - bool mismatched = false); + DecoratorSet decorators); - // For the few case where the barriers need special help - void pre_barrier(bool do_load, Node* ctl, - Node* obj, Node* adr, uint adr_idx, Node* val, const TypeOopPtr* val_type, - Node* pre_val, - BasicType bt); - - void post_barrier(Node* ctl, Node* store, Node* obj, Node* adr, uint adr_idx, - Node* val, BasicType bt, bool use_precise); + void access_clone(Node* ctl, Node* src, Node* dst, Node* size, bool is_array); // Return addressing for an array element. Node* array_element_address(Node* ary, Node* idx, BasicType elembt, @@ -754,47 +752,10 @@ class GraphKit : public Phase { // Returns the object (if any) which was created the moment before. Node* just_allocated_object(Node* current_control); - static bool use_ReduceInitialCardMarks(); - // Sync Ideal and Graph kits. void sync_kit(IdealKit& ideal); void final_sync(IdealKit& ideal); - // vanilla/CMS post barrier - void write_barrier_post(Node *store, Node* obj, - Node* adr, uint adr_idx, Node* val, bool use_precise); - - // Allow reordering of pre-barrier with oop store and/or post-barrier. - // Used for load_store operations which loads old value. 
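// A standalone illustration, not HotSpot code: the access_* entry points declared above
// describe each heap access with a DecoratorSet, a bitset saying where the access lands and
// what semantics it needs, and the active BarrierSetC2 backend expands it into the raw
// load/store plus whatever barriers its GC wants.  The bit values below are invented; only
// the decorator names mirror the ones used elsewhere in this patch.
#include <cstdint>

typedef uint64_t DecoratorSet;
const DecoratorSet IN_HEAP                   = 1u << 0;  // on-heap (as opposed to raw/off-heap)
const DecoratorSet IN_HEAP_ARRAY             = 1u << 1;  // array element rather than a field
const DecoratorSet ON_WEAK_OOP_REF           = 1u << 2;  // e.g. Reference.referent
const DecoratorSet ON_UNKNOWN_OOP_REF        = 1u << 3;  // reference strength unknown (Unsafe)
const DecoratorSet MO_UNORDERED              = 1u << 4;
const DecoratorSet MO_SEQ_CST                = 1u << 5;
const DecoratorSet C2_CONTROL_DEPENDENT_LOAD = 1u << 6;  // pin the load below its guards
const DecoratorSet C2_UNSAFE_ACCESS          = 1u << 7;

inline DecoratorSet array_load_decorators() {
  // Composition used by Parse::array_load at the end of this patch.
  return IN_HEAP | IN_HEAP_ARRAY | C2_CONTROL_DEPENDENT_LOAD;
}

// Other compositions visible in this patch:
//   String.value load:   IN_HEAP | C2_CONTROL_DEPENDENT_LOAD
//   Reference.get():     IN_HEAP | ON_WEAK_OOP_REF
//   Unsafe oop access:   C2_UNSAFE_ACCESS | ON_UNKNOWN_OOP_REF | IN_HEAP | MO_*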
- bool can_move_pre_barrier() const; - - // G1 pre/post barriers - void g1_write_barrier_pre(bool do_load, - Node* obj, - Node* adr, - uint alias_idx, - Node* val, - const TypeOopPtr* val_type, - Node* pre_val, - BasicType bt); - - void g1_write_barrier_post(Node* store, - Node* obj, - Node* adr, - uint alias_idx, - Node* val, - BasicType bt, - bool use_precise); - // Helper function for g1 - private: - void g1_mark_card(IdealKit& ideal, Node* card_adr, Node* store, uint oop_alias_idx, - Node* index, Node* index_adr, - Node* buffer, const TypeFunc* tf); - - bool g1_can_remove_pre_barrier(PhaseTransform* phase, Node* adr, BasicType bt, uint adr_idx); - - bool g1_can_remove_post_barrier(PhaseTransform* phase, Node* store, Node* adr); - public: // Helper function to round double arguments before a call void round_double_arguments(ciMethod* dest_method); diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index 8d77ce6..520304d 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -243,12 +243,9 @@ class LibraryCallKit : public GraphKit { // This returns Type::AnyPtr, RawPtr, or OopPtr. int classify_unsafe_addr(Node* &base, Node* &offset, BasicType type); Node* make_unsafe_address(Node*& base, Node* offset, BasicType type = T_ILLEGAL, bool can_cast = false); - // Helper for inline_unsafe_access. - // Generates the guards that check whether the result of - // Unsafe.getObject should be recorded in an SATB log buffer. - void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar); typedef enum { Relaxed, Opaque, Volatile, Acquire, Release } AccessKind; + DecoratorSet mo_decorator_for_access_kind(AccessKind kind); bool inline_unsafe_access(bool is_store, BasicType type, AccessKind kind, bool is_unaligned); static bool klass_needs_init_guard(Node* kls); bool inline_unsafe_allocate(); @@ -268,7 +265,7 @@ class LibraryCallKit : public GraphKit { bool inline_array_copyOf(bool is_copyOfRange); bool inline_array_equals(StrIntrinsicNode::ArgEnc ae); bool inline_preconditions_checkIndex(); - void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark); + void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array); bool inline_native_clone(bool is_virtual); bool inline_native_Reflection_getCallerClass(); // Helper function for inlining native object hash method @@ -284,8 +281,6 @@ class LibraryCallKit : public GraphKit { uint new_idx); typedef enum { LS_get_add, LS_get_set, LS_cmp_swap, LS_cmp_swap_weak, LS_cmp_exchange } LoadStoreKind; - MemNode::MemOrd access_kind_to_memord_LS(AccessKind access_kind, bool is_store); - MemNode::MemOrd access_kind_to_memord(AccessKind access_kind); bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind, AccessKind access_kind); bool inline_unsafe_fence(vmIntrinsics::ID id); bool inline_onspinwait(); @@ -2223,106 +2218,6 @@ bool LibraryCallKit::inline_number_methods(vmIntrinsics::ID id) { //----------------------------inline_unsafe_access---------------------------- -// Helper that guards and inserts a pre-barrier. -void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset, - Node* pre_val, bool need_mem_bar) { - // We could be accessing the referent field of a reference object. If so, when G1 - // is enabled, we need to log the value in the referent field in an SATB buffer. 
- // This routine performs some compile time filters and generates suitable - // runtime filters that guard the pre-barrier code. - // Also add memory barrier for non volatile load from the referent field - // to prevent commoning of loads across safepoint. - if (!UseG1GC && !need_mem_bar) - return; - - // Some compile time checks. - - // If offset is a constant, is it java_lang_ref_Reference::_reference_offset? - const TypeX* otype = offset->find_intptr_t_type(); - if (otype != NULL && otype->is_con() && - otype->get_con() != java_lang_ref_Reference::referent_offset) { - // Constant offset but not the reference_offset so just return - return; - } - - // We only need to generate the runtime guards for instances. - const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr(); - if (btype != NULL) { - if (btype->isa_aryptr()) { - // Array type so nothing to do - return; - } - - const TypeInstPtr* itype = btype->isa_instptr(); - if (itype != NULL) { - // Can the klass of base_oop be statically determined to be - // _not_ a sub-class of Reference and _not_ Object? - ciKlass* klass = itype->klass(); - if ( klass->is_loaded() && - !klass->is_subtype_of(env()->Reference_klass()) && - !env()->Object_klass()->is_subtype_of(klass)) { - return; - } - } - } - - // The compile time filters did not reject base_oop/offset so - // we need to generate the following runtime filters - // - // if (offset == java_lang_ref_Reference::_reference_offset) { - // if (instance_of(base, java.lang.ref.Reference)) { - // pre_barrier(_, pre_val, ...); - // } - // } - - float likely = PROB_LIKELY( 0.999); - float unlikely = PROB_UNLIKELY(0.999); - - IdealKit ideal(this); -#define __ ideal. - - Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset); - - __ if_then(offset, BoolTest::eq, referent_off, unlikely); { - // Update graphKit memory and control from IdealKit. - sync_kit(ideal); - - Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass())); - Node* is_instof = gen_instanceof(base_oop, ref_klass_con); - - // Update IdealKit memory and control from graphKit. - __ sync_kit(this); - - Node* one = __ ConI(1); - // is_instof == 0 if base_oop == NULL - __ if_then(is_instof, BoolTest::eq, one, unlikely); { - - // Update graphKit from IdeakKit. - sync_kit(ideal); - - // Use the pre-barrier to record the value in the referent field - pre_barrier(false /* do_load */, - __ ctrl(), - NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */, - pre_val /* pre_val */, - T_OBJECT); - if (need_mem_bar) { - // Add memory barrier to prevent commoning reads from this field - // across safepoint since GC can change its value. - insert_mem_bar(Op_MemBarCPUOrder); - } - // Update IdealKit from graphKit. - __ sync_kit(this); - - } __ end_if(); // _ref_type != ref_none - } __ end_if(); // offset == referent_offset - - // Final sync IdealKit and GraphKit. - final_sync(ideal); -#undef __ -} - - const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type) { // Attempt to infer a sharper value type from the offset and base type. 
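// A standalone illustration, not HotSpot code: the removed insert_pre_barrier above guarded
// SATB logging of values read through Unsafe/reflection with two runtime tests; this patch
// obtains the same effect by tagging such loads ON_UNKNOWN_OOP_REF and letting the G1
// backend decide.  The helpers below are invented stand-ins that model the removed guard.
#include <cstddef>

struct MockOop;                                   // opaque object, sketch only
bool is_java_lang_ref_Reference(MockOop* base);   // stands in for the instanceof check
void satb_log(MockOop* pre_val);                  // stands in for the pre-barrier enqueue
const ptrdiff_t kReferentOffset = 12;             // stands in for Reference::referent_offset

inline void maybe_log_referent(MockOop* base, ptrdiff_t offset, MockOop* loaded_value) {
  if (offset != kReferentOffset) return;          // only the referent field is interesting
  if (base == nullptr || !is_java_lang_ref_Reference(base)) return;  // Reference instances only
  satb_log(loaded_value);                         // keep the referent alive for SATB marking
}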
ciKlass* sharpened_klass = NULL; @@ -2361,12 +2256,39 @@ const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_ return NULL; } +DecoratorSet LibraryCallKit::mo_decorator_for_access_kind(AccessKind kind) { + switch (kind) { + case Relaxed: + return MO_UNORDERED; + case Opaque: + return MO_RELAXED; + case Acquire: + return MO_ACQUIRE; + case Release: + return MO_RELEASE; + case Volatile: + return MO_SEQ_CST; + default: + ShouldNotReachHere(); + return 0; + } +} + bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, const AccessKind kind, const bool unaligned) { if (callee()->is_static()) return false; // caller must have the capability! + DecoratorSet decorators = C2_UNSAFE_ACCESS; guarantee(!is_store || kind != Acquire, "Acquire accesses can be produced only for loads"); guarantee( is_store || kind != Release, "Release accesses can be produced only for stores"); assert(type != T_OBJECT || !unaligned, "unaligned access not supported with object type"); + if (type == T_OBJECT || type == T_ARRAY) { + decorators |= ON_UNKNOWN_OOP_REF; + } + + if (unaligned) { + decorators |= C2_UNALIGNED; + } + #ifndef PRODUCT { ResourceMark rm; @@ -2425,6 +2347,10 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c // Can base be NULL? Otherwise, always on-heap access. bool can_access_non_heap = TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop)); + if (!can_access_non_heap) { + decorators |= IN_HEAP; + } + val = is_store ? argument(4) : NULL; const TypePtr *adr_type = _gvn.type(adr)->isa_ptr(); @@ -2462,60 +2388,15 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c assert(!mismatched || alias_type->adr_type()->is_oopptr(), "off-heap access can't be mismatched"); - // First guess at the value type. - const Type *value_type = Type::get_const_basic_type(type); - - // We will need memory barriers unless we can determine a unique - // alias category for this reference. (Note: If for some reason - // the barriers get omitted and the unsafe reference begins to "pollute" - // the alias analysis of the rest of the graph, either Compile::can_alias - // or Compile::must_alias will throw a diagnostic assert.) - bool need_mem_bar = false; - switch (kind) { - case Relaxed: - need_mem_bar = (mismatched && !adr_type->isa_aryptr()) || can_access_non_heap; - break; - case Opaque: - // Opaque uses CPUOrder membars for protection against code movement. - case Acquire: - case Release: - case Volatile: - need_mem_bar = true; - break; - default: - ShouldNotReachHere(); + if (mismatched) { + decorators |= C2_MISMATCHED; } - // Some accesses require access atomicity for all types, notably longs and doubles. - // When AlwaysAtomicAccesses is enabled, all accesses are atomic. - bool requires_atomic_access = false; - switch (kind) { - case Relaxed: - requires_atomic_access = AlwaysAtomicAccesses; - break; - case Opaque: - // Opaque accesses are atomic. - case Acquire: - case Release: - case Volatile: - requires_atomic_access = true; - break; - default: - ShouldNotReachHere(); - } + // First guess at the value type. + const Type *value_type = Type::get_const_basic_type(type); // Figure out the memory ordering. 
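// A standalone illustration, not HotSpot code: with mo_decorator_for_access_kind above, the
// memory ordering of an Unsafe access becomes just another decorator, and the switch
// statements removed further down are no longer needed in the intrinsic itself.  The bit
// values below are invented; the accumulation mirrors the hunks in inline_unsafe_access.
#include <cstdint>

enum AccessKind { Relaxed, Opaque, Acquire, Release, Volatile };
typedef uint64_t DecoratorSet;
const DecoratorSet MO_UNORDERED = 1u << 0, MO_RELAXED = 1u << 1, MO_ACQUIRE = 1u << 2,
                   MO_RELEASE = 1u << 3, MO_SEQ_CST = 1u << 4;
const DecoratorSet C2_UNSAFE_ACCESS = 1u << 5, ON_UNKNOWN_OOP_REF = 1u << 6,
                   C2_UNALIGNED = 1u << 7, IN_HEAP = 1u << 8, C2_MISMATCHED = 1u << 9;

inline DecoratorSet mo_decorator_for(AccessKind kind) {
  switch (kind) {
    case Relaxed:  return MO_UNORDERED;
    case Opaque:   return MO_RELAXED;
    case Acquire:  return MO_ACQUIRE;
    case Release:  return MO_RELEASE;
    case Volatile: return MO_SEQ_CST;
  }
  return 0;
}

inline DecoratorSet unsafe_access_decorators(bool is_oop, bool unaligned, bool mismatched,
                                             bool can_access_non_heap, AccessKind kind) {
  DecoratorSet d = C2_UNSAFE_ACCESS;
  if (is_oop)               d |= ON_UNKNOWN_OOP_REF;  // field or array element, strength unknown
  if (unaligned)            d |= C2_UNALIGNED;
  if (!can_access_non_heap) d |= IN_HEAP;             // base can never be NULL: on-heap access
  if (mismatched)           d |= C2_MISMATCHED;       // access type differs from the field type
  return d | mo_decorator_for(kind);
}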
- // Acquire/Release/Volatile accesses require marking the loads/stores with MemOrd - MemNode::MemOrd mo = access_kind_to_memord_LS(kind, is_store); - - // If we are reading the value of the referent field of a Reference - // object (either by using Unsafe directly or through reflection) - // then, if G1 is enabled, we need to record the referent in an - // SATB log buffer using the pre-barrier mechanism. - // Also we need to add memory barrier to prevent commoning reads - // from this field across safepoint since GC can change its value. - bool need_read_barrier = !is_store && - offset != top() && heap_base_oop != top(); + decorators |= mo_decorator_for_access_kind(kind); if (!is_store && type == T_OBJECT) { const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type); @@ -2533,39 +2414,6 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c // and it is not possible to fully distinguish unintended nulls // from intended ones in this API. - // We need to emit leading and trailing CPU membars (see below) in - // addition to memory membars for special access modes. This is a little - // too strong, but avoids the need to insert per-alias-type - // volatile membars (for stores; compare Parse::do_put_xxx), which - // we cannot do effectively here because we probably only have a - // rough approximation of type. - - switch(kind) { - case Relaxed: - case Opaque: - case Acquire: - break; - case Release: - case Volatile: - if (is_store) { - insert_mem_bar(Op_MemBarRelease); - } else { - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - insert_mem_bar(Op_MemBarVolatile); - } - } - break; - default: - ShouldNotReachHere(); - } - - // Memory barrier to prevent normal and 'unsafe' accesses from - // bypassing each other. Happens after null checks, so the - // exception paths do not take memory state from the memory barrier, - // so there's no problems making a strong assert about mixing users - // of safe & unsafe memory. 
- if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder); - if (!is_store) { Node* p = NULL; // Try to constant fold a load from a constant field @@ -2574,37 +2422,17 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c // final or stable field p = make_constant_from_field(field, heap_base_oop); } - if (p == NULL) { - // To be valid, unsafe loads may depend on other conditions than - // the one that guards them: pin the Load node - LoadNode::ControlDependency dep = LoadNode::Pinned; - Node* ctrl = control(); - // non volatile loads may be able to float - if (!need_mem_bar && adr_type->isa_instptr()) { - assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null"); - intptr_t offset = Type::OffsetBot; - AddPNode::Ideal_base_and_offset(adr, &_gvn, offset); - if (offset >= 0) { - int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->klass()->layout_helper()); - if (offset < s) { - // Guaranteed to be a valid access, no need to pin it - dep = LoadNode::DependsOnlyOnTest; - ctrl = NULL; - } - } - } - p = make_load(ctrl, adr, value_type, type, adr_type, mo, dep, requires_atomic_access, unaligned, mismatched); - // load value - switch (type) { - case T_BOOLEAN: - { - // Normalize the value returned by getBoolean in the following cases - if (mismatched || - heap_base_oop == top() || // - heap_base_oop is NULL or - (can_access_non_heap && alias_type->field() == NULL) // - heap_base_oop is potentially NULL - // and the unsafe access is made to large offset - // (i.e., larger than the maximum offset necessary for any - // field access) + + if (p == NULL) { // Could not constant fold the load + p = access_load_at(heap_base_oop, adr, adr_type, value_type, type, decorators); + // Normalize the value returned by getBoolean in the following cases + if (type == T_BOOLEAN && + (mismatched || + heap_base_oop == top() || // - heap_base_oop is NULL or + (can_access_non_heap && field == NULL)) // - heap_base_oop is potentially NULL + // and the unsafe access is made to large offset + // (i.e., larger than the maximum offset necessary for any + // field access) ) { IdealKit ideal = IdealKit(this); #define __ ideal. @@ -2617,81 +2445,26 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c final_sync(ideal); p = __ value(normalized_result); #undef __ - } - } - case T_CHAR: - case T_BYTE: - case T_SHORT: - case T_INT: - case T_LONG: - case T_FLOAT: - case T_DOUBLE: - break; - case T_OBJECT: - if (need_read_barrier) { - // We do not require a mem bar inside pre_barrier if need_mem_bar - // is set: the barriers would be emitted by us. - insert_pre_barrier(heap_base_oop, offset, p, !need_mem_bar); - } - break; - case T_ADDRESS: - // Cast to an int type. - p = _gvn.transform(new CastP2XNode(NULL, p)); - p = ConvX2UL(p); - break; - default: - fatal("unexpected type %d: %s", type, type2name(type)); - break; } } + if (type == T_ADDRESS) { + p = gvn().transform(new CastP2XNode(NULL, p)); + p = ConvX2UL(p); + } // The load node has the control of the preceding MemBarCPUOrder. All // following nodes will have the control of the MemBarCPUOrder inserted at // the end of this method. So, pushing the load onto the stack at a later // point is fine. set_result(p); } else { - // place effect of store into memory - switch (type) { - case T_DOUBLE: - val = dstore_rounding(val); - break; - case T_ADDRESS: + if (bt == T_ADDRESS) { // Repackage the long as a pointer. 
val = ConvL2X(val); - val = _gvn.transform(new CastX2PNode(val)); - break; - default: - break; - } - - if (type == T_OBJECT) { - store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched); - } else { - store_to_memory(control(), adr, val, type, adr_type, mo, requires_atomic_access, unaligned, mismatched); + val = gvn().transform(new CastX2PNode(val)); } + access_store_at(control(), heap_base_oop, adr, adr_type, val, value_type, type, decorators); } - switch(kind) { - case Relaxed: - case Opaque: - case Release: - break; - case Acquire: - case Volatile: - if (!is_store) { - insert_mem_bar(Op_MemBarAcquire); - } else { - if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { - insert_mem_bar(Op_MemBarVolatile); - } - } - break; - default: - ShouldNotReachHere(); - } - - if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder); - return true; } @@ -2756,6 +2529,9 @@ bool LibraryCallKit::inline_unsafe_load_store(const BasicType type, const LoadSt if (callee()->is_static()) return false; // caller must have the capability! + DecoratorSet decorators = C2_UNSAFE_ACCESS; + decorators |= mo_decorator_for_access_kind(access_kind); + #ifndef PRODUCT BasicType rtype; { @@ -2887,318 +2663,54 @@ bool LibraryCallKit::inline_unsafe_load_store(const BasicType type, const LoadSt int alias_idx = C->get_alias_index(adr_type); - // Memory-model-wise, a LoadStore acts like a little synchronized - // block, so needs barriers on each side. These don't translate - // into actual barriers on most machines, but we still need rest of - // compiler to respect ordering. - - switch (access_kind) { - case Relaxed: - case Acquire: - break; - case Release: - insert_mem_bar(Op_MemBarRelease); - break; - case Volatile: - if (support_IRIW_for_not_multiple_copy_atomic_cpu) { - insert_mem_bar(Op_MemBarVolatile); - } else { - insert_mem_bar(Op_MemBarRelease); - } - break; - default: - ShouldNotReachHere(); - } - insert_mem_bar(Op_MemBarCPUOrder); + if (type == T_OBJECT || type == T_ARRAY) { + decorators |= IN_HEAP | ON_UNKNOWN_OOP_REF; - // Figure out the memory ordering. - MemNode::MemOrd mo = access_kind_to_memord(access_kind); - - // 4984716: MemBars must be inserted before this - // memory node in order to avoid a false - // dependency which will confuse the scheduler. - Node *mem = memory(alias_idx); - - // For now, we handle only those cases that actually exist: ints, - // longs, and Object. Adding others should be straightforward. 
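// A standalone illustration, not HotSpot code: the hand-placed membars removed from
// inline_unsafe_access above, and from this load/store intrinsic below, are presumed to be
// re-created inside the BarrierSetC2 access expansion from the MO_* decorators.  The helpers
// below model the placement rules the removed switches implemented (names are invented).
#include <cstdio>

enum MemOrd { Unordered, Acquire, Release, SeqCst };

inline void emit_leading_membar(MemOrd mo, bool is_store) {
  if (is_store && (mo == Release || mo == SeqCst)) std::puts("MemBarRelease");
  if (!is_store && mo == SeqCst)                   std::puts("MemBarVolatile (IRIW-sensitive CPUs only)");
}

inline void emit_trailing_membar(MemOrd mo, bool is_store) {
  if (!is_store && (mo == Acquire || mo == SeqCst)) std::puts("MemBarAcquire");
  if (is_store && mo == SeqCst)                     std::puts("MemBarVolatile (non-IRIW CPUs only)");
}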
- Node* load_store = NULL; - switch(type) { - case T_BYTE: - switch(kind) { - case LS_get_add: - load_store = _gvn.transform(new GetAndAddBNode(control(), mem, adr, newval, adr_type)); - break; - case LS_get_set: - load_store = _gvn.transform(new GetAndSetBNode(control(), mem, adr, newval, adr_type)); - break; - case LS_cmp_swap_weak: - load_store = _gvn.transform(new WeakCompareAndSwapBNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_swap: - load_store = _gvn.transform(new CompareAndSwapBNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_exchange: - load_store = _gvn.transform(new CompareAndExchangeBNode(control(), mem, adr, newval, oldval, adr_type, mo)); - break; - default: - ShouldNotReachHere(); - } - break; - case T_SHORT: - switch(kind) { - case LS_get_add: - load_store = _gvn.transform(new GetAndAddSNode(control(), mem, adr, newval, adr_type)); - break; - case LS_get_set: - load_store = _gvn.transform(new GetAndSetSNode(control(), mem, adr, newval, adr_type)); - break; - case LS_cmp_swap_weak: - load_store = _gvn.transform(new WeakCompareAndSwapSNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_swap: - load_store = _gvn.transform(new CompareAndSwapSNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_exchange: - load_store = _gvn.transform(new CompareAndExchangeSNode(control(), mem, adr, newval, oldval, adr_type, mo)); - break; - default: - ShouldNotReachHere(); - } - break; - case T_INT: - switch(kind) { - case LS_get_add: - load_store = _gvn.transform(new GetAndAddINode(control(), mem, adr, newval, adr_type)); - break; - case LS_get_set: - load_store = _gvn.transform(new GetAndSetINode(control(), mem, adr, newval, adr_type)); - break; - case LS_cmp_swap_weak: - load_store = _gvn.transform(new WeakCompareAndSwapINode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_swap: - load_store = _gvn.transform(new CompareAndSwapINode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_exchange: - load_store = _gvn.transform(new CompareAndExchangeINode(control(), mem, adr, newval, oldval, adr_type, mo)); - break; - default: - ShouldNotReachHere(); - } - break; - case T_LONG: - switch(kind) { - case LS_get_add: - load_store = _gvn.transform(new GetAndAddLNode(control(), mem, adr, newval, adr_type)); - break; - case LS_get_set: - load_store = _gvn.transform(new GetAndSetLNode(control(), mem, adr, newval, adr_type)); - break; - case LS_cmp_swap_weak: - load_store = _gvn.transform(new WeakCompareAndSwapLNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_swap: - load_store = _gvn.transform(new CompareAndSwapLNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_exchange: - load_store = _gvn.transform(new CompareAndExchangeLNode(control(), mem, adr, newval, oldval, adr_type, mo)); - break; - default: - ShouldNotReachHere(); - } - break; - case T_OBJECT: // Transformation of a value which could be NULL pointer (CastPP #NULL) // could be delayed during Parse (for example, in adjust_map_after_if()). // Execute transformation here to avoid barrier generation in such case. if (_gvn.type(newval) == TypePtr::NULL_PTR) newval = _gvn.makecon(TypePtr::NULL_PTR); - // Reference stores need a store barrier. - switch(kind) { - case LS_get_set: { - // If pre-barrier must execute before the oop store, old value will require do_load here. 
- if (!can_move_pre_barrier()) { - pre_barrier(true /* do_load*/, - control(), base, adr, alias_idx, newval, value_type->make_oopptr(), - NULL /* pre_val*/, - T_OBJECT); - } // Else move pre_barrier to use load_store value, see below. - break; - } - case LS_cmp_swap_weak: - case LS_cmp_swap: - case LS_cmp_exchange: { - // Same as for newval above: - if (_gvn.type(oldval) == TypePtr::NULL_PTR) { - oldval = _gvn.makecon(TypePtr::NULL_PTR); - } - // The only known value which might get overwritten is oldval. - pre_barrier(false /* do_load */, - control(), NULL, NULL, max_juint, NULL, NULL, - oldval /* pre_val */, - T_OBJECT); - break; - } - default: - ShouldNotReachHere(); - } - -#ifdef _LP64 - if (adr->bottom_type()->is_ptr_to_narrowoop()) { - Node *newval_enc = _gvn.transform(new EncodePNode(newval, newval->bottom_type()->make_narrowoop())); - - switch(kind) { - case LS_get_set: - load_store = _gvn.transform(new GetAndSetNNode(control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop())); - break; - case LS_cmp_swap_weak: { - Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop())); - load_store = _gvn.transform(new WeakCompareAndSwapNNode(control(), mem, adr, newval_enc, oldval_enc, mo)); - break; - } - case LS_cmp_swap: { - Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop())); - load_store = _gvn.transform(new CompareAndSwapNNode(control(), mem, adr, newval_enc, oldval_enc, mo)); - break; - } - case LS_cmp_exchange: { - Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop())); - load_store = _gvn.transform(new CompareAndExchangeNNode(control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo)); - break; - } - default: - ShouldNotReachHere(); - } - } else -#endif - switch (kind) { - case LS_get_set: - load_store = _gvn.transform(new GetAndSetPNode(control(), mem, adr, newval, adr_type, value_type->is_oopptr())); - break; - case LS_cmp_swap_weak: - load_store = _gvn.transform(new WeakCompareAndSwapPNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_swap: - load_store = _gvn.transform(new CompareAndSwapPNode(control(), mem, adr, newval, oldval, mo)); - break; - case LS_cmp_exchange: - load_store = _gvn.transform(new CompareAndExchangePNode(control(), mem, adr, newval, oldval, adr_type, value_type->is_oopptr(), mo)); - break; - default: - ShouldNotReachHere(); + if (oldval != NULL && _gvn.type(oldval) == TypePtr::NULL_PTR) { + // Refine the value to a null constant, when it is known to be null + oldval = _gvn.makecon(TypePtr::NULL_PTR); } - - // Emit the post barrier only when the actual store happened. This makes sense - // to check only for LS_cmp_* that can fail to set the value. - // LS_cmp_exchange does not produce any branches by default, so there is no - // boolean result to piggyback on. TODO: When we merge CompareAndSwap with - // CompareAndExchange and move branches here, it would make sense to conditionalize - // post_barriers for LS_cmp_exchange as well. - // - // CAS success path is marked more likely since we anticipate this is a performance - // critical path, while CAS failure path can use the penalty for going through unlikely - // path as backoff. Which is still better than doing a store barrier there. 
- switch (kind) { - case LS_get_set: - case LS_cmp_exchange: { - post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true); - break; - } - case LS_cmp_swap_weak: - case LS_cmp_swap: { - IdealKit ideal(this); - ideal.if_then(load_store, BoolTest::ne, ideal.ConI(0), PROB_STATIC_FREQUENT); { - sync_kit(ideal); - post_barrier(ideal.ctrl(), load_store, base, adr, alias_idx, newval, T_OBJECT, true); - ideal.sync_kit(this); - } ideal.end_if(); - final_sync(ideal); - break; - } - default: - ShouldNotReachHere(); - } - break; - default: - fatal("unexpected type %d: %s", type, type2name(type)); - break; } - // SCMemProjNodes represent the memory state of a LoadStore. Their - // main role is to prevent LoadStore nodes from being optimized away - // when their results aren't used. - Node* proj = _gvn.transform(new SCMemProjNode(load_store)); - set_memory(proj, alias_idx); - - if (type == T_OBJECT && (kind == LS_get_set || kind == LS_cmp_exchange)) { -#ifdef _LP64 - if (adr->bottom_type()->is_ptr_to_narrowoop()) { - load_store = _gvn.transform(new DecodeNNode(load_store, load_store->get_ptr_type())); + Node* result = NULL; + switch (kind) { + case LS_cmp_exchange: { + result = access_atomic_cmpxchg_val_at(control(), base, adr, adr_type, alias_idx, + oldval, newval, value_type, type, decorators); + break; } -#endif - if (can_move_pre_barrier() && kind == LS_get_set) { - // Don't need to load pre_val. The old value is returned by load_store. - // The pre_barrier can execute after the xchg as long as no safepoint - // gets inserted between them. - pre_barrier(false /* do_load */, - control(), NULL, NULL, max_juint, NULL, NULL, - load_store /* pre_val */, - T_OBJECT); + case LS_cmp_swap_weak: + decorators |= C2_WEAK_CMPXCHG; + case LS_cmp_swap: { + result = access_atomic_cmpxchg_bool_at(control(), base, adr, adr_type, alias_idx, + oldval, newval, value_type, type, decorators); + break; } - } - - // Add the trailing membar surrounding the access - insert_mem_bar(Op_MemBarCPUOrder); - - switch (access_kind) { - case Relaxed: - case Release: - break; // do nothing - case Acquire: - case Volatile: - insert_mem_bar(Op_MemBarAcquire); - // !support_IRIW_for_not_multiple_copy_atomic_cpu handled in platform code + case LS_get_set: { + result = access_atomic_xchg_at(control(), base, adr, adr_type, alias_idx, + newval, value_type, type, decorators); break; + } + case LS_get_add: { + result = access_atomic_add_at(control(), base, adr, adr_type, alias_idx, + newval, value_type, type, decorators); + break; + } default: ShouldNotReachHere(); } - assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match"); - set_result(load_store); + assert(type2size[result->bottom_type()->basic_type()] == type2size[rtype], "result type should match"); + set_result(result); return true; } -MemNode::MemOrd LibraryCallKit::access_kind_to_memord_LS(AccessKind kind, bool is_store) { - MemNode::MemOrd mo = MemNode::unset; - switch(kind) { - case Opaque: - case Relaxed: mo = MemNode::unordered; break; - case Acquire: mo = MemNode::acquire; break; - case Release: mo = MemNode::release; break; - case Volatile: mo = is_store ? 
MemNode::release : MemNode::acquire; break; - default: - ShouldNotReachHere(); - } - guarantee(mo != MemNode::unset, "Should select memory ordering"); - return mo; -} - -MemNode::MemOrd LibraryCallKit::access_kind_to_memord(AccessKind kind) { - MemNode::MemOrd mo = MemNode::unset; - switch(kind) { - case Opaque: - case Relaxed: mo = MemNode::unordered; break; - case Acquire: mo = MemNode::acquire; break; - case Release: mo = MemNode::release; break; - case Volatile: mo = MemNode::seqcst; break; - default: - ShouldNotReachHere(); - } - guarantee(mo != MemNode::unset, "Should select memory ordering"); - return mo; -} - bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) { // Regardless of form, don't allow previous ld/st to move down, // then issue acquire, release, or volatile mem_bar. @@ -4635,7 +4147,7 @@ bool LibraryCallKit::inline_unsafe_copyMemory() { //------------------------clone_coping----------------------------------- // Helper function for inline_native_clone. -void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark) { +void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array) { assert(obj_size != NULL, ""); Node* raw_obj = alloc_obj->in(1); assert(alloc_obj->is_CheckCastPP() && raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), ""); @@ -4655,66 +4167,9 @@ void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, b // Copy the fastest available way. // TODO: generate fields copies for small objects instead. - Node* src = obj; - Node* dest = alloc_obj; Node* size = _gvn.transform(obj_size); - // Exclude the header but include array length to copy by 8 bytes words. - // Can't use base_offset_in_bytes(bt) since basic type is unknown. - int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() : - instanceOopDesc::base_offset_in_bytes(); - // base_off: - // 8 - 32-bit VM - // 12 - 64-bit VM, compressed klass - // 16 - 64-bit VM, normal klass - if (base_off % BytesPerLong != 0) { - assert(UseCompressedClassPointers, ""); - if (is_array) { - // Exclude length to copy by 8 bytes words. - base_off += sizeof(int); - } else { - // Include klass to copy by 8 bytes words. - base_off = instanceOopDesc::klass_offset_in_bytes(); - } - assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment"); - } - src = basic_plus_adr(src, base_off); - dest = basic_plus_adr(dest, base_off); - - // Compute the length also, if needed: - Node* countx = size; - countx = _gvn.transform(new SubXNode(countx, MakeConX(base_off))); - countx = _gvn.transform(new URShiftXNode(countx, intcon(LogBytesPerLong) )); - - const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM; - - ArrayCopyNode* ac = ArrayCopyNode::make(this, false, src, NULL, dest, NULL, countx, false, false); - ac->set_clonebasic(); - Node* n = _gvn.transform(ac); - if (n == ac) { - set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type); - } else { - set_all_memory(n); - } - - // If necessary, emit some card marks afterwards. (Non-arrays only.) - if (card_mark) { - assert(!is_array, ""); - // Put in store barrier for any and all oops we are sticking - // into this object. (We could avoid this if we could prove - // that the object type contains no oop fields at all.) 
- Node* no_particular_value = NULL; - Node* no_particular_field = NULL; - int raw_adr_idx = Compile::AliasIdxRaw; - post_barrier(control(), - memory(raw_adr_type), - alloc_obj, - no_particular_field, - raw_adr_idx, - no_particular_value, - T_OBJECT, - false); - } + access_clone(control(), obj, alloc_obj, size, is_array); // Do not let reads from the cloned object float above the arraycopy. if (alloc != NULL) { @@ -4804,9 +4259,6 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) { PhiNode* result_mem = new PhiNode(result_reg, Type::MEMORY, TypePtr::BOTTOM); record_for_igvn(result_reg); - const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM; - int raw_adr_idx = Compile::AliasIdxRaw; - Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL); if (array_ctl != NULL) { // It's an array. @@ -4816,9 +4268,10 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) { Node* obj_size = NULL; Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size); // no arguments to push - if (!use_ReduceInitialCardMarks()) { + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); + if (bs->array_copy_requires_gc_barriers(T_OBJECT)) { // If it is an oop array, it requires very special treatment, - // because card marking is required on each card of the array. + // because gc barriers are required when accessing the array. Node* is_obja = generate_objArray_guard(obj_klass, (RegionNode*)NULL); if (is_obja != NULL) { PreserveJVMState pjvms2(this); @@ -4837,7 +4290,7 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) { result_mem ->set_req(_objArray_path, reset_memory()); } } - // Otherwise, there are no card marks to worry about. + // Otherwise, there are no barriers to worry about. // (We can dispense with card marks if we know the allocation // comes out of eden (TLAB)... In fact, ReduceInitialCardMarks // causes the non-eden paths to take compensating steps to @@ -4846,7 +4299,7 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) { // the object.) if (!stopped()) { - copy_to_clone(obj, alloc_obj, obj_size, true, false); + copy_to_clone(obj, alloc_obj, obj_size, true); // Present the results of the copy. result_reg->init_req(_array_path, control()); @@ -4892,7 +4345,7 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) { // exception state between multiple Object.clone versions (reexecute=true vs reexecute=false). Node* alloc_obj = new_instance(obj_klass, NULL, &obj_size, /*deoptimize_on_exception=*/true); - copy_to_clone(obj, alloc_obj, obj_size, false, !use_ReduceInitialCardMarks()); + copy_to_clone(obj, alloc_obj, obj_size, false); // Present the results of the slow call. 
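// A standalone illustration, not HotSpot code: copy_to_clone() now just calls access_clone(),
// and the array path above asks the barrier set whether the copy needs GC barriers at all;
// with deferred initial card marks a freshly allocated clone can be filled by a raw word
// copy.  The free functions below are an invented, simplified model of that guard.
inline bool gc_needs_copy_barriers(bool reduce_initial_card_marks) {
  // Simplified reading of array_copy_requires_gc_barriers() for card-table GCs:
  // deferred card marks make the raw copy safe for a brand-new object.
  return !reduce_initial_card_marks;
}

inline bool clone_via_raw_word_copy(bool is_oop_array, bool reduce_initial_card_marks) {
  // Mirrors the guard in inline_native_clone above: only an oop array under a GC that
  // wants copy barriers has to take the element-by-element arraycopy path.
  return !(is_oop_array && gc_needs_copy_barriers(reduce_initial_card_marks));
}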
result_reg->init_req(_instance_path, control()); @@ -6099,21 +5552,23 @@ bool LibraryCallKit::inline_reference_get() { Node* reference_obj = null_check_receiver(); if (stopped()) return true; + const TypeInstPtr* tinst = _gvn.type(reference_obj)->isa_instptr(); + assert(tinst != NULL, "obj is null"); + assert(tinst->klass()->is_loaded(), "obj is not loaded"); + ciInstanceKlass* referenceKlass = tinst->klass()->as_instance_klass(); + ciField* field = referenceKlass->get_field_by_name(ciSymbol::make("referent"), + ciSymbol::make("Ljava/lang/Object;"), + false); + assert (field != NULL, "undefined field"); + Node* adr = basic_plus_adr(reference_obj, reference_obj, referent_offset); + const TypePtr* adr_type = C->alias_type(field)->adr_type(); ciInstanceKlass* klass = env()->Object_klass(); const TypeOopPtr* object_type = TypeOopPtr::make_from_klass(klass); - Node* no_ctrl = NULL; - Node* result = make_load(no_ctrl, adr, object_type, T_OBJECT, MemNode::unordered); - - // Use the pre-barrier to record the value in the referent field - pre_barrier(false /* do_load */, - control(), - NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */, - result /* pre_val */, - T_OBJECT); - + DecoratorSet decorators = IN_HEAP | ON_WEAK_OOP_REF; + Node* result = access_load_at(reference_obj, adr, adr_type, object_type, T_OBJECT, decorators); // Add memory barrier to prevent commoning reads from this field // across safepoint since GC can change its value. insert_mem_bar(Op_MemBarCPUOrder); @@ -6166,20 +5621,13 @@ Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * field type = Type::get_const_basic_type(bt); } - if (support_IRIW_for_not_multiple_copy_atomic_cpu && is_vol) { - insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier - } - // Build the load. - MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered; - Node* loadedField = make_load(NULL, adr, type, bt, adr_type, mo, LoadNode::DependsOnlyOnTest, is_vol); - // If reference is volatile, prevent following memory ops from - // floating up past the volatile read. Also prevents commoning - // another volatile read. 
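// A standalone illustration, not HotSpot code: Reference.get() above now tags its load
// IN_HEAP | ON_WEAK_OOP_REF, which lets the G1 backend emit the SATB pre-barrier with the
// loaded referent as pre_val, replacing the explicit pre_barrier(..., result, T_OBJECT)
// call removed here.  The helpers below are invented stand-ins for the intended effect.
struct MockOop;
bool satb_marking_active();        // stands in for the thread-local "marking active" flag
void satb_log(MockOop* value);     // stands in for the SATB enqueue

inline MockOop* reference_get_barrier(MockOop* referent) {
  if (satb_marking_active() && referent != nullptr) {
    satb_log(referent);  // keep the weakly reachable referent alive for this marking cycle
  }
  return referent;
}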
+ DecoratorSet decorators = IN_HEAP; + if (is_vol) { - // Memory barrier includes bogus read of value to force load BEFORE membar - insert_mem_bar(Op_MemBarAcquire, loadedField); + decorators |= MO_SEQ_CST; } - return loadedField; + + return access_load_at(fromObj, adr, adr_type, type, bt, decorators); } Node * LibraryCallKit::field_address_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, diff --git a/src/hotspot/share/opto/loopopts.cpp b/src/hotspot/share/opto/loopopts.cpp index 3e5a649..f21a5e5 100644 --- a/src/hotspot/share/opto/loopopts.cpp +++ b/src/hotspot/share/opto/loopopts.cpp @@ -23,6 +23,8 @@ */ #include "precompiled.hpp" +#include "gc/shared/barrierSet.hpp" +#include "gc/shared/c2/barrierSetC2.hpp" #include "memory/allocation.inline.hpp" #include "memory/resourceArea.hpp" #include "opto/addnode.hpp" @@ -1375,6 +1377,9 @@ void PhaseIdealLoop::split_if_with_blocks_post(Node *n) { get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) { _igvn.replace_node( n, n->in(1) ); } + + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); + bs->loop_optimize_gc_barrier(n); } //------------------------------split_if_with_blocks--------------------------- diff --git a/src/hotspot/share/opto/macro.cpp b/src/hotspot/share/opto/macro.cpp index bca5a98..7b422e9 100644 --- a/src/hotspot/share/opto/macro.cpp +++ b/src/hotspot/share/opto/macro.cpp @@ -227,105 +227,9 @@ void PhaseMacroExpand::extract_call_projections(CallNode *call) { } -// Eliminate a card mark sequence. p2x is a ConvP2XNode -void PhaseMacroExpand::eliminate_card_mark(Node* p2x) { - assert(p2x->Opcode() == Op_CastP2X, "ConvP2XNode required"); - if (!UseG1GC) { - // vanilla/CMS post barrier - Node *shift = p2x->unique_out(); - Node *addp = shift->unique_out(); - for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) { - Node *mem = addp->last_out(j); - if (UseCondCardMark && mem->is_Load()) { - assert(mem->Opcode() == Op_LoadB, "unexpected code shape"); - // The load is checking if the card has been written so - // replace it with zero to fold the test. - _igvn.replace_node(mem, intcon(0)); - continue; - } - assert(mem->is_Store(), "store required"); - _igvn.replace_node(mem, mem->in(MemNode::Memory)); - } - } else { - // G1 pre/post barriers - assert(p2x->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes"); - // It could be only one user, URShift node, in Object.clone() intrinsic - // but the new allocation is passed to arraycopy stub and it could not - // be scalar replaced. So we don't check the case. - - // An other case of only one user (Xor) is when the value check for NULL - // in G1 post barrier is folded after CCP so the code which used URShift - // is removed. - - // Take Region node before eliminating post barrier since it also - // eliminates CastP2X node when it has only one user. - Node* this_region = p2x->in(0); - assert(this_region != NULL, ""); - - // Remove G1 post barrier. - - // Search for CastP2X->Xor->URShift->Cmp path which - // checks if the store done to a different from the value's region. - // And replace Cmp with #0 (false) to collapse G1 post barrier. - Node* xorx = p2x->find_out_with(Op_XorX); - if (xorx != NULL) { - Node* shift = xorx->unique_out(); - Node* cmpx = shift->unique_out(); - assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() && - cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne, - "missing region check in G1 post barrier"); - _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ)); - - // Remove G1 pre barrier. 
- - // Search "if (marking != 0)" check and set it to "false". - // There is no G1 pre barrier if previous stored value is NULL - // (for example, after initialization). - if (this_region->is_Region() && this_region->req() == 3) { - int ind = 1; - if (!this_region->in(ind)->is_IfFalse()) { - ind = 2; - } - if (this_region->in(ind)->is_IfFalse() && - this_region->in(ind)->in(0)->Opcode() == Op_If) { - Node* bol = this_region->in(ind)->in(0)->in(1); - assert(bol->is_Bool(), ""); - cmpx = bol->in(1); - if (bol->as_Bool()->_test._test == BoolTest::ne && - cmpx->is_Cmp() && cmpx->in(2) == intcon(0) && - cmpx->in(1)->is_Load()) { - Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address); - const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset()); - if (adr->is_AddP() && adr->in(AddPNode::Base) == top() && - adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal && - adr->in(AddPNode::Offset) == MakeConX(marking_offset)) { - _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ)); - } - } - } - } - } else { - assert(!GraphKit::use_ReduceInitialCardMarks(), "can only happen with card marking"); - // This is a G1 post barrier emitted by the Object.clone() intrinsic. - // Search for the CastP2X->URShiftX->AddP->LoadB->Cmp path which checks if the card - // is marked as young_gen and replace the Cmp with 0 (false) to collapse the barrier. - Node* shift = p2x->find_out_with(Op_URShiftX); - assert(shift != NULL, "missing G1 post barrier"); - Node* addp = shift->unique_out(); - Node* load = addp->find_out_with(Op_LoadB); - assert(load != NULL, "missing G1 post barrier"); - Node* cmpx = load->unique_out(); - assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() && - cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne, - "missing card value check in G1 post barrier"); - _igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ)); - // There is no G1 pre barrier in this case - } - // Now CastP2X can be removed since it is used only on dead path - // which currently still alive until igvn optimize it. - assert(p2x->outcnt() == 0 || p2x->unique_out()->Opcode() == Op_URShiftX, ""); - _igvn.replace_node(p2x, top()); - } +void PhaseMacroExpand::eliminate_gc_barrier(Node* p2x) { + BarrierSetC2 *bs = BarrierSet::barrier_set()->barrier_set_c2(); + bs->eliminate_gc_barrier(this, p2x); } // Search for a memory operation for the specified memory slice. 
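// A standalone illustration, not HotSpot code: the common pattern across macro.cpp,
// macroArrayCopy.cpp and loopopts.cpp in this patch is that GC-neutral phases call an
// abstract per-GC backend instead of testing UseG1GC and pattern-matching barrier shapes
// inline.  The interface below is a simplified model; the method names follow the calls
// visible in this diff, but the real HotSpot signatures differ.
struct MockNode;          // stands in for Node
struct MockMacroExpand;   // stands in for PhaseMacroExpand

struct MockBarrierSetC2 {
  // Collapse barrier subgraphs hanging off an allocation that is being scalar-replaced.
  virtual void eliminate_gc_barrier(MockMacroExpand* macro, MockNode* node) = 0;
  // Decide whether an oop arraycopy/clone must take a barrier-aware path.
  virtual bool array_copy_requires_gc_barriers(int element_type) const = 0;
  // Let the GC simplify its barrier nodes during loop optimizations.
  virtual void loop_optimize_gc_barrier(MockNode* node) = 0;
  // Expand any GC-specific macro nodes left after ordinary macro expansion.
  virtual bool expand_macro_nodes(MockMacroExpand* macro) = 0;
  virtual ~MockBarrierSetC2() {}
};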
@@ -1026,7 +930,7 @@ void PhaseMacroExpand::process_users_of_allocation(CallNode *alloc) { disconnect_projections(membar_after->as_MemBar(), _igvn); } } else { - eliminate_card_mark(n); + eliminate_gc_barrier(n); } k -= (oc2 - use->outcnt()); } @@ -1059,7 +963,7 @@ void PhaseMacroExpand::process_users_of_allocation(CallNode *alloc) { _igvn._worklist.push(ac); } else { - eliminate_card_mark(use); + eliminate_gc_barrier(use); } j -= (oc1 - res->outcnt()); } @@ -2798,5 +2702,6 @@ bool PhaseMacroExpand::expand_macro_nodes() { _igvn.set_delay_transform(false); _igvn.optimize(); if (C->failing()) return true; - return false; + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); + return bs->expand_macro_nodes(this); } diff --git a/src/hotspot/share/opto/macro.hpp b/src/hotspot/share/opto/macro.hpp index 5482a1d..f58dd5d 100644 --- a/src/hotspot/share/opto/macro.hpp +++ b/src/hotspot/share/opto/macro.hpp @@ -38,10 +38,6 @@ private: PhaseIterGVN &_igvn; // Helper methods roughly modeled after GraphKit: - Node* top() const { return C->top(); } - Node* intcon(jint con) const { return _igvn.intcon(con); } - Node* longcon(jlong con) const { return _igvn.longcon(con); } - Node* makecon(const Type *t) const { return _igvn.makecon(t); } Node* basic_plus_adr(Node* base, int offset) { return (offset == 0)? base: basic_plus_adr(base, MakeConX(offset)); } @@ -94,7 +90,7 @@ private: bool scalar_replacement(AllocateNode *alloc, GrowableArray & safepoints_done); void process_users_of_allocation(CallNode *alloc); - void eliminate_card_mark(Node *cm); + void eliminate_gc_barrier(Node *p2x); void mark_eliminated_box(Node* box, Node* obj); void mark_eliminated_locking_nodes(AbstractLockNode *alock); bool eliminate_locking_node(AbstractLockNode *alock); @@ -209,6 +205,12 @@ public: void eliminate_macro_nodes(); bool expand_macro_nodes(); + // Members accessed from BarrierSetC2 + void replace_node(Node* source, Node* target) { _igvn.replace_node(source, target); } + Node* intcon(jint con) const { return _igvn.intcon(con); } + Node* longcon(jlong con) const { return _igvn.longcon(con); } + Node* makecon(const Type *t) const { return _igvn.makecon(t); } + Node* top() const { return C->top(); } }; #endif // SHARE_VM_OPTO_MACRO_HPP diff --git a/src/hotspot/share/opto/macroArrayCopy.cpp b/src/hotspot/share/opto/macroArrayCopy.cpp index bc93b76..aa60962 100644 --- a/src/hotspot/share/opto/macroArrayCopy.cpp +++ b/src/hotspot/share/opto/macroArrayCopy.cpp @@ -550,9 +550,9 @@ Node* PhaseMacroExpand::generate_arraycopy(ArrayCopyNode *ac, AllocateArrayNode* } // At this point we know we do not need type checks on oop stores. - // Let's see if we need card marks: - if (alloc != NULL && GraphKit::use_ReduceInitialCardMarks()) { - // If we do not need card marks, copy using the jint or jlong stub. + BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); + if (alloc != NULL && !bs->array_copy_requires_gc_barriers(copy_type)) { + // If we do not need gc barriers, copy using the jint or jlong stub. copy_type = LP64_ONLY(UseCompressedOops ? 
T_INT : T_LONG) NOT_LP64(T_INT); assert(type2aelembytes(basic_elem_type) == type2aelembytes(copy_type), "sizes agree"); diff --git a/src/hotspot/share/opto/parse2.cpp b/src/hotspot/share/opto/parse2.cpp index 3c632a8..1f49c09 100644 --- a/src/hotspot/share/opto/parse2.cpp +++ b/src/hotspot/share/opto/parse2.cpp @@ -51,29 +51,60 @@ extern int explicit_null_checks_inserted, #endif //---------------------------------array_load---------------------------------- -void Parse::array_load(BasicType elem_type) { - const Type* elem = Type::TOP; - Node* adr = array_addressing(elem_type, 0, &elem); +void Parse::array_load(BasicType bt) { + const Type* elemtype = Type::TOP; + bool big_val = bt == T_DOUBLE || bt == T_LONG; + Node* adr = array_addressing(bt, 0, &elemtype); if (stopped()) return; // guaranteed null or range check - dec_sp(2); // Pop array and index - const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type); - Node* ld = make_load(control(), adr, elem, elem_type, adr_type, MemNode::unordered); - push(ld); + + pop(); // index (already used) + Node* array = pop(); // the array itself + + if (elemtype == TypeInt::BOOL) { + bt = T_BOOLEAN; + } else if (bt == T_OBJECT) { + elemtype = _gvn.type(array)->is_aryptr()->elem()->make_oopptr(); + } + + const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(bt); + + Node* ld = access_load_at(array, adr, adr_type, elemtype, bt, + IN_HEAP | IN_HEAP_ARRAY | C2_CONTROL_DEPENDENT_LOAD); + if (big_val) { + push_pair(ld); + } else { + push(ld); + } } //--------------------------------array_store---------------------------------- -void Parse::array_store(BasicType elem_type) { - const Type* elem = Type::TOP; - Node* adr = array_addressing(elem_type, 1, &elem); +void Parse::array_store(BasicType bt) { + const Type* elemtype = Type::TOP; + bool big_val = bt == T_DOUBLE || bt == T_LONG; + Node* adr = array_addressing(bt, big_val ? 
2 : 1, &elemtype); if (stopped()) return; // guaranteed null or range check - Node* val = pop(); - dec_sp(2); // Pop array and index - const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type); - if (elem == TypeInt::BOOL) { - elem_type = T_BOOLEAN; + if (bt == T_OBJECT) { + array_store_check(); + } + Node* val; // Oop to store + if (big_val) { + val = pop_pair(); + } else { + val = pop(); } - store_to_memory(control(), adr, val, elem_type, adr_type, StoreNode::release_if_reference(elem_type)); + pop(); // index (already used) + Node* array = pop(); // the array itself + + if (elemtype == TypeInt::BOOL) { + bt = T_BOOLEAN; + } else if (bt == T_OBJECT) { + elemtype = _gvn.type(array)->is_aryptr()->elem()->make_oopptr(); + } + + const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(bt); + + access_store_at(control(), array, adr, adr_type, val, elemtype, bt, MO_UNORDERED | IN_HEAP | IN_HEAP_ARRAY); } @@ -2141,61 +2172,23 @@ void Parse::do_one_bytecode() { break; } - case Bytecodes::_baload: array_load(T_BYTE); break; - case Bytecodes::_caload: array_load(T_CHAR); break; - case Bytecodes::_iaload: array_load(T_INT); break; - case Bytecodes::_saload: array_load(T_SHORT); break; - case Bytecodes::_faload: array_load(T_FLOAT); break; - case Bytecodes::_aaload: array_load(T_OBJECT); break; - case Bytecodes::_laload: { - a = array_addressing(T_LONG, 0); - if (stopped()) return; // guaranteed null or range check - dec_sp(2); // Pop array and index - push_pair(make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS, MemNode::unordered)); - break; - } - case Bytecodes::_daload: { - a = array_addressing(T_DOUBLE, 0); - if (stopped()) return; // guaranteed null or range check - dec_sp(2); // Pop array and index - push_pair(make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES, MemNode::unordered)); - break; - } - case Bytecodes::_bastore: array_store(T_BYTE); break; - case Bytecodes::_castore: array_store(T_CHAR); break; - case Bytecodes::_iastore: array_store(T_INT); break; - case Bytecodes::_sastore: array_store(T_SHORT); break; - case Bytecodes::_fastore: array_store(T_FLOAT); break; - case Bytecodes::_aastore: { - d = array_addressing(T_OBJECT, 1); - if (stopped()) return; // guaranteed null or range check - array_store_check(); - c = pop(); // Oop to store - b = pop(); // index (already used) - a = pop(); // the array itself - const TypeOopPtr* elemtype = _gvn.type(a)->is_aryptr()->elem()->make_oopptr(); - const TypeAryPtr* adr_type = TypeAryPtr::OOPS; - Node* store = store_oop_to_array(control(), a, d, adr_type, c, elemtype, T_OBJECT, - StoreNode::release_if_reference(T_OBJECT)); - break; - } - case Bytecodes::_lastore: { - a = array_addressing(T_LONG, 2); - if (stopped()) return; // guaranteed null or range check - c = pop_pair(); - dec_sp(2); // Pop array and index - store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS, MemNode::unordered); - break; - } - case Bytecodes::_dastore: { - a = array_addressing(T_DOUBLE, 2); - if (stopped()) return; // guaranteed null or range check - c = pop_pair(); - dec_sp(2); // Pop array and index - c = dstore_rounding(c); - store_to_memory(control(), a, c, T_DOUBLE, TypeAryPtr::DOUBLES, MemNode::unordered); - break; - } + case Bytecodes::_baload: array_load(T_BYTE); break; + case Bytecodes::_caload: array_load(T_CHAR); break; + case Bytecodes::_iaload: array_load(T_INT); break; + case Bytecodes::_saload: array_load(T_SHORT); break; + case Bytecodes::_faload: array_load(T_FLOAT); break; + case Bytecodes::_aaload: 
array_load(T_OBJECT); break; + case Bytecodes::_laload: array_load(T_LONG); break; + case Bytecodes::_daload: array_load(T_DOUBLE); break; + case Bytecodes::_bastore: array_store(T_BYTE); break; + case Bytecodes::_castore: array_store(T_CHAR); break; + case Bytecodes::_iastore: array_store(T_INT); break; + case Bytecodes::_sastore: array_store(T_SHORT); break; + case Bytecodes::_fastore: array_store(T_FLOAT); break; + case Bytecodes::_aastore: array_store(T_OBJECT); break; + case Bytecodes::_lastore: array_store(T_LONG); break; + case Bytecodes::_dastore: array_store(T_DOUBLE); break; + case Bytecodes::_getfield: do_getfield(); break; diff --git a/src/hotspot/share/opto/parse3.cpp b/src/hotspot/share/opto/parse3.cpp index 379e548..e46e3de 100644 --- a/src/hotspot/share/opto/parse3.cpp +++ b/src/hotspot/share/opto/parse3.cpp @@ -177,7 +177,12 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) { bool must_assert_null = false; - if( bt == T_OBJECT ) { + DecoratorSet decorators = IN_HEAP; + decorators |= is_vol ? MO_SEQ_CST : MO_UNORDERED; + + bool is_obj = bt == T_OBJECT || bt == T_ARRAY; + + if (is_obj) { if (!field->type()->is_loaded()) { type = TypeInstPtr::BOTTOM; must_assert_null = true; @@ -198,14 +203,8 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) { } else { type = Type::get_const_basic_type(bt); } - if (support_IRIW_for_not_multiple_copy_atomic_cpu && field->is_volatile()) { - insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier - } - // Build the load. - // - MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered; - bool needs_atomic_access = is_vol || AlwaysAtomicAccesses; - Node* ld = make_load(NULL, adr, type, bt, adr_type, mo, LoadNode::DependsOnlyOnTest, needs_atomic_access); + + Node* ld = access_load_at(obj, adr, adr_type, type, bt, decorators); // Adjust Java stack if (type2size[bt] == 1) @@ -236,22 +235,10 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) { null_assert(peek()); set_bci(iter().cur_bci()); // put it back } - - // If reference is volatile, prevent following memory ops from - // floating up past the volatile read. Also prevents commoning - // another volatile read. - if (field->is_volatile()) { - // Memory barrier includes bogus read of value to force load BEFORE membar - insert_mem_bar(Op_MemBarAcquire, ld); - } } void Parse::do_put_xxx(Node* obj, ciField* field, bool is_field) { bool is_vol = field->is_volatile(); - // If reference is volatile, prevent following memory ops from - // floating down past the volatile write. Also prevents commoning - // another volatile read. - if (is_vol) insert_mem_bar(Op_MemBarRelease); // Compute address and memory type. int offset = field->offset_in_bytes(); @@ -260,71 +247,50 @@ void Parse::do_put_xxx(Node* obj, ciField* field, bool is_field) { BasicType bt = field->layout_type(); // Value to be stored Node* val = type2size[bt] == 1 ? pop() : pop_pair(); - // Round doubles before storing - if (bt == T_DOUBLE) val = dstore_rounding(val); - - // Conservatively release stores of object references. - const MemNode::MemOrd mo = - is_vol ? - // Volatile fields need releasing stores. - MemNode::release : - // Non-volatile fields also need releasing stores if they hold an - // object reference, because the object reference might point to - // a freshly created object. - StoreNode::release_if_reference(bt); + + DecoratorSet decorators = IN_HEAP; + decorators |= is_vol ? 
MO_SEQ_CST : MO_UNORDERED; + + bool is_obj = bt == T_OBJECT || bt == T_ARRAY; // Store the value. - Node* store; - if (bt == T_OBJECT) { - const TypeOopPtr* field_type; - if (!field->type()->is_loaded()) { - field_type = TypeInstPtr::BOTTOM; - } else { + const Type* field_type; + if (!field->type()->is_loaded()) { + field_type = TypeInstPtr::BOTTOM; + } else { + if (is_obj) { field_type = TypeOopPtr::make_from_klass(field->type()->as_klass()); + } else { + field_type = Type::BOTTOM; } - store = store_oop_to_object(control(), obj, adr, adr_type, val, field_type, bt, mo); - } else { - bool needs_atomic_access = is_vol || AlwaysAtomicAccesses; - store = store_to_memory(control(), adr, val, bt, adr_type, mo, needs_atomic_access); } + access_store_at(control(), obj, adr, adr_type, val, field_type, bt, decorators); - // If reference is volatile, prevent following volatiles ops from - // floating up before the volatile write. - if (is_vol) { - // If not multiple copy atomic, we do the MemBarVolatile before the load. - if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { - insert_mem_bar(Op_MemBarVolatile); // Use fat membar - } + if (is_field) { // Remember we wrote a volatile field. // For not multiple copy atomic cpu (ppc64) a barrier should be issued // in constructors which have such stores. See do_exits() in parse1.cpp. - if (is_field) { + if (is_vol) { set_wrote_volatile(true); } - } - - if (is_field) { set_wrote_fields(true); - } - // If the field is final, the rules of Java say we are in or . - // Note the presence of writes to final non-static fields, so that we - // can insert a memory barrier later on to keep the writes from floating - // out of the constructor. - // Any method can write a @Stable field; insert memory barriers after those also. - if (is_field && (field->is_final() || field->is_stable())) { + // If the field is final, the rules of Java say we are in or . + // Note the presence of writes to final non-static fields, so that we + // can insert a memory barrier later on to keep the writes from floating + // out of the constructor. + // Any method can write a @Stable field; insert memory barriers after those also. if (field->is_final()) { - set_wrote_final(true); + set_wrote_final(true); + if (AllocateNode::Ideal_allocation(obj, &_gvn) != NULL) { + // Preserve allocation ptr to create precedent edge to it in membar + // generated on exit from constructor. + // Can't bind stable with its allocation, only record allocation for final field. + set_alloc_with_final(obj); + } } if (field->is_stable()) { - set_wrote_stable(true); - } - - // Preserve allocation ptr to create precedent edge to it in membar - // generated on exit from constructor. - // Can't bind stable with its allocation, only record allocation for final field. 
- if (field->is_final() && AllocateNode::Ideal_allocation(obj, &_gvn) != NULL) { - set_alloc_with_final(obj); + set_wrote_stable(true); } } } @@ -385,7 +351,7 @@ Node* Parse::expand_multianewarray(ciArrayKlass* array_klass, Node* *lengths, in Node* elem = expand_multianewarray(array_klass_1, &lengths[1], ndimensions-1, nargs); intptr_t offset = header + ((intptr_t)i << LogBytesPerHeapOop); Node* eaddr = basic_plus_adr(array, offset); - store_oop_to_array(control(), array, eaddr, adr_type, elem, elemtype, T_OBJECT, MemNode::unordered); + access_store_at(control(), array, eaddr, adr_type, elem, elemtype, T_OBJECT, IN_HEAP | IN_HEAP_ARRAY); } } return array; diff --git a/src/hotspot/share/opto/runtime.cpp b/src/hotspot/share/opto/runtime.cpp index 8ef2691..eb5b6a2 100644 --- a/src/hotspot/share/opto/runtime.cpp +++ b/src/hotspot/share/opto/runtime.cpp @@ -95,8 +95,6 @@ address OptoRuntime::_multianewarray3_Java = NULL; address OptoRuntime::_multianewarray4_Java = NULL; address OptoRuntime::_multianewarray5_Java = NULL; address OptoRuntime::_multianewarrayN_Java = NULL; -address OptoRuntime::_g1_wb_pre_Java = NULL; -address OptoRuntime::_g1_wb_post_Java = NULL; address OptoRuntime::_vtable_must_compile_Java = NULL; address OptoRuntime::_complete_monitor_locking_Java = NULL; address OptoRuntime::_monitor_notify_Java = NULL; @@ -141,8 +139,6 @@ bool OptoRuntime::generate(ciEnv* env) { gen(env, _multianewarray4_Java , multianewarray4_Type , multianewarray4_C , 0 , true , false, false); gen(env, _multianewarray5_Java , multianewarray5_Type , multianewarray5_C , 0 , true , false, false); gen(env, _multianewarrayN_Java , multianewarrayN_Type , multianewarrayN_C , 0 , true , false, false); - gen(env, _g1_wb_pre_Java , g1_wb_pre_Type , SharedRuntime::g1_wb_pre , 0 , false, false, false); - gen(env, _g1_wb_post_Java , g1_wb_post_Type , SharedRuntime::g1_wb_post , 0 , false, false, false); gen(env, _complete_monitor_locking_Java , complete_monitor_enter_Type , SharedRuntime::complete_monitor_locking_C, 0, false, false, false); gen(env, _monitor_notify_Java , monitor_notify_Type , monitor_notify_C , 0 , false, false, false); gen(env, _monitor_notifyAll_Java , monitor_notify_Type , monitor_notifyAll_C , 0 , false, false, false); @@ -542,33 +538,6 @@ const TypeFunc *OptoRuntime::multianewarrayN_Type() { return TypeFunc::make(domain, range); } -const TypeFunc *OptoRuntime::g1_wb_pre_Type() { - const Type **fields = TypeTuple::fields(2); - fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value - fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread - const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); - - // create result type (range) - fields = TypeTuple::fields(0); - const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields); - - return TypeFunc::make(domain, range); -} - -const TypeFunc *OptoRuntime::g1_wb_post_Type() { - - const Type **fields = TypeTuple::fields(2); - fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Card addr - fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread - const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields); - - // create result type (range) - fields = TypeTuple::fields(0); - const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields); - - return TypeFunc::make(domain, range); -} - const TypeFunc *OptoRuntime::uncommon_trap_Type() { // create input type (domain) const Type **fields = TypeTuple::fields(1); diff --git a/src/hotspot/share/opto/runtime.hpp b/src/hotspot/share/opto/runtime.hpp index 
54219de..8daf616 100644 --- a/src/hotspot/share/opto/runtime.hpp +++ b/src/hotspot/share/opto/runtime.hpp @@ -141,8 +141,6 @@ class OptoRuntime : public AllStatic { static address _multianewarray4_Java; static address _multianewarray5_Java; static address _multianewarrayN_Java; - static address _g1_wb_pre_Java; - static address _g1_wb_post_Java; static address _vtable_must_compile_Java; static address _complete_monitor_locking_Java; static address _rethrow_Java; @@ -170,8 +168,6 @@ class OptoRuntime : public AllStatic { static void multianewarray4_C(Klass* klass, int len1, int len2, int len3, int len4, JavaThread *thread); static void multianewarray5_C(Klass* klass, int len1, int len2, int len3, int len4, int len5, JavaThread *thread); static void multianewarrayN_C(Klass* klass, arrayOopDesc* dims, JavaThread *thread); - static void g1_wb_pre_C(oopDesc* orig, JavaThread* thread); - static void g1_wb_post_C(void* card_addr, JavaThread* thread); public: // Slow-path Locking and Unlocking @@ -223,8 +219,6 @@ private: static address multianewarray4_Java() { return _multianewarray4_Java; } static address multianewarray5_Java() { return _multianewarray5_Java; } static address multianewarrayN_Java() { return _multianewarrayN_Java; } - static address g1_wb_pre_Java() { return _g1_wb_pre_Java; } - static address g1_wb_post_Java() { return _g1_wb_post_Java; } static address vtable_must_compile_stub() { return _vtable_must_compile_Java; } static address complete_monitor_locking_Java() { return _complete_monitor_locking_Java; } static address monitor_notify_Java() { return _monitor_notify_Java; } @@ -257,8 +251,6 @@ private: static const TypeFunc* multianewarray4_Type(); // multianewarray static const TypeFunc* multianewarray5_Type(); // multianewarray static const TypeFunc* multianewarrayN_Type(); // multianewarray - static const TypeFunc* g1_wb_pre_Type(); - static const TypeFunc* g1_wb_post_Type(); static const TypeFunc* complete_monitor_enter_Type(); static const TypeFunc* complete_monitor_exit_Type(); static const TypeFunc* monitor_notify_Type();
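// Editorial note (not part of the patch): with the G1 write-barrier entry points and
// their TypeFunc signatures removed from the GC-agnostic OptoRuntime above, the parser
// changes earlier in this patch describe each heap access purely through decorator
// flags and leave barrier expansion to the active BarrierSetC2. The sketch below is a
// minimal, self-contained illustration of that decorator composition; the flag names
// imitate the patch (IN_HEAP, MO_SEQ_CST, ...) but their values and the helper
// field_access_decorators() are illustrative stand-ins, not the real accessDecorators
// definitions.
#include <cstdint>
#include <cstdio>

typedef uint64_t DecoratorSet;

// Illustrative flag values only; the real constants live in the shared access API.
const DecoratorSet MO_UNORDERED  = 1u << 0;  // plain, unordered access
const DecoratorSet MO_SEQ_CST    = 1u << 1;  // volatile-style, sequentially consistent access
const DecoratorSet IN_HEAP       = 1u << 2;  // access targets the Java heap
const DecoratorSet IN_HEAP_ARRAY = 1u << 3;  // ...and addresses an array element

// Mirrors how do_get_xxx()/do_put_xxx() above pick ordering from the field's volatility.
DecoratorSet field_access_decorators(bool is_volatile) {
  DecoratorSet decorators = IN_HEAP;
  decorators |= is_volatile ? MO_SEQ_CST : MO_UNORDERED;
  return decorators;
}

int main() {
  DecoratorSet d = field_access_decorators(true);
  std::printf("volatile field access: IN_HEAP=%d MO_SEQ_CST=%d\n",
              (d & IN_HEAP) != 0, (d & MO_SEQ_CST) != 0);
  return 0;
}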