# HG changeset patch
# User goetz
# Date 1384272833 -3600
# Node ID 29ac6fed7a38c6187b5ca2514b395e8b2f617e35
# Parent  eb178e97560c4efc844dfe695e18cc8777b7c433
8024921: PPC64 (part 113): Extend Load and Store nodes to know about memory ordering.

diff --git a/src/share/vm/opto/generateOptoStub.cpp b/src/share/vm/opto/generateOptoStub.cpp
--- a/src/share/vm/opto/generateOptoStub.cpp
+++ b/src/share/vm/opto/generateOptoStub.cpp
@@ -104,13 +104,12 @@
   //
   Node *adr_sp = basic_plus_adr(top(), thread, in_bytes(JavaThread::last_Java_sp_offset()));
   Node *last_sp = basic_plus_adr(top(), frameptr(), (intptr_t) STACK_BIAS);
-  store_to_memory(NULL, adr_sp, last_sp, T_ADDRESS, NoAlias);
+  store_to_memory(NULL, adr_sp, last_sp, T_ADDRESS, NoAlias, false, StoreNode::unordered);
 
   // Set _thread_in_native
   // The order of stores into TLS is critical!  Setting _thread_in_native MUST
   // be last, because a GC is allowed at any time after setting it and the GC
   // will require last_Java_pc and last_Java_sp.
-
   Node* adr_state = basic_plus_adr(top(), thread, in_bytes(JavaThread::thread_state_offset()));
 
   //-----------------------------
   // Compute signature for C call.  Varies from the Java signature!
@@ -225,16 +224,15 @@
 
   //-----------------------------
   // Clear last_Java_sp
-  store_to_memory(NULL, adr_sp, null(), T_ADDRESS, NoAlias);
+  store_to_memory(NULL, adr_sp, null(), T_ADDRESS, NoAlias, false, StoreNode::unordered);
 
   // Clear last_Java_pc and (optionally)_flags
-  store_to_memory(NULL, adr_last_Java_pc, null(), T_ADDRESS, NoAlias);
+  store_to_memory(NULL, adr_last_Java_pc, null(), T_ADDRESS, NoAlias, false, StoreNode::unordered);
 #if defined(SPARC)
-  store_to_memory(NULL, adr_flags, intcon(0), T_INT, NoAlias);
+  store_to_memory(NULL, adr_flags, intcon(0), T_INT, NoAlias, false, StoreNode::unordered);
 #endif /* defined(SPARC) */
 #if (defined(IA64) && !defined(AIX))
   Node* adr_last_Java_fp = basic_plus_adr(top(), thread, in_bytes(JavaThread::last_Java_fp_offset()));
-  if( os::is_MP() ) insert_mem_bar(Op_MemBarRelease);
-  store_to_memory(NULL, adr_last_Java_fp, null(), T_ADDRESS, NoAlias);
+  store_to_memory(NULL, adr_last_Java_fp, null(), T_ADDRESS, NoAlias, false, StoreNode::unordered);
 #endif
 
   // For is-fancy-jump, the C-return value is also the branch target
@@ -242,16 +240,16 @@
 
   // Runtime call returning oop in TLS?  Fetch it out
   if( pass_tls ) {
     Node* adr = basic_plus_adr(top(), thread, in_bytes(JavaThread::vm_result_offset()));
-    Node* vm_result = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, false);
+    Node* vm_result = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, false, LoadNode::unordered);
     map()->set_req(TypeFunc::Parms, vm_result); // vm_result passed as result
     // clear thread-local-storage(tls)
-    store_to_memory(NULL, adr, null(), T_ADDRESS, NoAlias);
+    store_to_memory(NULL, adr, null(), T_ADDRESS, NoAlias, false, StoreNode::unordered);
   }
 
   //-----------------------------
   // check exception
   Node* adr = basic_plus_adr(top(), thread, in_bytes(Thread::pending_exception_offset()));
-  Node* pending = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, false);
+  Node* pending = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, false, LoadNode::unordered);
 
   Node* exit_memory = reset_memory();
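The pattern above recurs through the whole change: every load and store factory gains a trailing memory-ordering argument, so the ordering requirement travels with the node instead of being implied by context. A minimal sketch of the idea follows; it is illustrative only, with invented names, not HotSpot code. The point is that a backend for a weakly ordered machine such as PPC64 can decide per node whether a barrier must be emitted.

#include <cstdio>

// Toy model: a store records the ordering it requires.
enum class Sem { unordered, release };

struct ToyStore {
  const char* target;
  Sem         sem;
};

// A toy "backend": only release stores pay for a barrier, and only on
// weak-memory machines (a PPC64-style lwsync is shown for flavor).
void emit(const ToyStore& s, bool weak_memory_model) {
  if (s.sem == Sem::release && weak_memory_model)
    std::printf("lwsync; std %s\n", s.target);
  else
    std::printf("std %s\n", s.target);
}

int main() {
  emit({"last_Java_sp", Sem::unordered}, true); // stub-internal TLS store
  emit({"obj._field",   Sem::release},   true); // publishing an oop
  return 0;
}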
diff --git a/src/share/vm/opto/graphKit.cpp b/src/share/vm/opto/graphKit.cpp
--- a/src/share/vm/opto/graphKit.cpp
+++ b/src/share/vm/opto/graphKit.cpp
@@ -494,7 +494,7 @@
   // first must access the should_post_on_exceptions_flag in this thread's JavaThread
   Node* jthread = _gvn.transform(new (C) ThreadLocalNode());
   Node* adr = basic_plus_adr(top(), jthread, in_bytes(JavaThread::should_post_on_exceptions_flag_offset()));
-  Node* should_post_flag = make_load(control(), adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw, false);
+  Node* should_post_flag = make_load(control(), adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw, false, LoadNode::unordered);
 
   // Test the should_post_on_exceptions_flag vs. 0
   Node* chk = _gvn.transform( new (C) CmpINode(should_post_flag, intcon(0)) );
@@ -596,7 +596,8 @@
     Node *adr = basic_plus_adr(ex_node, ex_node, offset);
     const TypeOopPtr* val_type = TypeOopPtr::make_from_klass(env()->String_klass());
-    Node *store = store_oop_to_object(control(), ex_node, adr, adr_typ, null(), val_type, T_OBJECT);
+    // Conservatively release stores of object references.
+    Node *store = store_oop_to_object(control(), ex_node, adr, adr_typ, null(), val_type, T_OBJECT, StoreNode::release);
 
     add_exception_state(make_exception_state(ex_node));
     return;
@@ -1483,16 +1484,16 @@
 
 // factory methods in "int adr_idx"
 Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
                           int adr_idx,
-                          bool require_atomic_access) {
+                          bool require_atomic_access, LoadNode::Sem sem) {
   assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" );
   const TypePtr* adr_type = NULL; // debug-mode-only argument
   debug_only(adr_type = C->get_adr_type(adr_idx));
   Node* mem = memory(adr_idx);
   Node* ld;
   if (require_atomic_access && bt == T_LONG) {
-    ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t);
+    ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t, sem);
   } else {
-    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt);
+    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, sem);
   }
   ld = _gvn.transform(ld);
   if ((bt == T_OBJECT) && C->do_escape_analysis() || C->eliminate_boxing()) {
@@ -1504,16 +1505,17 @@
 
 Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt,
                                 int adr_idx,
-                                bool require_atomic_access) {
+                                bool require_atomic_access,
+                                StoreNode::Sem sem) {
   assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
   const TypePtr* adr_type = NULL;
   debug_only(adr_type = C->get_adr_type(adr_idx));
   Node *mem = memory(adr_idx);
   Node* st;
   if (require_atomic_access && bt == T_LONG) {
-    st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val);
+    st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val, sem);
   } else {
-    st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt);
+    st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, sem);
   }
   st = _gvn.transform(st);
   set_memory(st, adr_idx);
@@ -1613,7 +1615,8 @@
                           Node* val,
                           const TypeOopPtr* val_type,
                           BasicType bt,
-                          bool use_precise) {
+                          bool use_precise,
+                          StoreNode::Sem sem) {
   // Transformation of a value which could be NULL pointer (CastPP #NULL)
   // could be delayed during Parse (for example, in adjust_map_after_if()).
   // Execute transformation here to avoid barrier generation in such case.
@@ -1633,7 +1636,7 @@
                     NULL /* pre_val */,
                     bt);
 
-  Node* store = store_to_memory(control(), adr, val, bt, adr_idx);
+  Node* store = store_to_memory(control(), adr, val, bt, adr_idx, false, sem);
   post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise);
   return store;
 }
@@ -1644,7 +1647,8 @@
                              Node* adr,  // actual adress to store val at
                              const TypePtr* adr_type,
                              Node* val,
-                             BasicType bt) {
+                             BasicType bt,
+                             StoreNode::Sem sem) {
   Compile::AliasType* at = C->alias_type(adr_type);
   const TypeOopPtr* val_type = NULL;
   if (adr_type->isa_instptr()) {
@@ -1663,7 +1667,7 @@
   if (val_type == NULL) {
     val_type = TypeInstPtr::BOTTOM;
   }
-  return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true);
+  return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, sem);
 }
 
@@ -1707,7 +1711,7 @@
   const Type* elemtype = arytype->elem();
   BasicType elembt = elemtype->array_element_basic_type();
   Node* adr = array_element_address(ary, idx, elembt, arytype->size());
-  Node* ld = make_load(ctl, adr, elemtype, elembt, arytype);
+  Node* ld = make_load(ctl, adr, elemtype, elembt, arytype, false, LoadNode::unordered);
   return ld;
 }
 
@@ -1942,9 +1946,9 @@
 void GraphKit::increment_counter(Node* counter_addr) {
   int adr_type = Compile::AliasIdxRaw;
   Node* ctrl = control();
-  Node* cnt  = make_load(ctrl, counter_addr, TypeInt::INT, T_INT, adr_type);
+  Node* cnt  = make_load(ctrl, counter_addr, TypeInt::INT, T_INT, adr_type, false, LoadNode::unordered);
   Node* incr = _gvn.transform(new (C) AddINode(cnt, _gvn.intcon(1)));
-  store_to_memory( ctrl, counter_addr, incr, T_INT, adr_type );
+  store_to_memory(ctrl, counter_addr, incr, T_INT, adr_type, false, StoreNode::unordered);
 }
 
@@ -2525,7 +2529,8 @@
 
   // First load the super-klass's check-offset
   Node *p1 = basic_plus_adr( superklass, superklass, in_bytes(Klass::super_check_offset_offset()) );
-  Node *chk_off = _gvn.transform( new (C) LoadINode( NULL, memory(p1), p1, _gvn.type(p1)->is_ptr() ) );
+  Node *chk_off = _gvn.transform(new (C) LoadINode(NULL, memory(p1), p1, _gvn.type(p1)->is_ptr(),
+                                                   TypeInt::INT, LoadNode::unordered));
   int cacheoff_con = in_bytes(Klass::secondary_super_cache_offset());
   bool might_be_cache = (find_int_con(chk_off, cacheoff_con) == cacheoff_con);
 
@@ -3238,7 +3243,7 @@
   }
   constant_value = Klass::_lh_neutral_value;  // put in a known value
   Node* lhp = basic_plus_adr(klass_node, klass_node, in_bytes(Klass::layout_helper_offset()));
-  return make_load(NULL, lhp, TypeInt::INT, T_INT);
+  return make_load(NULL, lhp, TypeInt::INT, T_INT, false, LoadNode::unordered);
 }
 
 // We just put in an allocate/initialize with a big raw-memory effect.
@@ -3773,7 +3778,7 @@
 
     // Smash zero into card
     if( !UseConcMarkSweepGC ) {
-      __ store(__ ctrl(), card_adr, zero, bt, adr_type);
+      __ store(__ ctrl(), card_adr, zero, bt, adr_type, false, StoreNode::release);
     } else {
       // Specialized path for CM store barrier
       __ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, bt, adr_type);
@@ -3870,9 +3875,9 @@
 
       // Now get the buffer location we will log the previous value into and store it
       Node *log_addr = __ AddP(no_base, buffer, next_index);
-      __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw);
+      __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, false, StoreNode::unordered);
       // update the index
-      __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw);
+      __ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, false, StoreNode::unordered);
 
     } __ else_(); {
 
@@ -3912,8 +3917,9 @@
       Node* next_index = _gvn.transform(new (C) SubXNode(index, __ ConX(sizeof(intptr_t))));
       Node* log_addr = __ AddP(no_base, buffer, next_index);
 
-      __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw);
-      __ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw);
+      // Order, see storeCM.
+      __ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw, false, StoreNode::unordered);
+      __ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw, false, StoreNode::unordered);
 
     } __ else_(); {
       __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
@@ -4043,7 +4049,7 @@
     int offset_field_idx = C->get_alias_index(offset_field_type);
     return make_load(ctrl,
                      basic_plus_adr(str, str, offset_offset),
-                     TypeInt::INT, T_INT, offset_field_idx);
+                     TypeInt::INT, T_INT, offset_field_idx, false, LoadNode::unordered);
   } else {
     return intcon(0);
   }
@@ -4058,7 +4064,7 @@
     int count_field_idx = C->get_alias_index(count_field_type);
     return make_load(ctrl,
                      basic_plus_adr(str, str, count_offset),
-                     TypeInt::INT, T_INT, count_field_idx);
+                     TypeInt::INT, T_INT, count_field_idx, false, LoadNode::unordered);
   } else {
     return load_array_length(load_String_value(ctrl, str));
   }
@@ -4074,7 +4080,7 @@
                                              ciTypeArrayKlass::make(T_CHAR), true, 0);
   int value_field_idx = C->get_alias_index(value_field_type);
   Node* load = make_load(ctrl, basic_plus_adr(str, str, value_offset),
-                         value_type, T_OBJECT, value_field_idx);
+                         value_type, T_OBJECT, value_field_idx, false, LoadNode::unordered);
   // String.value field is known to be @Stable.
   if (UseImplicitStableValues) {
     load = cast_array_to_stable(load, value_type);
@@ -4089,7 +4095,7 @@
   const TypePtr* offset_field_type = string_type->add_offset(offset_offset);
   int offset_field_idx = C->get_alias_index(offset_field_type);
   store_to_memory(ctrl, basic_plus_adr(str, offset_offset),
-                  value, T_INT, offset_field_idx);
+                  value, T_INT, offset_field_idx, false, StoreNode::unordered);
 }
 
 void GraphKit::store_String_value(Node* ctrl, Node* str, Node* value) {
@@ -4099,7 +4105,7 @@
   const TypePtr* value_field_type = string_type->add_offset(value_offset);
 
   store_oop_to_object(ctrl, str,  basic_plus_adr(str, value_offset), value_field_type,
-                      value, TypeAryPtr::CHARS, T_OBJECT);
+                      value, TypeAryPtr::CHARS, T_OBJECT, StoreNode::unordered);
 }
 
 void GraphKit::store_String_length(Node* ctrl, Node* str, Node* value) {
@@ -4109,7 +4115,7 @@
   const TypePtr* count_field_type = string_type->add_offset(count_offset);
   int count_field_idx = C->get_alias_index(count_field_type);
   store_to_memory(ctrl, basic_plus_adr(str, count_offset),
-                  value, T_INT, count_field_idx);
+                  value, T_INT, count_field_idx, false, StoreNode::unordered);
 }
 
 Node* GraphKit::cast_array_to_stable(Node* ary, const TypeAryPtr* ary_type) {
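The graphKit.hpp declarations below spell out the contract: a store that publishes an object reference must use release semantics, so the reference can never become visible before the object's fields. The same publication idiom expressed with standard C++11 atomics, as an illustrative analogy only (not HotSpot code):

#include <atomic>
#include <cassert>
#include <thread>

struct Obj { int field; };

std::atomic<Obj*> published{nullptr};

void writer() {
  Obj* o = new Obj;
  o->field = 42;                                  // initialization...
  published.store(o, std::memory_order_release);  // ...must be visible before the reference
}

void reader() {
  Obj* o;
  while ((o = published.load(std::memory_order_acquire)) == nullptr) { /* spin */ }
  assert(o->field == 42);  // guaranteed: the acquire load pairs with the release store
}

int main() {
  std::thread t1(writer), t2(reader);
  t1.join(); t2.join();
  return 0;
}

On a weakly ordered machine the release store costs a barrier (roughly lwsync on PPC64), while on TSO machines both accesses are plain — which is exactly why the unordered/release distinction is worth carrying in the IR rather than fencing everything.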
diff --git a/src/share/vm/opto/graphKit.hpp b/src/share/vm/opto/graphKit.hpp
--- a/src/share/vm/opto/graphKit.hpp
+++ b/src/share/vm/opto/graphKit.hpp
@@ -510,37 +510,51 @@
 
   // Create a LoadNode, reading from the parser's memory state.
   // (Note:  require_atomic_access is useful only with T_LONG.)
+  //
+  // We choose the unordered semantics by default because we have
+  // adapted the `do_put_xxx' and `do_get_xxx' procedures for the case
+  // of volatile fields.
   Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
-                  bool require_atomic_access = false) {
+                  bool require_atomic_access, LoadNode::Sem sem) {
     // This version computes alias_index from bottom_type
     return make_load(ctl, adr, t, bt, adr->bottom_type()->is_ptr(),
-                     require_atomic_access);
+                     require_atomic_access, sem);
   }
-  Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type, bool require_atomic_access = false) {
+  Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type,
+                  bool require_atomic_access, LoadNode::Sem sem) {
     // This version computes alias_index from an address type
     assert(adr_type != NULL, "use other make_load factory");
     return make_load(ctl, adr, t, bt, C->get_alias_index(adr_type),
-                     require_atomic_access);
+                     require_atomic_access, sem);
   }
   // This is the base version which is given an alias index.
-  Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx, bool require_atomic_access = false);
+  Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx,
+                  bool require_atomic_access, LoadNode::Sem sem);
 
   // Create & transform a StoreNode and store the effect into the
   // parser's memory state.
+  //
+  // We must ensure that stores of object references will be visible
+  // only after the object's initialization. So the clients of this
+  // procedure must indicate that the store requires `release'
+  // semantics, if the stored value is an object reference that might
+  // point to a new object and may become externally visible.
   Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt,
                         const TypePtr* adr_type,
-                        bool require_atomic_access = false) {
+                        bool require_atomic_access,
+                        StoreNode::Sem sem) {
     // This version computes alias_index from an address type
     assert(adr_type != NULL, "use other store_to_memory factory");
     return store_to_memory(ctl, adr, val, bt,
                            C->get_alias_index(adr_type),
-                           require_atomic_access);
+                           require_atomic_access, sem);
   }
   // This is the base version which is given alias index
   // Return the new StoreXNode
   Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt,
                         int adr_idx,
-                        bool require_atomic_access = false);
+                        bool require_atomic_access,
+                        StoreNode::Sem);
 
   // All in one pre-barrier, store, post_barrier
@@ -562,7 +576,8 @@
                   Node* val,
                   const TypeOopPtr* val_type,
                   BasicType bt,
-                  bool use_precise);
+                  bool use_precise,
+                  StoreNode::Sem sem);
 
   Node* store_oop_to_object(Node* ctl,
                             Node* obj,   // containing obj
@@ -570,8 +585,9 @@
                             const TypePtr* adr_type,
                             Node* val,
                             const TypeOopPtr* val_type,
-                            BasicType bt) {
-    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, false);
+                            BasicType bt,
+                            StoreNode::Sem sem) {
+    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, false, sem);
   }
 
   Node* store_oop_to_array(Node* ctl,
@@ -580,8 +596,9 @@
                            const TypePtr* adr_type,
                            Node* val,
                            const TypeOopPtr* val_type,
-                           BasicType bt) {
-    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true);
+                           BasicType bt,
+                           StoreNode::Sem sem) {
+    return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, sem);
   }
 
   // Could be an array or object we don't know at compile time (unsafe ref.)
@@ -590,7 +607,8 @@
                              Node* adr,  // actual adress to store val at
                              const TypePtr* adr_type,
                              Node* val,
-                             BasicType bt);
+                             BasicType bt,
+                             StoreNode::Sem sem);
 
   // For the few case where the barriers need special help
   void pre_barrier(bool do_load, Node* ctl,
diff --git a/src/share/vm/opto/idealKit.cpp b/src/share/vm/opto/idealKit.cpp
--- a/src/share/vm/opto/idealKit.cpp
+++ b/src/share/vm/opto/idealKit.cpp
@@ -359,25 +359,25 @@
   Node* mem = memory(adr_idx);
   Node* ld;
   if (require_atomic_access && bt == T_LONG) {
-    ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t);
+    ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t, LoadNode::unordered);
   } else {
-    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt);
+    ld = LoadNode::make(_gvn, ctl, mem, adr, adr_type, t, bt, LoadNode::unordered);
   }
   return transform(ld);
 }
 
 Node* IdealKit::store(Node* ctl, Node* adr, Node *val, BasicType bt,
                       int adr_idx,
-                      bool require_atomic_access) {
-  assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
+                      bool require_atomic_access, StoreNode::Sem sem) {
+  assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory");
   const TypePtr* adr_type = NULL;
   debug_only(adr_type = C->get_adr_type(adr_idx));
   Node *mem = memory(adr_idx);
   Node* st;
   if (require_atomic_access && bt == T_LONG) {
-    st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val);
+    st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val, sem);
   } else {
-    st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt);
+    st = StoreNode::make(_gvn, ctl, mem, adr, adr_type, val, bt, sem);
   }
   st = transform(st);
   set_memory(st, adr_idx);
diff --git a/src/share/vm/opto/idealKit.hpp b/src/share/vm/opto/idealKit.hpp
--- a/src/share/vm/opto/idealKit.hpp
+++ b/src/share/vm/opto/idealKit.hpp
@@ -226,7 +226,8 @@
               Node* val,
               BasicType bt,
               int adr_idx,
-              bool require_atomic_access = false);
+              bool require_atomic_access,
+              StoreNode::Sem sem);
 
   // Store a card mark ordered after store_oop
   Node* storeCM(Node* ctl,
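In library_call.cpp below, the intrinsics choose the ordering once per access site — e.g. `StoreNode::Sem sem = is_volatile ? StoreNode::release : StoreNode::unordered;` — and then hand it to every store they emit. A compressed sketch of that call-site pattern, with invented names (illustrative only):

#include <cstdio>

enum class Sem { unordered, release };

// Mirror of the call-site decision: volatile Java accesses need
// release/acquire pairing on weak-memory targets, plain ones do not.
Sem sem_for_store(bool is_volatile) {
  return is_volatile ? Sem::release : Sem::unordered;
}

void store_field(const char* name, bool is_volatile) {
  Sem sem = sem_for_store(is_volatile);
  std::printf("store %-10s -> %s\n", name,
              sem == Sem::release ? "release" : "unordered");
}

int main() {
  store_field("x", false);  // plain field: unordered
  store_field("v", true);   // volatile field: release
  return 0;
}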
diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp
--- a/src/share/vm/opto/library_call.cpp
+++ b/src/share/vm/opto/library_call.cpp
@@ -1057,7 +1057,7 @@
   const Type* thread_type = TypeOopPtr::make_from_klass(thread_klass)->cast_to_ptr_type(TypePtr::NotNull);
   Node* thread = _gvn.transform(new (C) ThreadLocalNode());
   Node* p = basic_plus_adr(top()/*!oop*/, thread, in_bytes(JavaThread::threadObj_offset()));
-  Node* threadObj = make_load(NULL, p, thread_type, T_OBJECT);
+  Node* threadObj = make_load(NULL, p, thread_type, T_OBJECT, false, LoadNode::unordered);
   tls_output = thread;
   return threadObj;
 }
@@ -2640,7 +2640,7 @@
   if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
 
   if (!is_store) {
-    Node* p = make_load(control(), adr, value_type, type, adr_type, is_volatile);
+    Node* p = make_load(control(), adr, value_type, type, adr_type, is_volatile, LoadNode::unordered);
     // load value
     switch (type) {
     case T_BOOLEAN:
@@ -2684,13 +2684,14 @@
       break;
     }
 
+    StoreNode::Sem sem = is_volatile ? StoreNode::release : StoreNode::unordered;
     if (type != T_OBJECT ) {
-      (void) store_to_memory(control(), adr, val, type, adr_type, is_volatile);
+      (void) store_to_memory(control(), adr, val, type, adr_type, is_volatile, sem);
     } else {
       // Possibly an oop being stored to Java heap or native memory
       if (!TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop))) {
         // oop to Java heap.
-        (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type);
+        (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, sem);
       } else {
         // We can't tell at compile time if we are storing in the Java heap or outside
         // of it. So we need to emit code to conditionally do the proper type of
@@ -2702,11 +2703,11 @@
         __ if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); {
           // Sync IdealKit and graphKit.
           sync_kit(ideal);
-          Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type);
+          Node* st = store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, sem);
           // Update IdealKit memory.
           __ sync_kit(this);
         } __ else_(); {
-          __ store(__ ctrl(), adr, val, type, alias_type->index(), is_volatile);
+          __ store(__ ctrl(), adr, val, type, alias_type->index(), false, sem);
         } __ end_if();
         // Final sync IdealKit and GraphKit.
         final_sync(ideal);
@@ -3028,7 +3029,13 @@
 
   // Add the trailing membar surrounding the access
   insert_mem_bar(Op_MemBarCPUOrder);
+
+  // On power we need a fence to prevent succeeding loads from floating
+  // above the store of the compare-exchange.
+#ifdef PPC64
+  insert_mem_bar(Op_MemBarVolatile);
+#else
   insert_mem_bar(Op_MemBarAcquire);
+#endif
 
   assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match");
   set_result(load_store);
@@ -3090,9 +3097,9 @@
   const bool require_atomic_access = true;
   Node* store;
   if (type == T_OBJECT) // reference stores need a store barrier.
-    store = store_oop_to_unknown(control(), base, adr, adr_type, val, type);
+    store = store_oop_to_unknown(control(), base, adr, adr_type, val, type, StoreNode::release);
   else {
-    store = store_to_memory(control(), adr, val, type, adr_type, require_atomic_access);
+    store = store_to_memory(control(), adr, val, type, adr_type, require_atomic_access, StoreNode::release);
   }
   insert_mem_bar(Op_MemBarCPUOrder);
   return true;
@@ -3101,13 +3108,29 @@
 bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
   // Regardless of form, don't allow previous ld/st to move down,
   // then issue acquire, release, or volatile mem_bar.
+#if !defined PPC64 && !defined(IA64)
   insert_mem_bar(Op_MemBarCPUOrder);
+#endif
   switch(id) {
     case vmIntrinsics::_loadFence:
+      // On PPC and IA64 MemBarAcquire is implemented empty, as the acquire
+      // is issued together with the Load instructions. On IA64, MemBarRelease
+      // is empty for the same reason.
+#ifdef PPC64
+      insert_mem_bar(Op_MemBarRelease);
+      insert_mem_bar(Op_MemBarCPUOrder);
+#elif defined(IA64)
+      insert_mem_bar(Op_MemBarVolatile);
+#else
       insert_mem_bar(Op_MemBarAcquire);
+#endif
      return true;
    case vmIntrinsics::_storeFence:
+#ifndef IA64
      insert_mem_bar(Op_MemBarRelease);
+#else
+      insert_mem_bar(Op_MemBarVolatile);
+#endif
      return true;
    case vmIntrinsics::_fullFence:
      insert_mem_bar(Op_MemBarVolatile);
@@ -3152,7 +3175,7 @@
   Node* insp = basic_plus_adr(kls, in_bytes(InstanceKlass::init_state_offset()));
   // Use T_BOOLEAN for InstanceKlass::_init_state so the compiler
   // can generate code to load it as unsigned byte.
-  Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN);
+  Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN, false, LoadNode::unordered);
   Node* bits = intcon(InstanceKlass::fully_initialized);
   test = _gvn.transform(new (C) SubINode(inst, bits));
   // The 'test' is non-zero if we need to take a slow path.
@@ -3275,11 +3298,11 @@
 
   // (b) Interrupt bit on TLS must be false.
   Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
-  Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS);
+  Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS, false, LoadNode::unordered);
   p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset()));
 
   // Set the control input on the field _interrupted read to prevent it floating up.
-  Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT);
+  Node* int_bit = make_load(control(), p, TypeInt::BOOL, T_INT, false, LoadNode::unordered);
   Node* cmp_bit = _gvn.transform(new (C) CmpINode(int_bit, intcon(0)));
   Node* bol_bit = _gvn.transform(new (C) BoolNode(cmp_bit, BoolTest::ne));
 
@@ -3347,7 +3370,7 @@
 // Given a klass oop, load its java mirror (a java.lang.Class oop).
 Node* LibraryCallKit::load_mirror_from_klass(Node* klass) {
   Node* p = basic_plus_adr(klass, in_bytes(Klass::java_mirror_offset()));
-  return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT);
+  return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT, false, LoadNode::unordered);
 }
 
 //-----------------------load_klass_from_mirror_common-------------------------
@@ -3384,7 +3407,7 @@
   // Branch around if the given klass has the given modifier bit set.
   // Like generate_guard, adds a new path onto the region.
   Node* modp = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset()));
-  Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT);
+  Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT, false, LoadNode::unordered);
   Node* mask = intcon(modifier_mask);
   Node* bits = intcon(modifier_bits);
   Node* mbit = _gvn.transform(new (C) AndINode(mods, mask));
@@ -3501,7 +3524,7 @@
 
   case vmIntrinsics::_getModifiers:
     p = basic_plus_adr(kls, in_bytes(Klass::modifier_flags_offset()));
-    query_value = make_load(NULL, p, TypeInt::INT, T_INT);
+    query_value = make_load(NULL, p, TypeInt::INT, T_INT, false, LoadNode::unordered);
     break;
 
   case vmIntrinsics::_isInterface:
@@ -3559,7 +3582,7 @@
       // Be sure to pin the oop load to the guard edge just created:
       Node* is_array_ctrl = region->in(region->req()-1);
       Node* cma = basic_plus_adr(kls, in_bytes(ArrayKlass::component_mirror_offset()));
-      Node* cmo = make_load(is_array_ctrl, cma, TypeInstPtr::MIRROR, T_OBJECT);
+      Node* cmo = make_load(is_array_ctrl, cma, TypeInstPtr::MIRROR, T_OBJECT, false, LoadNode::unordered);
       phi->add_req(cmo);
     }
     query_value = null();  // non-array case is null
@@ -3567,7 +3590,7 @@
 
   case vmIntrinsics::_getClassAccessFlags:
     p = basic_plus_adr(kls, in_bytes(Klass::access_flags_offset()));
-    query_value = make_load(NULL, p, TypeInt::INT, T_INT);
+    query_value = make_load(NULL, p, TypeInt::INT, T_INT, false, LoadNode::unordered);
     break;
 
   default:
@@ -3933,7 +3956,7 @@
                      vtable_index*vtableEntry::size()) * wordSize +
                      vtableEntry::method_offset_in_bytes();
   Node* entry_addr  = basic_plus_adr(obj_klass, entry_offset);
-  Node* target_call = make_load(NULL, entry_addr, TypePtr::NOTNULL, T_ADDRESS);
+  Node* target_call = make_load(NULL, entry_addr, TypePtr::NOTNULL, T_ADDRESS, false, LoadNode::unordered);
 
   // Compare the target method with the expected method (e.g., Object.hashCode).
   const TypePtr* native_call_addr = TypeMetadataPtr::make(method);
@@ -4059,7 +4082,7 @@
 
   // Get the header out of the object, use LoadMarkNode when available
   Node* header_addr = basic_plus_adr(obj, oopDesc::mark_offset_in_bytes());
-  Node* header = make_load(control(), header_addr, TypeX_X, TypeX_X->basic_type());
+  Node* header = make_load(control(), header_addr, TypeX_X, TypeX_X->basic_type(), false, LoadNode::unordered);
 
   // Test the header to see if it is unlocked.
   Node *lock_mask = _gvn.MakeConX(markOopDesc::biased_lock_mask_in_place);
@@ -5480,7 +5503,7 @@
         // Store a zero to the immediately preceding jint:
         Node* x1 = _gvn.transform(new(C) AddXNode(start, MakeConX(-bump_bit)));
         Node* p1 = basic_plus_adr(dest, x1);
-        mem = StoreNode::make(_gvn, control(), mem, p1, adr_type, intcon(0), T_INT);
+        mem = StoreNode::make(_gvn, control(), mem, p1, adr_type, intcon(0), T_INT, StoreNode::unordered);
        mem = _gvn.transform(mem);
      }
    }
@@ -5530,8 +5553,8 @@
         ((src_off ^ dest_off) & (BytesPerLong-1)) == 0) {
       Node* sptr = basic_plus_adr(src, src_off);
       Node* dptr = basic_plus_adr(dest, dest_off);
-      Node* sval = make_load(control(), sptr, TypeInt::INT, T_INT, adr_type);
-      store_to_memory(control(), dptr, sval, T_INT, adr_type);
+      Node* sval = make_load(control(), sptr, TypeInt::INT, T_INT, adr_type, false, LoadNode::unordered);
+      store_to_memory(control(), dptr, sval, T_INT, adr_type, false, StoreNode::unordered);
       src_off += BytesPerInt;
       dest_off += BytesPerInt;
     } else {
@@ -5596,7 +5619,7 @@
   // super_check_offset, for the desired klass.
   int sco_offset = in_bytes(Klass::super_check_offset_offset());
   Node* p3 = basic_plus_adr(dest_elem_klass, sco_offset);
-  Node* n3 = new(C) LoadINode(NULL, memory(p3), p3, _gvn.type(p3)->is_ptr());
+  Node* n3 = new(C) LoadINode(NULL, memory(p3), p3, _gvn.type(p3)->is_ptr(), TypeInt::INT, LoadNode::unordered);
   Node* check_offset = ConvI2X(_gvn.transform(n3));
   Node* check_value  = dest_elem_klass;
 
@@ -5737,7 +5760,7 @@
 
     Node* base = makecon(TypeRawPtr::make(StubRoutines::crc_table_addr()));
     Node* offset = _gvn.transform(new (C) LShiftINode(result, intcon(0x2)));
     Node* adr = basic_plus_adr(top(), base, ConvI2X(offset));
-    result = make_load(control(), adr, TypeInt::INT, T_INT);
+    result = make_load(control(), adr, TypeInt::INT, T_INT, false, LoadNode::unordered);
 
     crc = _gvn.transform(new (C) URShiftINode(crc, intcon(8)));
     result = _gvn.transform(new (C) XorINode(crc, result));
@@ -5838,7 +5861,7 @@
   const TypeOopPtr* object_type = TypeOopPtr::make_from_klass(klass);
 
   Node* no_ctrl = NULL;
-  Node* result = make_load(no_ctrl, adr, object_type, T_OBJECT);
+  Node* result = make_load(no_ctrl, adr, object_type, T_OBJECT, false, LoadNode::unordered);
 
   // Use the pre-barrier to record the value in the referent field
   pre_barrier(false /* do_load */,
@@ -5885,7 +5908,7 @@
   const Type *type = TypeOopPtr::make_from_klass(field_klass->as_klass());
 
   // Build the load.
-  Node* loadedField = make_load(NULL, adr, type, bt, adr_type, is_vol);
+  Node* loadedField = make_load(NULL, adr, type, bt, adr_type, is_vol, LoadNode::unordered);
   return loadedField;
 }
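The fence intrinsics just rewritten (_loadFence/_storeFence/_fullFence) map one Java-level barrier onto the cheapest correct machine barrier per platform. A toy rendering of that mapping, with the instruction choices deliberately simplified (illustrative only; consult the real .ad files for the authoritative lowering):

#include <cstdio>

enum class Fence { load, store, full };

// Simplified: PPC64 can use lwsync for acquire/release-style fences and
// needs the full sync only for fullFence; on x86 (TSO) only fullFence
// requires an actual instruction.
const char* lower(Fence f, bool ppc64) {
  switch (f) {
    case Fence::load:  return ppc64 ? "lwsync" : "(no-op)";
    case Fence::store: return ppc64 ? "lwsync" : "(no-op)";
    case Fence::full:  return ppc64 ? "sync"   : "mfence";
  }
  return "?";
}

int main() {
  std::printf("loadFence:  ppc64=%s x86=%s\n", lower(Fence::load, true),  lower(Fence::load, false));
  std::printf("storeFence: ppc64=%s x86=%s\n", lower(Fence::store, true), lower(Fence::store, false));
  std::printf("fullFence:  ppc64=%s x86=%s\n", lower(Fence::full, true),  lower(Fence::full, false));
  return 0;
}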
diff --git a/src/share/vm/opto/macro.cpp b/src/share/vm/opto/macro.cpp
--- a/src/share/vm/opto/macro.cpp
+++ b/src/share/vm/opto/macro.cpp
@@ -1084,7 +1084,7 @@
 Node* PhaseMacroExpand::make_load(Node* ctl, Node* mem, Node* base, int offset, const Type* value_type, BasicType bt) {
   Node* adr = basic_plus_adr(base, offset);
   const TypePtr* adr_type = adr->bottom_type()->is_ptr();
-  Node* value = LoadNode::make(_igvn, ctl, mem, adr, adr_type, value_type, bt);
+  Node* value = LoadNode::make(_igvn, ctl, mem, adr, adr_type, value_type, bt, LoadNode::unordered);
   transform_later(value);
   return value;
 }
@@ -1092,7 +1092,7 @@
 Node* PhaseMacroExpand::make_store(Node* ctl, Node* mem, Node* base, int offset,
                                    Node* value, BasicType bt) {
   Node* adr = basic_plus_adr(base, offset);
-  mem = StoreNode::make(_igvn, ctl, mem, adr, NULL, value, bt);
+  mem = StoreNode::make(_igvn, ctl, mem, adr, NULL, value, bt, StoreNode::unordered);
   transform_later(mem);
   return mem;
 }
@@ -1272,8 +1272,8 @@
     // Load(-locked) the heap top.
     // See note above concerning the control input when using a TLAB
     Node *old_eden_top = UseTLAB
-      ? new (C) LoadPNode      (ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM)
-      : new (C) LoadPLockedNode(contended_region, contended_phi_rawmem, eden_top_adr);
+      ? new (C) LoadPNode      (ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, LoadNode::unordered)
+      : new (C) LoadPLockedNode(contended_region, contended_phi_rawmem, eden_top_adr, LoadNode::acquire);
 
     transform_later(old_eden_top);
     // Add to heap top to get a new heap top
@@ -1320,7 +1320,7 @@
     if (UseTLAB) {
       Node* store_eden_top =
         new (C) StorePNode(needgc_false, contended_phi_rawmem, eden_top_adr,
-                           TypeRawPtr::BOTTOM, new_eden_top);
+                           TypeRawPtr::BOTTOM, new_eden_top, StoreNode::unordered);
       transform_later(store_eden_top);
       fast_oop_ctrl = needgc_false; // No contention, so this is the fast path
       fast_oop_rawmem = store_eden_top;
@@ -1700,9 +1700,10 @@
                                         _igvn.MakeConX(in_bytes(JavaThread::tlab_pf_top_offset())) );
         transform_later(eden_pf_adr);
 
-        Node *old_pf_wm = new (C) LoadPNode( needgc_false,
+        Node *old_pf_wm = new (C) LoadPNode(needgc_false,
                                    contended_phi_rawmem, eden_pf_adr,
-                                   TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM );
+                                   TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM,
+                                   LoadNode::unordered);
         transform_later(old_pf_wm);
 
         // check against new_eden_top
@@ -1726,9 +1727,10 @@
         transform_later(new_pf_wmt );
         new_pf_wmt->set_req(0, need_pf_true);
 
-        Node *store_new_wmt = new (C) StorePNode( need_pf_true,
+        Node *store_new_wmt = new (C) StorePNode(need_pf_true,
                                          contended_phi_rawmem, eden_pf_adr,
-                                         TypeRawPtr::BOTTOM, new_pf_wmt );
+                                         TypeRawPtr::BOTTOM, new_pf_wmt,
+                                         StoreNode::unordered);
         transform_later(store_new_wmt);
 
         // adding prefetches
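In macro.cpp above, the load-locked read of the shared eden top gets acquire semantics while TLAB-local accesses stay unordered. The retry loop that consumes that load has the same shape as a CAS-based bump allocator; in portable C++ it looks roughly like this (analogy only, invented names, not the VM's code):

#include <atomic>
#include <cstddef>
#include <cstdio>

std::atomic<char*> eden_top;  // shared allocation pointer

// Rough analogue of the non-TLAB fast path: read the shared top with
// acquire semantics, then retry with a CAS until the bump succeeds
// (mirroring LoadPLocked / StorePConditional).
char* allocate(std::size_t size) {
  char* old_top = eden_top.load(std::memory_order_acquire);
  while (!eden_top.compare_exchange_weak(old_top, old_top + size,
                                         std::memory_order_acq_rel)) {
    // old_top was reloaded by compare_exchange_weak on failure; retry.
  }
  return old_top;
}

int main() {
  static char heap[1024];
  eden_top.store(heap, std::memory_order_relaxed);
  std::printf("allocated at %p\n", static_cast<void*>(allocate(64)));
  return 0;
}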
diff --git a/src/share/vm/opto/matcher.cpp b/src/share/vm/opto/matcher.cpp
--- a/src/share/vm/opto/matcher.cpp
+++ b/src/share/vm/opto/matcher.cpp
@@ -825,16 +825,15 @@
 
   // Compute generic short-offset Loads
 #ifdef _LP64
-  MachNode *spillCP = match_tree(new (C) LoadNNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
+  MachNode *spillCP = match_tree(new (C) LoadNNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM,LoadNode::unordered));
 #endif
-  MachNode *spillI  = match_tree(new (C) LoadINode(NULL,mem,fp,atp));
-  MachNode *spillL  = match_tree(new (C) LoadLNode(NULL,mem,fp,atp));
-  MachNode *spillF  = match_tree(new (C) LoadFNode(NULL,mem,fp,atp));
-  MachNode *spillD  = match_tree(new (C) LoadDNode(NULL,mem,fp,atp));
-  MachNode *spillP  = match_tree(new (C) LoadPNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
+  MachNode *spillI  = match_tree(new (C) LoadINode(NULL,mem,fp,atp,TypeInt::INT,LoadNode::unordered));
+  MachNode *spillL  = match_tree(new (C) LoadLNode(NULL,mem,fp,atp,TypeLong::LONG,false,LoadNode::unordered));
+  MachNode *spillF  = match_tree(new (C) LoadFNode(NULL,mem,fp,atp,Type::FLOAT,LoadNode::unordered));
+  MachNode *spillD  = match_tree(new (C) LoadDNode(NULL,mem,fp,atp,Type::DOUBLE,LoadNode::unordered));
+  MachNode *spillP  = match_tree(new (C) LoadPNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM,LoadNode::unordered));
   assert(spillI != NULL && spillL != NULL && spillF != NULL &&
          spillD != NULL && spillP != NULL, "");
-
   // Get the ADLC notion of the right regmask, for each basic type.
 #ifdef _LP64
   idealreg2regmask[Op_RegN] = &spillCP->out_RegMask();
diff --git a/src/share/vm/opto/memnode.cpp b/src/share/vm/opto/memnode.cpp
--- a/src/share/vm/opto/memnode.cpp
+++ b/src/share/vm/opto/memnode.cpp
@@ -907,7 +907,7 @@
 
 //----------------------------LoadNode::make-----------------------------------
 // Polymorphic factory method:
-Node *LoadNode::make( PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt ) {
+Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt, Sem sem) {
   Compile* C = gvn.C;
 
   // sanity check the alias category against the created node type
@@ -923,34 +923,34 @@
          rt->isa_oopptr() || is_immutable_value(adr),
          "raw memory operations should have control edge");
   switch (bt) {
-  case T_BOOLEAN: return new (C) LoadUBNode(ctl, mem, adr, adr_type, rt->is_int()  );
-  case T_BYTE:    return new (C) LoadBNode (ctl, mem, adr, adr_type, rt->is_int()  );
-  case T_INT:     return new (C) LoadINode (ctl, mem, adr, adr_type, rt->is_int()  );
-  case T_CHAR:    return new (C) LoadUSNode(ctl, mem, adr, adr_type, rt->is_int()  );
-  case T_SHORT:   return new (C) LoadSNode (ctl, mem, adr, adr_type, rt->is_int()  );
-  case T_LONG:    return new (C) LoadLNode (ctl, mem, adr, adr_type, rt->is_long() );
-  case T_FLOAT:   return new (C) LoadFNode (ctl, mem, adr, adr_type, rt            );
-  case T_DOUBLE:  return new (C) LoadDNode (ctl, mem, adr, adr_type, rt            );
-  case T_ADDRESS: return new (C) LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr()  );
+  case T_BOOLEAN: return new (C) LoadUBNode(ctl, mem, adr, adr_type, rt->is_int(),  sem);
+  case T_BYTE:    return new (C) LoadBNode (ctl, mem, adr, adr_type, rt->is_int(),  sem);
+  case T_INT:     return new (C) LoadINode (ctl, mem, adr, adr_type, rt->is_int(),  sem);
+  case T_CHAR:    return new (C) LoadUSNode(ctl, mem, adr, adr_type, rt->is_int(),  sem);
+  case T_SHORT:   return new (C) LoadSNode (ctl, mem, adr, adr_type, rt->is_int(),  sem);
+  case T_LONG:    return new (C) LoadLNode (ctl, mem, adr, adr_type, rt->is_long(), false, sem);
+  case T_FLOAT:   return new (C) LoadFNode (ctl, mem, adr, adr_type, rt,            sem);
+  case T_DOUBLE:  return new (C) LoadDNode (ctl, mem, adr, adr_type, rt,            sem);
+  case T_ADDRESS: return new (C) LoadPNode (ctl, mem, adr, adr_type, rt->is_ptr(),  sem);
   case T_OBJECT:
 #ifdef _LP64
     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
-      Node* load  = gvn.transform(new (C) LoadNNode(ctl, mem, adr, adr_type, rt->make_narrowoop()));
+      Node* load  = gvn.transform(new (C) LoadNNode(ctl, mem, adr, adr_type, rt->make_narrowoop(), sem));
       return new (C) DecodeNNode(load, load->bottom_type()->make_ptr());
     } else
 #endif
     {
       assert(!adr->bottom_type()->is_ptr_to_narrowoop() && !adr->bottom_type()->is_ptr_to_narrowklass(), "should have got back a narrow oop");
-      return new (C) LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr());
+      return new (C) LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr(), sem);
     }
   }
   ShouldNotReachHere();
   return (LoadNode*)NULL;
 }
 
-LoadLNode* LoadLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt) {
+LoadLNode* LoadLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt, Sem sem) {
   bool require_atomic = true;
-  return new (C) LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), require_atomic);
+  return new (C) LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), require_atomic, sem);
 }
 
@@ -2032,12 +2032,12 @@
 #ifdef _LP64
   if (adr_type->is_ptr_to_narrowklass()) {
     assert(UseCompressedClassPointers, "no compressed klasses");
-    Node* load_klass = gvn.transform(new (C) LoadNKlassNode(ctl, mem, adr, at, tk->make_narrowklass()));
+    Node* load_klass = gvn.transform(new (C) LoadNKlassNode(ctl, mem, adr, at, tk->make_narrowklass(), LoadNode::unordered));
     return new (C) DecodeNKlassNode(load_klass, load_klass->bottom_type()->make_ptr());
   }
 #endif
   assert(!adr_type->is_ptr_to_narrowklass() && !adr_type->is_ptr_to_narrowoop(), "should have got back a narrow oop");
-  return new (C) LoadKlassNode(ctl, mem, adr, at, tk);
+  return new (C) LoadKlassNode(ctl, mem, adr, at, tk, LoadNode::unordered);
 }
 
 //------------------------------Value------------------------------------------
@@ -2347,45 +2347,46 @@
 //=============================================================================
 //---------------------------StoreNode::make-----------------------------------
 // Polymorphic factory method:
-StoreNode* StoreNode::make( PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, BasicType bt ) {
+StoreNode* StoreNode::make(PhaseGVN& gvn, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, BasicType bt, Sem sem) {
+  assert((sem == unordered || sem == release), "unexpected");
   Compile* C = gvn.C;
-  assert( C->get_alias_index(adr_type) != Compile::AliasIdxRaw ||
-          ctl != NULL, "raw memory operations should have control edge");
+  assert(C->get_alias_index(adr_type) != Compile::AliasIdxRaw ||
+         ctl != NULL, "raw memory operations should have control edge");
 
   switch (bt) {
   case T_BOOLEAN:
-  case T_BYTE:    return new (C) StoreBNode(ctl, mem, adr, adr_type, val);
-  case T_INT:     return new (C) StoreINode(ctl, mem, adr, adr_type, val);
+  case T_BYTE:    return new (C) StoreBNode(ctl, mem, adr, adr_type, val, sem);
+  case T_INT:     return new (C) StoreINode(ctl, mem, adr, adr_type, val, sem);
   case T_CHAR:
-  case T_SHORT:   return new (C) StoreCNode(ctl, mem, adr, adr_type, val);
-  case T_LONG:    return new (C) StoreLNode(ctl, mem, adr, adr_type, val);
-  case T_FLOAT:   return new (C) StoreFNode(ctl, mem, adr, adr_type, val);
-  case T_DOUBLE:  return new (C) StoreDNode(ctl, mem, adr, adr_type, val);
+  case T_SHORT:   return new (C) StoreCNode(ctl, mem, adr, adr_type, val, sem);
+  case T_LONG:    return new (C) StoreLNode(ctl, mem, adr, adr_type, val, false, sem);
+  case T_FLOAT:   return new (C) StoreFNode(ctl, mem, adr, adr_type, val, sem);
+  case T_DOUBLE:  return new (C) StoreDNode(ctl, mem, adr, adr_type, val, sem);
   case T_METADATA:
   case T_ADDRESS:
   case T_OBJECT:
 #ifdef _LP64
     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
       val = gvn.transform(new (C) EncodePNode(val, val->bottom_type()->make_narrowoop()));
-      return new (C) StoreNNode(ctl, mem, adr, adr_type, val);
+      return new (C) StoreNNode(ctl, mem, adr, adr_type, val, sem);
     } else if (adr->bottom_type()->is_ptr_to_narrowklass() ||
                (UseCompressedClassPointers && val->bottom_type()->isa_klassptr() &&
                 adr->bottom_type()->isa_rawptr())) {
       val = gvn.transform(new (C) EncodePKlassNode(val, val->bottom_type()->make_narrowklass()));
-      return new (C) StoreNKlassNode(ctl, mem, adr, adr_type, val);
+      return new (C) StoreNKlassNode(ctl, mem, adr, adr_type, val, sem);
     }
 #endif
     {
-      return new (C) StorePNode(ctl, mem, adr, adr_type, val);
+      return new (C) StorePNode(ctl, mem, adr, adr_type, val, sem);
     }
   }
   ShouldNotReachHere();
   return (StoreNode*)NULL;
 }
 
-StoreLNode* StoreLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val) {
+StoreLNode* StoreLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, Sem sem) {
   bool require_atomic = true;
-  return new (C) StoreLNode(ctl, mem, adr, adr_type, val, require_atomic);
+  return new (C) StoreLNode(ctl, mem, adr, adr_type, val, require_atomic, sem);
 }
 
@@ -2778,12 +2779,12 @@
 
   Node *zero = phase->makecon(TypeLong::ZERO);
   Node *off  = phase->MakeConX(BytesPerLong);
-  mem = new (phase->C) StoreLNode(in(0),mem,adr,atp,zero);
+  mem = new (phase->C) StoreLNode(in(0),mem,adr,atp,zero,false,StoreNode::unordered);
   count--;
   while( count-- ) {
     mem = phase->transform(mem);
     adr = phase->transform(new (phase->C) AddPNode(base,adr,off));
-    mem = new (phase->C) StoreLNode(in(0),mem,adr,atp,zero);
+    mem = new (phase->C) StoreLNode(in(0),mem,adr,atp,zero,false,StoreNode::unordered);
   }
   return mem;
 }
@@ -2827,7 +2828,7 @@
     Node* adr = new (C) AddPNode(dest, dest, phase->MakeConX(offset));
     adr = phase->transform(adr);
     const TypePtr* atp = TypeRawPtr::BOTTOM;
-    mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT);
+    mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, StoreNode::unordered);
     mem = phase->transform(mem);
     offset += BytesPerInt;
   }
@@ -2888,7 +2889,7 @@
     Node* adr = new (C) AddPNode(dest, dest, phase->MakeConX(done_offset));
     adr = phase->transform(adr);
     const TypePtr* atp = TypeRawPtr::BOTTOM;
-    mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT);
+    mem = StoreNode::make(*phase, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT, StoreNode::unordered);
     mem = phase->transform(mem);
     done_offset += BytesPerInt;
   }
@@ -3762,14 +3763,14 @@
       ++new_long;
       off[nst] = offset;
       st[nst++] = StoreNode::make(*phase, ctl, zmem, adr, atp,
-                                  phase->longcon(con), T_LONG);
+                                  phase->longcon(con), T_LONG, StoreNode::unordered);
     } else {
       // Omit either if it is a zero.
       if (con0 != 0) {
         ++new_int;
         off[nst]  = offset;
         st[nst++] = StoreNode::make(*phase, ctl, zmem, adr, atp,
-                                    phase->intcon(con0), T_INT);
+                                    phase->intcon(con0), T_INT, StoreNode::unordered);
       }
       if (con1 != 0) {
         ++new_int;
@@ -3777,7 +3778,7 @@
         adr = make_raw_address(offset, phase);
         off[nst]  = offset;
         st[nst++] = StoreNode::make(*phase, ctl, zmem, adr, atp,
-                                    phase->intcon(con1), T_INT);
+                                    phase->intcon(con1), T_INT, StoreNode::unordered);
       }
     }
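memnode.hpp below is where the Sem enums and the is_acquire()/is_release() accessors live; release_if_reference() centralizes the "object references publish with release" policy. Condensed into a free-standing sketch with hypothetical names (not the real class layout):

#include <cassert>
#include <cstdio>

enum Sem { unordered = 0, release };
enum BasicType { T_INT, T_OBJECT, T_ARRAY, T_ADDRESS };

struct ToyStoreNode {
  const Sem _sem;  // fixed at node creation, queried at code emission
  explicit ToyStoreNode(Sem sem) : _sem(sem) {}
  bool is_release()   const { return _sem == release; }
  bool is_unordered() const { return !is_release(); }
  // Oop-like types conservatively get release semantics.
  static Sem release_if_reference(BasicType t) {
    return (t == T_ARRAY || t == T_ADDRESS || t == T_OBJECT) ? release : unordered;
  }
};

int main() {
  ToyStoreNode oop_store(ToyStoreNode::release_if_reference(T_OBJECT));
  ToyStoreNode int_store(ToyStoreNode::release_if_reference(T_INT));
  assert(oop_store.is_release() && int_store.is_unordered());
  std::printf("oop store release=%d, int store release=%d\n",
              (int)oop_store.is_release(), (int)int_store.is_release());
  return 0;
}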
diff --git a/src/share/vm/opto/memnode.hpp b/src/share/vm/opto/memnode.hpp
--- a/src/share/vm/opto/memnode.hpp
+++ b/src/share/vm/opto/memnode.hpp
@@ -134,20 +134,35 @@
 //------------------------------LoadNode---------------------------------------
 // Load value; requires Memory and Address
 class LoadNode : public MemNode {
+public:
+  typedef enum { unordered = 0, acquire } Sem;
+
+private:
+  // On platforms with weak memory ordering (e.g., PPC, Ia64) we distinguish
+  // loads that can be reordered, and such requiring acquire semantics to
+  // adhere to the Java specification.  The required behaviour is stored in
+  // this field.
+  const Sem _sem;
+
 protected:
-  virtual uint cmp( const Node &n ) const;
+  virtual uint cmp(const Node &n) const;
   virtual uint size_of() const; // Size is bigger
   const Type* const _type;      // What kind of value is loaded?
 public:
 
-  LoadNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt )
-    : MemNode(c,mem,adr,at), _type(rt) {
+  LoadNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt, Sem sem)
+    : MemNode(c,mem,adr,at), _type(rt), _sem(sem) {
     init_class_id(Class_Load);
   }
+  inline bool is_unordered() const { return !is_acquire(); }
+  inline bool is_acquire() const {
+    assert(_sem == unordered || _sem == acquire, "unexpected");
+    return _sem == acquire;
+  }
 
   // Polymorphic factory method:
-  static Node* make( PhaseGVN& gvn, Node *c, Node *mem, Node *adr,
-                     const TypePtr* at, const Type *rt, BasicType bt );
+  static Node* make(PhaseGVN& gvn, Node *c, Node *mem, Node *adr,
+                    const TypePtr* at, const Type *rt, BasicType bt, Sem sem);
 
   virtual uint hash()   const;  // Check the type
@@ -210,8 +225,8 @@
 // Load a byte (8bits signed) from memory
 class LoadBNode : public LoadNode {
 public:
-  LoadBNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE )
-    : LoadNode(c,mem,adr,at,ti) {}
+  LoadBNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, Sem sem)
+    : LoadNode(c, mem, adr, at, ti, sem) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
@@ -224,8 +239,8 @@
 // Load a unsigned byte (8bits unsigned) from memory
 class LoadUBNode : public LoadNode {
 public:
-  LoadUBNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt* ti = TypeInt::UBYTE )
-    : LoadNode(c, mem, adr, at, ti) {}
+  LoadUBNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt* ti, Sem sem)
+    : LoadNode(c, mem, adr, at, ti, sem) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node* Ideal(PhaseGVN *phase, bool can_reshape);
@@ -238,8 +253,8 @@
 // Load an unsigned short/char (16bits unsigned) from memory
 class LoadUSNode : public LoadNode {
 public:
-  LoadUSNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR )
-    : LoadNode(c,mem,adr,at,ti) {}
+  LoadUSNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, Sem sem)
+    : LoadNode(c, mem, adr, at, ti, sem) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
@@ -252,8 +267,8 @@
 // Load a short (16bits signed) from memory
 class LoadSNode : public LoadNode {
 public:
-  LoadSNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT )
-    : LoadNode(c,mem,adr,at,ti) {}
+  LoadSNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, Sem sem)
    : LoadNode(c, mem, adr, at, ti, sem) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
@@ -266,8 +281,8 @@
 // Load an integer from memory
 class LoadINode : public LoadNode {
 public:
-  LoadINode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT )
-    : LoadNode(c,mem,adr,at,ti) {}
+  LoadINode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti, Sem sem)
+    : LoadNode(c, mem, adr, at, ti, sem) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual int store_Opcode() const { return Op_StoreI; }
@@ -278,8 +293,8 @@
 // Load an array length from the array
 class LoadRangeNode : public LoadINode {
 public:
-  LoadRangeNode( Node *c, Node *mem, Node *adr, const TypeInt *ti = TypeInt::POS )
-    : LoadINode(c,mem,adr,TypeAryPtr::RANGE,ti) {}
+  LoadRangeNode(Node *c, Node *mem, Node *adr, const TypeInt *ti = TypeInt::POS)
+    : LoadINode(c, mem, adr, TypeAryPtr::RANGE, ti, LoadNode::unordered) {}
   virtual int Opcode() const;
   virtual const Type *Value( PhaseTransform *phase ) const;
   virtual Node *Identity( PhaseTransform *phase );
@@ -298,18 +313,16 @@
   const bool _require_atomic_access;  // is piecewise load forbidden?
 
 public:
-  LoadLNode( Node *c, Node *mem, Node *adr, const TypePtr* at,
-             const TypeLong *tl = TypeLong::LONG,
-             bool require_atomic_access = false )
-    : LoadNode(c,mem,adr,at,tl)
-    , _require_atomic_access(require_atomic_access)
-  {}
+  LoadLNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeLong *tl,
+            bool require_atomic_access, Sem sem)
+    : LoadNode(c, mem, adr, at, tl, sem), _require_atomic_access(require_atomic_access) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegL; }
   virtual int store_Opcode() const { return Op_StoreL; }
   virtual BasicType memory_type() const { return T_LONG; }
   bool require_atomic_access() { return _require_atomic_access; }
-  static LoadLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt);
+  static LoadLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type,
+                                const Type* rt, Sem sem);
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const {
     LoadNode::dump_spec(st);
@@ -322,8 +335,8 @@
 // Load a long from unaligned memory
 class LoadL_unalignedNode : public LoadLNode {
 public:
-  LoadL_unalignedNode( Node *c, Node *mem, Node *adr, const TypePtr* at )
-    : LoadLNode(c,mem,adr,at) {}
+  LoadL_unalignedNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Sem sem)
+    : LoadLNode(c, mem, adr, at, TypeLong::LONG, false, sem) {}
   virtual int Opcode() const;
 };
 
@@ -331,8 +344,8 @@
 // Load a float (64 bits) from memory
 class LoadFNode : public LoadNode {
 public:
-  LoadFNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t = Type::FLOAT )
-    : LoadNode(c,mem,adr,at,t) {}
+  LoadFNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t, Sem sem)
+    : LoadNode(c, mem, adr, at, t, sem) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegF; }
   virtual int store_Opcode() const { return Op_StoreF; }
@@ -343,8 +356,8 @@
 // Load a double (64 bits) from memory
 class LoadDNode : public LoadNode {
 public:
-  LoadDNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t = Type::DOUBLE )
-    : LoadNode(c,mem,adr,at,t) {}
+  LoadDNode(Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t, Sem sem)
+    : LoadNode(c, mem, adr, at, t, sem) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegD; }
   virtual int store_Opcode() const { return Op_StoreD; }
@@ -355,8 +368,8 @@
 // Load a double from unaligned memory
 class LoadD_unalignedNode : public LoadDNode {
 public:
-  LoadD_unalignedNode( Node *c, Node *mem, Node *adr, const TypePtr* at )
-    : LoadDNode(c,mem,adr,at) {}
+  LoadD_unalignedNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Sem sem)
+    : LoadDNode(c, mem, adr, at, Type::DOUBLE, sem) {}
   virtual int Opcode() const;
 };
 
@@ -364,8 +377,8 @@
 // Load a pointer from memory (either object or array)
 class LoadPNode : public LoadNode {
 public:
-  LoadPNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const TypePtr* t )
-    : LoadNode(c,mem,adr,at,t) {}
+  LoadPNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const TypePtr* t, Sem sem)
+    : LoadNode(c, mem, adr, at, t, sem) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegP; }
   virtual int store_Opcode() const { return Op_StoreP; }
@@ -387,8 +400,8 @@
 // Load a narrow oop from memory (either object or array)
 class LoadNNode : public LoadNode {
 public:
-  LoadNNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const Type* t )
-    : LoadNode(c,mem,adr,at,t) {}
+  LoadNNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const Type* t, Sem sem)
+    : LoadNode(c, mem, adr, at, t, sem) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegN; }
   virtual int store_Opcode() const { return Op_StoreN; }
@@ -409,8 +422,8 @@
 // Load a Klass from an object
 class LoadKlassNode : public LoadPNode {
 public:
-  LoadKlassNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeKlassPtr *tk )
-    : LoadPNode(c,mem,adr,at,tk) {}
+  LoadKlassNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeKlassPtr *tk, Sem sem)
+    : LoadPNode(c, mem, adr, at, tk, sem) {}
   virtual int Opcode() const;
   virtual const Type *Value( PhaseTransform *phase ) const;
   virtual Node *Identity( PhaseTransform *phase );
@@ -425,8 +438,8 @@
 // Load a narrow Klass from an object.
 class LoadNKlassNode : public LoadNNode {
 public:
-  LoadNKlassNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeNarrowKlass *tk )
-    : LoadNNode(c,mem,adr,at,tk) {}
+  LoadNKlassNode(Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeNarrowKlass *tk, Sem sem)
+    : LoadNNode(c, mem, adr, at, tk, sem) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegN; }
   virtual int store_Opcode() const { return Op_StoreNKlass; }
@@ -441,6 +454,16 @@
 //------------------------------StoreNode--------------------------------------
 // Store value; requires Store, Address and Value
 class StoreNode : public MemNode {
+public:
+  typedef enum { unordered = 0, release } Sem;
+private:
+  // On platforms with weak memory ordering (e.g., PPC, Ia64) we distinguish
+  // stores that can be reordered, and such requiring release semantics to
+  // adhere to the Java specification.  The required behaviour is stored in
+  // this field.
+  const Sem _sem;
+  // Needed for proper cloning.
+  virtual uint size_of() const { return sizeof(*this); }
 protected:
   virtual uint cmp( const Node &n ) const;
   virtual bool depends_only_on_test() const { return false; }
@@ -449,18 +472,44 @@
   Node *Ideal_sign_extended_input(PhaseGVN *phase, int num_bits);
 
 public:
+  // We must ensure that stores of object references will be visible
+  // only after the object's initialization. So the callers of this
+  // procedure must indicate that the store requires `release'
+  // semantics, if the stored value is an object reference that might
+  // point to a new object and may become externally visible.
+  StoreNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Sem sem)
+    : MemNode(c, mem, adr, at, val), _sem(sem) {
     init_class_id(Class_Store);
   }
-  StoreNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Node *oop_store )
-    : MemNode(c,mem,adr,at,val,oop_store) {
+  StoreNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Node *oop_store, Sem sem)
+    : MemNode(c, mem, adr, at, val, oop_store), _sem(sem) {
     init_class_id(Class_Store);
   }
 
-  // Polymorphic factory method:
-  static StoreNode* make( PhaseGVN& gvn, Node *c, Node *mem, Node *adr,
-                          const TypePtr* at, Node *val, BasicType bt );
+  inline bool is_unordered() const { return !is_release(); }
+  inline bool is_release() const {
+    assert((_sem == unordered || _sem == release), "unexpected");
+    return _sem == release;
+  }
+
+  // Conservatively release stores of object references in order to
+  // ensure visibility of object initialization.
+  static inline Sem release_if_reference(const BasicType t) {
+    const Sem s = (t == T_ARRAY ||
+                   t == T_ADDRESS || // Might be the address of an object reference (`boxing').
+                   t == T_OBJECT) ? release : unordered;
+    return s;
+  }
+
+  // Polymorphic factory method
+  //
+  // We must ensure that stores of object references will be visible
+  // only after the object's initialization. So the callers of this
+  // procedure must indicate that the store requires `release'
+  // semantics, if the stored value is an object reference that might
+  // point to a new object and may become externally visible.
+  static StoreNode* make(PhaseGVN& gvn, Node *c, Node *mem, Node *adr,
+                         const TypePtr* at, Node *val, BasicType bt, Sem sem);
 
   virtual uint hash() const;    // Check the type
@@ -491,7 +540,8 @@
 // Store byte to memory
 class StoreBNode : public StoreNode {
 public:
-  StoreBNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StoreBNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Sem sem)
+    : StoreNode(c, mem, adr, at, val, sem) {}
   virtual int Opcode() const;
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual BasicType memory_type() const { return T_BYTE; }
@@ -501,7 +551,8 @@
 // Store char/short to memory
 class StoreCNode : public StoreNode {
 public:
-  StoreCNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StoreCNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Sem sem)
+    : StoreNode(c, mem, adr, at, val, sem) {}
   virtual int Opcode() const;
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual BasicType memory_type() const { return T_CHAR; }
@@ -511,7 +562,8 @@
 // Store int to memory
 class StoreINode : public StoreNode {
 public:
-  StoreINode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StoreINode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Sem sem)
+    : StoreNode(c, mem, adr, at, val, sem) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_INT; }
 };
@@ -528,15 +580,12 @@
   const bool _require_atomic_access;  // is piecewise store forbidden?
 public:
-  StoreLNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val,
-              bool require_atomic_access = false )
-    : StoreNode(c,mem,adr,at,val)
-    , _require_atomic_access(require_atomic_access)
-  {}
+  StoreLNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, bool require_atomic_access, Sem sem)
+    : StoreNode(c, mem, adr, at, val, sem), _require_atomic_access(require_atomic_access) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_LONG; }
   bool require_atomic_access() { return _require_atomic_access; }
-  static StoreLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val);
+  static StoreLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, Sem sem);
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const {
     StoreNode::dump_spec(st);
@@ -549,7 +598,8 @@
 // Store float to memory
 class StoreFNode : public StoreNode {
 public:
-  StoreFNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StoreFNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Sem sem)
+    : StoreNode(c, mem, adr, at, val, sem) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_FLOAT; }
 };
@@ -558,7 +608,8 @@
 // Store double to memory
 class StoreDNode : public StoreNode {
 public:
-  StoreDNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StoreDNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Sem sem)
+    : StoreNode(c, mem, adr, at, val, sem) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_DOUBLE; }
 };
@@ -567,7 +618,8 @@
 // Store pointer to memory
 class StorePNode : public StoreNode {
 public:
-  StorePNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StorePNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Sem sem)
+    : StoreNode(c, mem, adr, at, val, sem) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_ADDRESS; }
 };
@@ -576,7 +628,8 @@
 // Store narrow oop to memory
 class StoreNNode : public StoreNode {
 public:
-  StoreNNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+  StoreNNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Sem sem)
+    : StoreNode(c, mem, adr, at, val, sem) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_NARROWOOP; }
 };
@@ -585,7 +638,8 @@
 // Store narrow klass to memory
 class StoreNKlassNode : public StoreNNode {
 public:
-  StoreNKlassNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNNode(c,mem,adr,at,val) {}
+  StoreNKlassNode(Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Sem sem)
+    : StoreNNode(c, mem, adr, at, val, sem) {}
   virtual int Opcode() const;
   virtual BasicType memory_type() const { return T_NARROWKLASS; }
 };
@@ -606,7 +660,7 @@
 public:
   StoreCMNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Node *oop_store, int oop_alias_idx ) :
-    StoreNode(c,mem,adr,at,val,oop_store),
+    StoreNode(c, mem, adr, at, val, oop_store, StoreNode::release),
     _oop_alias_idx(oop_alias_idx) {
     assert(_oop_alias_idx >= Compile::AliasIdxRaw ||
            _oop_alias_idx == Compile::AliasIdxBot && Compile::current()->AliasLevel() == 0,
@@ -626,8 +680,8 @@
 // On PowerPC and friends it's a real load-locked.
 class LoadPLockedNode : public LoadPNode {
 public:
-  LoadPLockedNode( Node *c, Node *mem, Node *adr )
-    : LoadPNode(c,mem,adr,TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM) {}
+  LoadPLockedNode(Node *c, Node *mem, Node *adr, Sem sem)
+    : LoadPNode(c, mem, adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, sem) {}
   virtual int Opcode() const;
   virtual int store_Opcode() const { return Op_StorePConditional; }
   virtual bool depends_only_on_test() const { return true; }
diff --git a/src/share/vm/opto/mulnode.cpp b/src/share/vm/opto/mulnode.cpp
--- a/src/share/vm/opto/mulnode.cpp
+++ b/src/share/vm/opto/mulnode.cpp
@@ -485,7 +485,8 @@
     Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control),
                                            load->in(MemNode::Memory),
                                            load->in(MemNode::Address),
-                                           load->adr_type());
+                                           load->adr_type(),
+                                           TypeInt::CHAR, LoadNode::unordered);
     ldus = phase->transform(ldus);
     return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF));
   }
@@ -496,7 +497,8 @@
     Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control),
                                            load->in(MemNode::Memory),
                                            load->in(MemNode::Address),
-                                           load->adr_type());
+                                           load->adr_type(),
+                                           TypeInt::UBYTE, LoadNode::unordered);
     ldub = phase->transform(ldub);
     return new (phase->C) AndINode(ldub, phase->intcon(mask));
   }
@@ -931,9 +933,10 @@
         ld->outcnt() == 1 && ld->unique_out() == shl)
       // Replace zero-extension-load with sign-extension-load
       return new (phase->C) LoadSNode( ld->in(MemNode::Control),
-                                       ld->in(MemNode::Memory),
-                                       ld->in(MemNode::Address),
-                                       ld->adr_type());
+                                       ld->in(MemNode::Memory),
+                                       ld->in(MemNode::Address),
+                                       ld->adr_type(), TypeInt::SHORT,
+                                       LoadNode::unordered);
   }
 
   // Check for "(byte[i] <<24)>>24" which simply sign-extends
diff --git a/src/share/vm/opto/parse1.cpp b/src/share/vm/opto/parse1.cpp
--- a/src/share/vm/opto/parse1.cpp
+++ b/src/share/vm/opto/parse1.cpp
@@ -106,24 +106,24 @@
   // Very similar to LoadNode::make, except we handle un-aligned longs and
   // doubles on Sparc.  Intel can handle them just fine directly.
   Node *l;
-  switch( bt ) {                // Signature is flattened
-  case T_INT:     l = new (C) LoadINode( ctl, mem, adr, TypeRawPtr::BOTTOM ); break;
-  case T_FLOAT:   l = new (C) LoadFNode( ctl, mem, adr, TypeRawPtr::BOTTOM ); break;
-  case T_ADDRESS: l = new (C) LoadPNode( ctl, mem, adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM ); break;
-  case T_OBJECT:  l = new (C) LoadPNode( ctl, mem, adr, TypeRawPtr::BOTTOM, TypeInstPtr::BOTTOM ); break;
+  switch (bt) {                // Signature is flattened
+  case T_INT:     l = new (C) LoadINode(ctl, mem, adr, TypeRawPtr::BOTTOM, TypeInt::INT, LoadNode::unordered); break;
+  case T_FLOAT:   l = new (C) LoadFNode(ctl, mem, adr, TypeRawPtr::BOTTOM, Type::FLOAT, LoadNode::unordered); break;
+  case T_ADDRESS: l = new (C) LoadPNode(ctl, mem, adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM, LoadNode::unordered); break;
+  case T_OBJECT:  l = new (C) LoadPNode(ctl, mem, adr, TypeRawPtr::BOTTOM, TypeInstPtr::BOTTOM, LoadNode::unordered); break;
   case T_LONG:
   case T_DOUBLE: {
     // Since arguments are in reverse order, the argument address 'adr'
     // refers to the back half of the long/double.  Recompute adr.
-    adr = basic_plus_adr( local_addrs_base, local_addrs, -(index+1)*wordSize );
-    if( Matcher::misaligned_doubles_ok ) {
+    adr = basic_plus_adr(local_addrs_base, local_addrs, -(index+1)*wordSize);
+    if (Matcher::misaligned_doubles_ok) {
       l = (bt == T_DOUBLE)
-        ? (Node*)new (C) LoadDNode( ctl, mem, adr, TypeRawPtr::BOTTOM )
-        : (Node*)new (C) LoadLNode( ctl, mem, adr, TypeRawPtr::BOTTOM );
+        ? (Node*)new (C) LoadDNode(ctl, mem, adr, TypeRawPtr::BOTTOM, Type::DOUBLE, LoadNode::unordered)
+        : (Node*)new (C) LoadLNode(ctl, mem, adr, TypeRawPtr::BOTTOM, TypeLong::LONG, false, LoadNode::unordered);
     } else {
       l = (bt == T_DOUBLE)
-        ? (Node*)new (C) LoadD_unalignedNode( ctl, mem, adr, TypeRawPtr::BOTTOM )
-        : (Node*)new (C) LoadL_unalignedNode( ctl, mem, adr, TypeRawPtr::BOTTOM );
+        ? (Node*)new (C) LoadD_unalignedNode(ctl, mem, adr, TypeRawPtr::BOTTOM, LoadNode::unordered)
+        : (Node*)new (C) LoadL_unalignedNode(ctl, mem, adr, TypeRawPtr::BOTTOM, LoadNode::unordered);
     }
     break;
   }
@@ -229,7 +229,7 @@
     Node *displaced_hdr = fetch_interpreter_state((index*2) + 1, T_ADDRESS,
                                                   monitors_addr, osr_buf);
-    store_to_memory(control(), box, displaced_hdr, T_ADDRESS, Compile::AliasIdxRaw);
+    store_to_memory(control(), box, displaced_hdr, T_ADDRESS, Compile::AliasIdxRaw, false, StoreNode::unordered);
 
     // Build a bogus FastLockNode (no code will be generated) and push the
     // monitor into our debug info.
@@ -1931,7 +1931,7 @@
   Node* klass = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), klass_addr, TypeInstPtr::KLASS) );
 
   Node* access_flags_addr = basic_plus_adr(klass, klass, in_bytes(Klass::access_flags_offset()));
-  Node* access_flags = make_load(NULL, access_flags_addr, TypeInt::INT, T_INT);
+  Node* access_flags = make_load(NULL, access_flags_addr, TypeInt::INT, T_INT, false, LoadNode::unordered);
 
   Node* mask  = _gvn.transform(new (C) AndINode(access_flags, intcon(JVM_ACC_HAS_FINALIZER)));
   Node* check = _gvn.transform(new (C) CmpINode(mask, intcon(0)));
diff --git a/src/share/vm/opto/parse2.cpp b/src/share/vm/opto/parse2.cpp
--- a/src/share/vm/opto/parse2.cpp
+++ b/src/share/vm/opto/parse2.cpp
@@ -50,7 +50,7 @@
   if (stopped())  return;     // guaranteed null or range check
   dec_sp(2);                  // Pop array and index
   const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
-  Node* ld = make_load(control(), adr, elem, elem_type, adr_type);
+  Node* ld = make_load(control(), adr, elem, elem_type, adr_type, false, LoadNode::unordered);
   push(ld);
 }
@@ -62,7 +62,7 @@
   Node* val = pop();
   dec_sp(2);                  // Pop array and index
   const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
-  store_to_memory(control(), adr, val, elem_type, adr_type);
+  store_to_memory(control(), adr, val, elem_type, adr_type, false, StoreNode::release_if_reference(elem_type));
 }
@@ -1720,14 +1720,14 @@
     a = array_addressing(T_LONG, 0);
     if (stopped())  return;     // guaranteed null or range check
     dec_sp(2);                  // Pop array and index
-    push_pair(make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS));
+    push_pair(make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS, false, LoadNode::unordered));
     break;
   }
   case Bytecodes::_daload: {
     a = array_addressing(T_DOUBLE, 0);
     if (stopped())  return;     // guaranteed null or range check
     dec_sp(2);                  // Pop array and index
-    push_pair(make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES));
+    push_pair(make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES, false, LoadNode::unordered));
     break;
   }
   case Bytecodes::_bastore: array_store(T_BYTE);  break;
@@ -1744,7 +1744,7 @@
     a = pop();                  // the array itself
     const TypeOopPtr* elemtype = _gvn.type(a)->is_aryptr()->elem()->make_oopptr();
     const TypeAryPtr* adr_type = TypeAryPtr::OOPS;
-    Node* store = store_oop_to_array(control(), a, d, adr_type, c, elemtype, T_OBJECT);
+    Node* store = store_oop_to_array(control(), a, d, adr_type, c, elemtype, T_OBJECT, StoreNode::release);
     break;
   }
   case Bytecodes::_lastore: {
@@ -1752,7 +1752,7 @@
     if (stopped())  return;     // guaranteed null or range check
     c = pop_pair();
     dec_sp(2);                  // Pop array and index
-    store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS);
+    store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS, false, StoreNode::unordered);
     break;
   }
   case Bytecodes::_dastore: {
@@ -1761,7 +1761,7 @@
     c = pop_pair();
     dec_sp(2);                  // Pop array and index
     c = dstore_rounding(c);
-    store_to_memory(control(), a, c, T_DOUBLE, TypeAryPtr::DOUBLES);
+    store_to_memory(control(), a, c, T_DOUBLE, TypeAryPtr::DOUBLES, false, StoreNode::unordered);
     break;
   }
   case Bytecodes::_getfield:
diff --git a/src/share/vm/opto/parse3.cpp b/src/share/vm/opto/parse3.cpp
--- a/src/share/vm/opto/parse3.cpp
+++ b/src/share/vm/opto/parse3.cpp
@@ -228,7 +228,9 @@
     type = Type::get_const_basic_type(bt);
   }
   // Build the load.
-  Node* ld = make_load(NULL, adr, type, bt, adr_type, is_vol);
+  // Loads from volatile fields need acquire semantics.
+  LoadNode::Sem sem = is_vol ? LoadNode::acquire : LoadNode::unordered;
+  Node* ld = make_load(NULL, adr, type, bt, adr_type, is_vol, sem);
 
   // Adjust Java stack
   if (type2size[bt] == 1)
@@ -288,6 +290,16 @@
   // Round doubles before storing
   if (bt == T_DOUBLE)
     val = dstore_rounding(val);
+  // Conservatively release stores of object references.
+  const StoreNode::Sem sem =
+    is_vol ?
+    // Volatile fields need releasing stores.
+    StoreNode::release :
+    // Non-volatile fields also need releasing stores if they hold an
+    // object reference, because the object reference might point to
+    // a freshly created object.
+    StoreNode::release_if_reference(bt);
+  // Store the value.
   Node* store;
   if (bt == T_OBJECT) {
@@ -297,9 +309,9 @@
     } else {
       field_type = TypeOopPtr::make_from_klass(field->type()->as_klass());
     }
-    store = store_oop_to_object( control(), obj, adr, adr_type, val, field_type, bt);
+    store = store_oop_to_object(control(), obj, adr, adr_type, val, field_type, bt, sem);
   } else {
-    store = store_to_memory( control(), adr, val, bt, adr_type, is_vol );
+    store = store_to_memory(control(), adr, val, bt, adr_type, is_vol, sem);
   }
 
   // If reference is volatile, prevent following volatile ops from
@@ -414,7 +426,7 @@
       Node* elem = expand_multianewarray(array_klass_1, &lengths[1], ndimensions-1, nargs);
       intptr_t offset = header + ((intptr_t)i << LogBytesPerHeapOop);
       Node* eaddr = basic_plus_adr(array, offset);
-      store_oop_to_array(control(), array, eaddr, adr_type, elem, elemtype, T_OBJECT);
+      store_oop_to_array(control(), array, eaddr, adr_type, elem, elemtype, T_OBJECT, StoreNode::unordered);
     }
   }
   return array;
@@ -503,7 +515,7 @@
     // Fill-in it with values
     for (j = 0; j < ndimensions; j++) {
       Node *dims_elem = array_element_address(dims, intcon(j), T_INT);
-      store_to_memory(control(), dims_elem, length[j], T_INT, TypeAryPtr::INTS);
+      store_to_memory(control(), dims_elem, length[j], T_INT, TypeAryPtr::INTS, false, StoreNode::unordered);
     }
   }
diff --git a/src/share/vm/opto/parseHelper.cpp b/src/share/vm/opto/parseHelper.cpp
--- a/src/share/vm/opto/parseHelper.cpp
+++ b/src/share/vm/opto/parseHelper.cpp
@@ -222,7 +222,7 @@
   Node* init_thread_offset = _gvn.MakeConX(in_bytes(InstanceKlass::init_thread_offset()));
   Node* adr_node = basic_plus_adr(kls, kls, init_thread_offset);
-  Node* init_thread = make_load(NULL, adr_node, TypeRawPtr::BOTTOM, T_ADDRESS);
+  Node* init_thread = make_load(NULL, adr_node, TypeRawPtr::BOTTOM, T_ADDRESS, false, LoadNode::unordered);
   Node *tst   = Bool( CmpP( init_thread, cur_thread), BoolTest::eq);
   IfNode* iff = create_and_map_if(control(), tst, PROB_ALWAYS, COUNT_UNKNOWN);
   set_control(IfTrue(iff));
@@ -232,7 +232,7 @@
   adr_node = basic_plus_adr(kls, kls, init_state_offset);
   // Use T_BOOLEAN for InstanceKlass::_init_state so the compiler
   // can generate code to load it as unsigned byte.
-  Node* init_state = make_load(NULL, adr_node, TypeInt::UBYTE, T_BOOLEAN);
+  Node* init_state = make_load(NULL, adr_node, TypeInt::UBYTE, T_BOOLEAN, false, LoadNode::unordered);
   Node* being_init = _gvn.intcon(InstanceKlass::being_initialized);
   tst = Bool( CmpI( init_state, being_init), BoolTest::eq);
   iff = create_and_map_if(control(), tst, PROB_ALWAYS, COUNT_UNKNOWN);
@@ -354,13 +354,13 @@
   Node *counters_node = makecon(adr_type);
   Node* adr_iic_node = basic_plus_adr(counters_node, counters_node,
     MethodCounters::interpreter_invocation_counter_offset_in_bytes());
-  Node* cnt = make_load(ctrl, adr_iic_node, TypeInt::INT, T_INT, adr_type);
+  Node* cnt = make_load(ctrl, adr_iic_node, TypeInt::INT, T_INT, adr_type, false, LoadNode::unordered);
 
   test_counter_against_threshold(cnt, limit);
 
   // Add one to the counter and store
   Node* incr = _gvn.transform(new (C) AddINode(cnt, _gvn.intcon(1)));
-  store_to_memory( ctrl, adr_iic_node, incr, T_INT, adr_type );
+  store_to_memory(ctrl, adr_iic_node, incr, T_INT, adr_type, false, StoreNode::unordered);
 }
 
 //----------------------------method_data_addressing---------------------------
@@ -392,9 +392,9 @@
   Node* adr_node = method_data_addressing(md, data, counter_offset, idx, stride);
 
   const TypePtr* adr_type = _gvn.type(adr_node)->is_ptr();
-  Node* cnt  = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type);
+  Node* cnt  = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type, false, LoadNode::unordered);
   Node* incr = _gvn.transform(new (C) AddINode(cnt, _gvn.intcon(DataLayout::counter_increment)));
-  store_to_memory(NULL, adr_node, incr, T_INT, adr_type );
+  store_to_memory(NULL, adr_node, incr, T_INT, adr_type, false, StoreNode::unordered);
 }
 
 //--------------------------test_for_osr_md_counter_at-------------------------
@@ -402,7 +402,7 @@
   Node* adr_node = method_data_addressing(md, data, counter_offset);
 
   const TypePtr* adr_type = _gvn.type(adr_node)->is_ptr();
-  Node* cnt = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type);
+  Node* cnt = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type, false, LoadNode::unordered);
 
   test_counter_against_threshold(cnt, limit);
 }
@@ -412,9 +412,9 @@
   Node* adr_node = method_data_addressing(md, data, DataLayout::flags_offset());
 
   const TypePtr* adr_type = _gvn.type(adr_node)->is_ptr();
-  Node* flags = make_load(NULL, adr_node, TypeInt::BYTE, T_BYTE, adr_type);
+  Node* flags = make_load(NULL, adr_node, TypeInt::BYTE, T_BYTE, adr_type, false, LoadNode::unordered);
   Node* incr  = _gvn.transform(new (C) OrINode(flags, _gvn.intcon(flag_constant)));
-  store_to_memory(NULL, adr_node, incr, T_BYTE, adr_type);
+  store_to_memory(NULL, adr_node, incr, T_BYTE, adr_type, false, StoreNode::unordered);
 }
 
 //----------------------------profile_taken_branch-----------------------------
diff --git a/src/share/vm/opto/stringopts.cpp b/src/share/vm/opto/stringopts.cpp
--- a/src/share/vm/opto/stringopts.cpp
+++ b/src/share/vm/opto/stringopts.cpp
@@ -1122,7 +1122,8 @@
     return kit.make_load(NULL, kit.basic_plus_adr(klass_node, field->offset_in_bytes()),
                          type, T_OBJECT,
-                         C->get_alias_index(mirror_type->add_offset(field->offset_in_bytes())));
+                         C->get_alias_index(mirror_type->add_offset(field->offset_in_bytes())),
+                         false, LoadNode::unordered);
 }
 
 Node* PhaseStringOpts::int_stringSize(GraphKit& kit, Node* arg) {
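[Editor's note, not part of the changeset: the call-site pattern repeated throughout this patch collapses to one rule, sketched below in the API as extended above. `is_vol` and `bt` stand for the access site's volatility flag and basic type, as in parse3.cpp; this is an illustration, not additional patched code. Volatile accesses get acquire/release semantics; non-volatile stores are released exactly when the stored value may be an object reference.]

    // Sketch of the per-access-site Sem selection (cf. Parse::do_get_xxx/do_put_xxx).
    LoadNode::Sem  load_sem  = is_vol ? LoadNode::acquire
                                      : LoadNode::unordered;
    StoreNode::Sem store_sem = is_vol ? StoreNode::release
                                      : StoreNode::release_if_reference(bt);
    Node* ld = make_load(NULL, adr, type, bt, adr_type, is_vol, load_sem);
    Node* st = store_to_memory(control(), adr, val, bt, adr_type, is_vol, store_sem);

[The stringopts stores above and below can all stay `unordered`: they fill a char[] that is published only later, by the reference store of the resulting String, and that reference store is the one carrying release semantics.]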
@@ -1314,7 +1315,7 @@
     Node* ch = __ AddI(r, __ intcon('0'));
 
     Node* st = __ store_to_memory(kit.control(), kit.array_element_address(char_array, m1, T_CHAR),
-                                  ch, T_CHAR, char_adr_idx);
+                                  ch, T_CHAR, char_adr_idx, false, StoreNode::unordered);
 
     IfNode* iff = kit.create_and_map_if(head, __ Bool(__ CmpI(q, __ intcon(0)), BoolTest::ne),
@@ -1356,7 +1357,7 @@
     } else {
       Node* m1 = __ SubI(charPos, __ intcon(1));
       Node* st = __ store_to_memory(kit.control(), kit.array_element_address(char_array, m1, T_CHAR),
-                                    sign, T_CHAR, char_adr_idx);
+                                    sign, T_CHAR, char_adr_idx, false, StoreNode::unordered);
 
       final_merge->init_req(1, kit.control());
       final_mem->init_req(1, st);
@@ -1387,7 +1388,8 @@
       ciTypeArray* value_array = t->const_oop()->as_type_array();
       for (int e = 0; e < c; e++) {
         __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
-                           __ intcon(value_array->char_at(o + e)), T_CHAR, char_adr_idx);
+                           __ intcon(value_array->char_at(o + e)), T_CHAR, char_adr_idx,
+                           false, StoreNode::unordered);
         start = __ AddI(start, __ intcon(1));
       }
     } else {
@@ -1607,7 +1609,7 @@
       }
       case StringConcat::CharMode: {
         __ store_to_memory(kit.control(), kit.array_element_address(char_array, start, T_CHAR),
-                           arg, T_CHAR, char_adr_idx);
+                           arg, T_CHAR, char_adr_idx, false, StoreNode::unordered);
         start = __ AddI(start, __ intcon(1));
         break;
       }
diff --git a/src/share/vm/opto/vectornode.hpp b/src/share/vm/opto/vectornode.hpp
--- a/src/share/vm/opto/vectornode.hpp
+++ b/src/share/vm/opto/vectornode.hpp
@@ -356,7 +356,7 @@
 class LoadVectorNode : public LoadNode {
  public:
   LoadVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeVect* vt)
-    : LoadNode(c, mem, adr, at, vt) {
+    : LoadNode(c, mem, adr, at, vt, LoadNode::unordered) {
     init_class_id(Class_LoadVector);
   }
 
@@ -380,7 +380,7 @@
 class StoreVectorNode : public StoreNode {
  public:
   StoreVectorNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
-    : StoreNode(c, mem, adr, at, val) {
+    : StoreNode(c, mem, adr, at, val, StoreNode::unordered) {
     assert(val->is_Vector() || val->is_LoadVector(), "sanity");
     init_class_id(Class_StoreVector);
   }
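[Editor's note, not part of the changeset: the reason reference stores are released conservatively is safe publication on weakly ordered hardware, and the point of threading `Sem` through every node is that a back end can query it at match time. The `.ad` predicate below is a hypothetical illustration of how a PPC64 matcher rule might use the new `is_unordered()`/`is_release()` queries; the actual PPC64 matcher rules are not part of this changeset.]

    // Safe publication: on PPC64/IA64 the two stores
    //   obj->_x = 42;     // initializing store, may stay unordered
    //   p->_ref = obj;    // reference store, must be a releasing store
    // may otherwise become visible out of order, letting another thread
    // read an uninitialized object through p->_ref.
    //
    // Hypothetical matcher guard: match a plain store instruction only
    // for unordered stores; a release store would match a rule that
    // emits a barrier (e.g., lwsync) in front of the store.
    predicate(n->as_Store()->is_unordered());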