src/share/vm/opto/graphKit.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File hotspot Sdiff src/share/vm/opto

src/share/vm/opto/graphKit.cpp

Print this page
rev 10072 : 8087341: C2 doesn't optimize redundant memory operations with G1
Summary: effect of memory barrier in post barrier is too wide
Reviewed-by:


3132   // from sliding up past the just-emitted store.
3133 
3134   MemBarNode* mb = MemBarNode::make(C, opcode, alias_idx, precedent);
3135   mb->set_req(TypeFunc::Control,control());
3136   if (alias_idx == Compile::AliasIdxBot) {
3137     mb->set_req(TypeFunc::Memory, merged_memory()->base_memory());
3138   } else {
3139     assert(!(opcode == Op_Initialize && alias_idx != Compile::AliasIdxRaw), "fix caller");
3140     mb->set_req(TypeFunc::Memory, memory(alias_idx));
3141   }
3142   Node* membar = _gvn.transform(mb);
3143   set_control(_gvn.transform(new ProjNode(membar, TypeFunc::Control)));
3144   if (alias_idx == Compile::AliasIdxBot) {
3145     merged_memory()->set_base_memory(_gvn.transform(new ProjNode(membar, TypeFunc::Memory)));
3146   } else {
3147     set_memory(_gvn.transform(new ProjNode(membar, TypeFunc::Memory)),alias_idx);
3148   }
3149   return membar;
3150 }
3151 













3152 //------------------------------shared_lock------------------------------------
3153 // Emit locking code.
3154 FastLockNode* GraphKit::shared_lock(Node* obj) {
3155   // bci is either a monitorenter bc or InvocationEntryBci
3156   // %%% SynchronizationEntryBCI is redundant; use InvocationEntryBci in interfaces
3157   assert(SynchronizationEntryBCI == InvocationEntryBci, "");
3158 
3159   if( !GenerateSynchronizationCode )
3160     return NULL;                // Not locking things?
3161   if (stopped())                // Dead monitor?
3162     return NULL;
3163 
3164   assert(dead_locals_are_killed(), "should kill locals before sync. point");
3165 
3166   // Box the stack location
3167   Node* box = _gvn.transform(new BoxLockNode(next_monitor()));
3168   Node* mem = reset_memory();
3169 
3170   FastLockNode * flock = _gvn.transform(new FastLockNode(0, obj, box) )->as_FastLock();
3171   if (UseBiasedLocking && PrintPreciseBiasedLockingStatistics) {


3823 
3824   IdealKit ideal(this, true);
3825 
3826   // Convert the pointer to an int prior to doing math on it
3827   Node* cast = __ CastPX(__ ctrl(), adr);
3828 
3829   // Divide by card size
3830   assert(Universe::heap()->barrier_set()->is_a(BarrierSet::CardTableModRef),
3831          "Only one we handle so far.");
3832   Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );
3833 
3834   // Combine card table base and card offset
3835   Node* card_adr = __ AddP(__ top(), byte_map_base_node(), card_offset );
3836 
3837   // Get the alias_index for raw card-mark memory
3838   int adr_type = Compile::AliasIdxRaw;
3839   Node*   zero = __ ConI(0); // Dirty card value
3840   BasicType bt = T_BYTE;
3841 
3842   if (UseConcMarkSweepGC && UseCondCardMark) {
3843     insert_mem_bar(Op_MemBarVolatile);   // StoreLoad barrier
3844     __ sync_kit(this);
3845   }
3846 
3847   if (UseCondCardMark) {
3848     // The classic GC reference write barrier is typically implemented
3849     // as a store into the global card mark table.  Unfortunately
3850     // unconditional stores can result in false sharing and excessive
3851     // coherence traffic as well as false transactional aborts.
3852     // UseCondCardMark enables MP "polite" conditional card mark
3853     // stores.  In theory we could relax the load from ctrl() to
3854     // no_ctrl, but that doesn't buy much latitude.
3855     Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, bt, adr_type);
3856     __ if_then(card_val, BoolTest::ne, zero);
3857   }
3858 
3859   // Smash zero into card
3860   if( !UseConcMarkSweepGC ) {
3861     __ store(__ ctrl(), card_adr, zero, bt, adr_type, MemNode::unordered);
3862   } else {
3863     // Specialized path for CM store barrier


4263     // Does the store cause us to cross regions?
4264 
4265     // Should be able to do an unsigned compare of region_size instead of
4266     // an extra shift. Do we have an unsigned compare??
4267     // Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes);
4268     Node* xor_res =  __ URShiftX ( __ XorX( cast,  __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes));
4269 
4270     // if (xor_res == 0) same region so skip
4271     __ if_then(xor_res, BoolTest::ne, zeroX); {
4272 
4273       // No barrier if we are storing a NULL
4274       __ if_then(val, BoolTest::ne, null(), unlikely); {
4275 
4276         // Ok must mark the card if not already dirty
4277 
4278         // load the original value of the card
4279         Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
4280 
4281         __ if_then(card_val, BoolTest::ne, young_card); {
4282           sync_kit(ideal);
4283           // Use Op_MemBarVolatile to achieve the effect of a StoreLoad barrier.
4284           insert_mem_bar(Op_MemBarVolatile, oop_store);
4285           __ sync_kit(this);
4286 
4287           Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
4288           __ if_then(card_val_reload, BoolTest::ne, dirty_card); {
4289             g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
4290           } __ end_if();
4291         } __ end_if();
4292       } __ end_if();
4293     } __ end_if();
4294   } else {
4295     // Object.clone() intrinsic uses this path.
4296     g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
4297   }
4298 
4299   // Final sync IdealKit and GraphKit.
4300   final_sync(ideal);
4301 }
4302 #undef __
4303 
4304 




3132   // from sliding up past the just-emitted store.
3133 
3134   MemBarNode* mb = MemBarNode::make(C, opcode, alias_idx, precedent);
3135   mb->set_req(TypeFunc::Control,control());
3136   if (alias_idx == Compile::AliasIdxBot) {
3137     mb->set_req(TypeFunc::Memory, merged_memory()->base_memory());
3138   } else {
3139     assert(!(opcode == Op_Initialize && alias_idx != Compile::AliasIdxRaw), "fix caller");
3140     mb->set_req(TypeFunc::Memory, memory(alias_idx));
3141   }
3142   Node* membar = _gvn.transform(mb);
3143   set_control(_gvn.transform(new ProjNode(membar, TypeFunc::Control)));
3144   if (alias_idx == Compile::AliasIdxBot) {
3145     merged_memory()->set_base_memory(_gvn.transform(new ProjNode(membar, TypeFunc::Memory)));
3146   } else {
3147     set_memory(_gvn.transform(new ProjNode(membar, TypeFunc::Memory)),alias_idx);
3148   }
3149   return membar;
3150 }
3151 
// Emit a StoreLoad barrier for use inside a GC card-mark barrier
// (introduced by JDK-8087341: "effect of memory barrier in post barrier
// is too wide").  The MemBarVolatile consumes the *entire* current
// memory state as its input, but its memory projection is installed
// only on the raw slice (Compile::AliasIdxRaw) -- the only slice the
// subsequent card-table load/store uses.  All other slices are restored
// to the pre-barrier memory, so the barrier does not pessimize
// unrelated (e.g. Java-heap) memory operations.
// Callers: the UseCondCardMark path and the G1 post-barrier card check
// (see the two call sites in this change, replacing
// insert_mem_bar(Op_MemBarVolatile, ...)).
3152 void GraphKit::insert_store_load_for_barrier() {
3153   Node* mem = reset_memory();  // capture the full merged memory state and clear it
3154   MemBarNode* mb = MemBarNode::make(C, Op_MemBarVolatile, Compile::AliasIdxBot);
3155   mb->init_req(TypeFunc::Control, control());
3156   mb->init_req(TypeFunc::Memory, mem);  // barrier input spans all memory slices
3157   Node* membar = _gvn.transform(mb);
3158   set_control(_gvn.transform(new ProjNode(membar, TypeFunc::Control)));
3159   Node* newmem = _gvn.transform(new ProjNode(membar, TypeFunc::Memory));
3160   set_all_memory(mem);  // non-raw slices keep seeing the pre-barrier memory
3161   set_memory(newmem, Compile::AliasIdxRaw);  // only the raw (card-table) slice is ordered by the barrier
3162 }
3163 
3164 
3165 //------------------------------shared_lock------------------------------------
3166 // Emit locking code.
3167 FastLockNode* GraphKit::shared_lock(Node* obj) {
3168   // bci is either a monitorenter bc or InvocationEntryBci
3169   // %%% SynchronizationEntryBCI is redundant; use InvocationEntryBci in interfaces
3170   assert(SynchronizationEntryBCI == InvocationEntryBci, "");
3171 
3172   if( !GenerateSynchronizationCode )
3173     return NULL;                // Not locking things?
3174   if (stopped())                // Dead monitor?
3175     return NULL;
3176 
3177   assert(dead_locals_are_killed(), "should kill locals before sync. point");
3178 
3179   // Box the stack location
3180   Node* box = _gvn.transform(new BoxLockNode(next_monitor()));
3181   Node* mem = reset_memory();
3182 
3183   FastLockNode * flock = _gvn.transform(new FastLockNode(0, obj, box) )->as_FastLock();
3184   if (UseBiasedLocking && PrintPreciseBiasedLockingStatistics) {


3836 
3837   IdealKit ideal(this, true);
3838 
3839   // Convert the pointer to an int prior to doing math on it
3840   Node* cast = __ CastPX(__ ctrl(), adr);
3841 
3842   // Divide by card size
3843   assert(Universe::heap()->barrier_set()->is_a(BarrierSet::CardTableModRef),
3844          "Only one we handle so far.");
3845   Node* card_offset = __ URShiftX( cast, __ ConI(CardTableModRefBS::card_shift) );
3846 
3847   // Combine card table base and card offset
3848   Node* card_adr = __ AddP(__ top(), byte_map_base_node(), card_offset );
3849 
3850   // Get the alias_index for raw card-mark memory
3851   int adr_type = Compile::AliasIdxRaw;
3852   Node*   zero = __ ConI(0); // Dirty card value
3853   BasicType bt = T_BYTE;
3854 
3855   if (UseConcMarkSweepGC && UseCondCardMark) {
3856     insert_store_load_for_barrier();
3857     __ sync_kit(this);
3858   }
3859 
3860   if (UseCondCardMark) {
3861     // The classic GC reference write barrier is typically implemented
3862     // as a store into the global card mark table.  Unfortunately
3863     // unconditional stores can result in false sharing and excessive
3864     // coherence traffic as well as false transactional aborts.
3865     // UseCondCardMark enables MP "polite" conditional card mark
3866     // stores.  In theory we could relax the load from ctrl() to
3867     // no_ctrl, but that doesn't buy much latitude.
3868     Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, bt, adr_type);
3869     __ if_then(card_val, BoolTest::ne, zero);
3870   }
3871 
3872   // Smash zero into card
3873   if( !UseConcMarkSweepGC ) {
3874     __ store(__ ctrl(), card_adr, zero, bt, adr_type, MemNode::unordered);
3875   } else {
3876     // Specialized path for CM store barrier


4276     // Does the store cause us to cross regions?
4277 
4278     // Should be able to do an unsigned compare of region_size instead of
4279     // an extra shift. Do we have an unsigned compare??
4280     // Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes);
4281     Node* xor_res =  __ URShiftX ( __ XorX( cast,  __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes));
4282 
4283     // if (xor_res == 0) same region so skip
4284     __ if_then(xor_res, BoolTest::ne, zeroX); {
4285 
4286       // No barrier if we are storing a NULL
4287       __ if_then(val, BoolTest::ne, null(), unlikely); {
4288 
4289         // Ok must mark the card if not already dirty
4290 
4291         // load the original value of the card
4292         Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
4293 
4294         __ if_then(card_val, BoolTest::ne, young_card); {
4295           sync_kit(ideal);
4296           insert_store_load_for_barrier();

4297           __ sync_kit(this);
4298 
4299           Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
4300           __ if_then(card_val_reload, BoolTest::ne, dirty_card); {
4301             g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
4302           } __ end_if();
4303         } __ end_if();
4304       } __ end_if();
4305     } __ end_if();
4306   } else {
4308     // Object.clone() intrinsic uses this path.
4308     g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
4309   }
4310 
4311   // Final sync IdealKit and GraphKit.
4312   final_sync(ideal);
4313 }
4314 #undef __
4315 
4316 


src/share/vm/opto/graphKit.cpp
Index Unified diffs Context diffs Sdiffs Patch New Old Previous File Next File