902
903 const Type* val_t = igvn.type(val);
904 if (val_t->meet(TypePtr::NULL_PTR) == val_t) {
905 Node* null_cmp = new CmpPNode(val, igvn.zerocon(T_OBJECT));
906 Node* null_test = new BoolNode(null_cmp, BoolTest::ne);
907
908 IfNode* null_iff = new IfNode(old_ctrl, null_test, PROB_LIKELY(0.999), COUNT_UNKNOWN);
909 ctrl = new IfTrueNode(null_iff);
910 null_ctrl = new IfFalseNode(null_iff);
911
912 IdealLoopTree* loop = phase->get_loop(old_ctrl);
913 phase->register_control(null_iff, loop, old_ctrl);
914 phase->register_control(ctrl, loop, null_iff);
915 phase->register_control(null_ctrl, loop, null_iff);
916
917 phase->register_new_node(null_cmp, old_ctrl);
918 phase->register_new_node(null_test, old_ctrl);
919 }
920 }
921
// Emit the in-collection-set test for val and split control accordingly.
// On return, ctrl is the projection taken when val IS in the collection set
// (the unlikely path) and not_cset_ctrl the projection taken when it is not.
// raw_mem is the raw memory state used for the cset table load.
void ShenandoahBarrierC2Support::test_in_cset(Node*& ctrl, Node*& not_cset_ctrl, Node* val, Node* raw_mem, PhaseIdealLoop* phase) {
  Node* old_ctrl = ctrl;
  PhaseIterGVN& igvn = phase->igvn();

  // Compute the index into the in-cset fast-test table:
  // region index = raw address >> region-size-bytes shift.
  Node* raw_val = new CastP2XNode(old_ctrl, val);
  Node* cset_idx = new URShiftXNode(raw_val, igvn.intcon(ShenandoahHeapRegion::region_size_bytes_shift_jint()));
  Node* cset_addr = igvn.makecon(TypeRawPtr::make(ShenandoahHeap::in_cset_fast_test_addr()));
  // Raw (top base) address arithmetic into the table.
  Node* cset_load_addr = new AddPNode(phase->C->top(), cset_addr, cset_idx);
  // Load the per-region byte from the table; the branch below tests it
  // against zero (non-zero is treated as "in cset").
  Node* cset_load = new LoadBNode(old_ctrl, raw_mem, cset_load_addr,
                                  DEBUG_ONLY(phase->C->get_adr_type(Compile::AliasIdxRaw)) NOT_DEBUG(NULL),
                                  TypeInt::BYTE, MemNode::unordered);
  Node* cset_cmp = new CmpINode(cset_load, igvn.zerocon(T_INT));
  Node* cset_bool = new BoolNode(cset_cmp, BoolTest::ne);

  // The in-cset (true) path is expected to be rarely taken.
  IfNode* cset_iff = new IfNode(old_ctrl, cset_bool, PROB_UNLIKELY(0.999), COUNT_UNKNOWN);
  ctrl = new IfTrueNode(cset_iff);
  not_cset_ctrl = new IfFalseNode(cset_iff);

  IdealLoopTree *loop = phase->get_loop(old_ctrl);
  phase->register_control(cset_iff, loop, old_ctrl);
}
954
955 void ShenandoahBarrierC2Support::call_lrb_stub(Node*& ctrl, Node*& val, Node* load_addr, Node*& result_mem, Node* raw_mem, bool is_native, PhaseIdealLoop* phase) {
956 IdealLoopTree*loop = phase->get_loop(ctrl);
957 const TypePtr* obj_type = phase->igvn().type(val)->is_oopptr();
958
959 // The slow path stub consumes and produces raw memory in addition
960 // to the existing memory edges
961 Node* base = find_bottom_mem(ctrl, phase);
962 MergeMemNode* mm = MergeMemNode::make(base);
963 mm->set_memory_at(Compile::AliasIdxRaw, raw_mem);
964 phase->register_new_node(mm, ctrl);
965
966 address target = LP64_ONLY(UseCompressedOops) NOT_LP64(false) ?
967 CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow) :
968 CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier);
969
970 address calladdr = is_native ? CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native)
971 : target;
972 const char* name = is_native ? "load_reference_barrier_native" : "load_reference_barrier";
973 Node* call = new CallLeafNode(ShenandoahBarrierSetC2::shenandoah_load_reference_barrier_Type(obj_type), calladdr, name, TypeRawPtr::BOTTOM);
974
975 call->init_req(TypeFunc::Control, ctrl);
976 call->init_req(TypeFunc::I_O, phase->C->top());
977 call->init_req(TypeFunc::Memory, mm);
978 call->init_req(TypeFunc::FramePtr, phase->C->top());
979 call->init_req(TypeFunc::ReturnAdr, phase->C->top());
980 call->init_req(TypeFunc::Parms, val);
981 call->init_req(TypeFunc::Parms+1, load_addr);
982 phase->register_control(call, loop, ctrl);
983 ctrl = new ProjNode(call, TypeFunc::Control);
984 phase->register_control(ctrl, loop, call);
985 result_mem = new ProjNode(call, TypeFunc::Memory);
986 phase->register_new_node(result_mem, call);
987 val = new ProjNode(call, TypeFunc::Parms);
988 phase->register_new_node(val, call);
989 }
990
991 void ShenandoahBarrierC2Support::fix_ctrl(Node* barrier, Node* region, const MemoryGraphFixer& fixer, Unique_Node_List& uses, Unique_Node_List& uses_to_ignore, uint last, PhaseIdealLoop* phase) {
992 Node* ctrl = phase->get_ctrl(barrier);
993 Node* init_raw_mem = fixer.find_mem(ctrl, barrier);
994
995 // Update the control of all nodes that should be after the
996 // barrier control flow
997 uses.clear();
998 // Every node that is control dependent on the barrier's input
999 // control will be after the expanded barrier. The raw memory (if
1000 // its memory is control dependent on the barrier's input control)
1001 // must stay above the barrier.
1002 uses_to_ignore.clear();
1003 if (phase->has_ctrl(init_raw_mem) && phase->get_ctrl(init_raw_mem) == ctrl && !init_raw_mem->is_Phi()) {
1004 uses_to_ignore.push(init_raw_mem);
1005 }
1006 for (uint next = 0; next < uses_to_ignore.size(); next++) {
1007 Node *n = uses_to_ignore.at(next);
1008 for (uint i = 0; i < n->req(); i++) {
1074 Node* ctrl = phase->get_ctrl(barrier);
1075 IdealLoopTree* loop = phase->get_loop(ctrl);
1076 if (loop->_head->is_OuterStripMinedLoop()) {
1077 // Expanding a barrier here will break loop strip mining
1078 // verification. Transform the loop so the loop nest doesn't
1079 // appear as strip mined.
1080 OuterStripMinedLoopNode* outer = loop->_head->as_OuterStripMinedLoop();
1081 hide_strip_mined_loop(outer, outer->unique_ctrl_out()->as_CountedLoop(), phase);
1082 }
1083 }
1084
1085 Node_Stack stack(0);
1086 Node_List clones;
1087 for (int i = state->load_reference_barriers_count() - 1; i >= 0; i--) {
1088 ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
1089 if (lrb->is_redundant()) {
1090 continue;
1091 }
1092
1093 Node* ctrl = phase->get_ctrl(lrb);
1094 if ((ctrl->is_Proj() && ctrl->in(0)->is_CallJava()) || ctrl->is_CallJava()) {
1095 CallNode* call = ctrl->is_Proj() ? ctrl->in(0)->as_CallJava() : ctrl->as_CallJava();
1096 if (call->entry_point() == OptoRuntime::rethrow_stub()) {
1097 // The rethrow call may have too many projections to be
1098 // properly handled here. Given there's no reason for a
1099 // barrier to depend on the call, move it above the call
1100 stack.push(lrb, 0);
1101 do {
1102 Node* n = stack.node();
1103 uint idx = stack.index();
1104 if (idx < n->req()) {
1105 Node* in = n->in(idx);
1106 stack.set_index(idx+1);
1107 if (in != NULL) {
1108 if (phase->has_ctrl(in)) {
1109 if (phase->is_dominator(call, phase->get_ctrl(in))) {
1110 #ifdef ASSERT
1111 for (uint i = 0; i < stack.size(); i++) {
1112 assert(stack.node_at(i) != in, "node shouldn't have been seen yet");
1113 }
1214 // verification. Transform the loop so the loop nest doesn't
1215 // appear as strip mined.
1216 OuterStripMinedLoopNode* outer = loop->_head->as_OuterStripMinedLoop();
1217 hide_strip_mined_loop(outer, outer->unique_ctrl_out()->as_CountedLoop(), phase);
1218 }
1219 }
1220
1221 // Expand load-reference-barriers
1222 MemoryGraphFixer fixer(Compile::AliasIdxRaw, true, phase);
1223 Unique_Node_List uses_to_ignore;
1224 for (int i = state->load_reference_barriers_count() - 1; i >= 0; i--) {
1225 ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
1226 if (lrb->is_redundant()) {
1227 phase->igvn().replace_node(lrb, lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn));
1228 continue;
1229 }
1230 uint last = phase->C->unique();
1231 Node* ctrl = phase->get_ctrl(lrb);
1232 Node* val = lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn);
1233
1234 Node* orig_ctrl = ctrl;
1235
1236 Node* raw_mem = fixer.find_mem(ctrl, lrb);
1237 Node* init_raw_mem = raw_mem;
1238 Node* raw_mem_for_ctrl = fixer.find_mem(ctrl, NULL);
1239
1240 IdealLoopTree* loop = phase->get_loop(ctrl);
1241
1242 assert(val->bottom_type()->make_oopptr(), "need oop");
1243 assert(val->bottom_type()->make_oopptr()->const_oop() == NULL, "expect non-constant");
1244
1245 enum { _heap_stable = 1, _not_cset, _evac_path, PATH_LIMIT };
1246 Node* region = new RegionNode(PATH_LIMIT);
1247 Node* val_phi = new PhiNode(region, val->bottom_type()->is_oopptr());
1248 Node* raw_mem_phi = PhiNode::make(region, raw_mem, Type::MEMORY, TypeRawPtr::BOTTOM);
1249
1250 // Stable path.
1251 Node* heap_stable_ctrl = NULL;
1252 test_gc_state(ctrl, raw_mem, heap_stable_ctrl, phase, ShenandoahHeap::HAS_FORWARDED);
1253 IfNode* heap_stable_iff = heap_stable_ctrl->in(0)->as_If();
1254
1255 // Heap stable case
1256 region->init_req(_heap_stable, heap_stable_ctrl);
1257 val_phi->init_req(_heap_stable, val);
1258 raw_mem_phi->init_req(_heap_stable, raw_mem);
1259
1260 // Test for in-cset.
1261 // Wires !in_cset(obj) to slot 2 of region and phis
1262 Node* not_cset_ctrl = NULL;
1263 test_in_cset(ctrl, not_cset_ctrl, val, raw_mem, phase);
1264 if (not_cset_ctrl != NULL) {
1265 region->init_req(_not_cset, not_cset_ctrl);
1266 val_phi->init_req(_not_cset, val);
1267 raw_mem_phi->init_req(_not_cset, raw_mem);
1268 }
1269
1270 // Call lrb-stub and wire up that path in slots 4
1271 Node* result_mem = NULL;
1272
1273 Node* addr;
1274 if (ShenandoahSelfFixing) {
1275 VectorSet visited(Thread::current()->resource_area());
1276 addr = get_load_addr(phase, visited, lrb);
1277 } else {
1278 addr = phase->igvn().zerocon(T_OBJECT);
1279 }
1280 if (addr->Opcode() == Op_AddP) {
1281 Node* orig_base = addr->in(AddPNode::Base);
1282 Node* base = new CheckCastPPNode(ctrl, orig_base, orig_base->bottom_type(), true);
1283 phase->register_new_node(base, ctrl);
1284 if (addr->in(AddPNode::Base) == addr->in((AddPNode::Address))) {
1285 // Field access
1286 addr = addr->clone();
1287 addr->set_req(AddPNode::Base, base);
1288 addr->set_req(AddPNode::Address, base);
1289 phase->register_new_node(addr, ctrl);
1290 } else {
1291 Node* addr2 = addr->in(AddPNode::Address);
1292 if (addr2->Opcode() == Op_AddP && addr2->in(AddPNode::Base) == addr2->in(AddPNode::Address) &&
1293 addr2->in(AddPNode::Base) == orig_base) {
1294 addr2 = addr2->clone();
1295 addr2->set_req(AddPNode::Base, base);
1296 addr2->set_req(AddPNode::Address, base);
1297 phase->register_new_node(addr2, ctrl);
1298 addr = addr->clone();
1299 addr->set_req(AddPNode::Base, base);
1300 addr->set_req(AddPNode::Address, addr2);
1301 phase->register_new_node(addr, ctrl);
1302 }
1303 }
1304 }
1305 call_lrb_stub(ctrl, val, addr, result_mem, raw_mem, lrb->is_native(), phase);
1306 region->init_req(_evac_path, ctrl);
1307 val_phi->init_req(_evac_path, val);
1308 raw_mem_phi->init_req(_evac_path, result_mem);
1309
1310 phase->register_control(region, loop, heap_stable_iff);
1311 Node* out_val = val_phi;
1312 phase->register_new_node(val_phi, region);
1313 phase->register_new_node(raw_mem_phi, region);
1314
1315 fix_ctrl(lrb, region, fixer, uses, uses_to_ignore, last, phase);
1316
1317 ctrl = orig_ctrl;
1318
1319 phase->igvn().replace_node(lrb, out_val);
1320
1321 follow_barrier_uses(out_val, ctrl, uses, phase);
1322
1323 for(uint next = 0; next < uses.size(); next++ ) {
1324 Node *n = uses.at(next);
1325 assert(phase->get_ctrl(n) == ctrl, "bad control");
1326 assert(n != init_raw_mem, "should leave input raw mem above the barrier");
1327 phase->set_ctrl(n, region);
1328 follow_barrier_uses(n, ctrl, uses, phase);
1329 }
1330
1331 // The slow path call produces memory: hook the raw memory phi
1332 // from the expanded load reference barrier with the rest of the graph
1333 // which may require adding memory phis at every post dominated
1334 // region and at enclosing loop heads. Use the memory state
1335 // collected in memory_nodes to fix the memory graph. Update that
1336 // memory state as we go.
1337 fixer.fix_mem(ctrl, region, init_raw_mem, raw_mem_for_ctrl, raw_mem_phi, uses);
1338 }
2926 }
2927
// Decide whether this load-reference-barrier can be elided: walk the uses of
// the barrier's value looking for one that actually requires the barrier
// (the opcode cases below). Returns true when no such use is found.
bool ShenandoahLoadReferenceBarrierNode::is_redundant() {
  Unique_Node_List visited;
  Node_Stack stack(0);
  stack.push(this, 0);

  // Check if the barrier is actually useful: go over nodes looking for useful uses
  // (e.g. memory accesses). Stop once we detected a required use. Otherwise, walk
  // until we ran out of nodes, and then declare the barrier redundant.
  while (stack.size() > 0) {
    Node* n = stack.node();
    if (visited.member(n)) {
      // Node already examined via another path: don't process it twice.
      stack.pop();
      continue;
    }
    visited.push(n);
    bool visit_users = false;
    switch (n->Opcode()) {
      case Op_CallStaticJava:
        // Uncommon traps don't need barriers, values are handled during deoptimization. It also affects
        // optimizing null-checks into implicit null-checks.
        if (n->as_CallStaticJava()->uncommon_trap_request() != 0) {
          break;
        }
        // Deliberate fall-through for regular static calls: handled together
        // with the other call and atomic opcodes below.
      case Op_CallDynamicJava:
      case Op_CallLeaf:
      case Op_CallLeafNoFP:
      case Op_CompareAndSwapL:
      case Op_CompareAndSwapI:
      case Op_CompareAndSwapB:
      case Op_CompareAndSwapS:
      case Op_CompareAndSwapN:
      case Op_CompareAndSwapP:
      case Op_CompareAndExchangeL:
      case Op_CompareAndExchangeI:
      case Op_CompareAndExchangeB:
      case Op_CompareAndExchangeS:
      case Op_CompareAndExchangeN:
      case Op_CompareAndExchangeP:
      case Op_WeakCompareAndSwapL:
      case Op_WeakCompareAndSwapI:
      case Op_WeakCompareAndSwapB:
      case Op_WeakCompareAndSwapS:
      case Op_WeakCompareAndSwapN:
#else
        // Default to have excess barriers, rather than miss some.
        return false;
#endif
      }
    }

    stack.pop();
    if (visit_users) {
      // The current node is harmless by itself: keep walking its users.
      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
        Node* user = n->fast_out(i);
        if (user != NULL) {
          stack.push(user, 0);
        }
      }
    }
  }

  // No need for barrier found.
  return true;
}
|
902
903 const Type* val_t = igvn.type(val);
904 if (val_t->meet(TypePtr::NULL_PTR) == val_t) {
905 Node* null_cmp = new CmpPNode(val, igvn.zerocon(T_OBJECT));
906 Node* null_test = new BoolNode(null_cmp, BoolTest::ne);
907
908 IfNode* null_iff = new IfNode(old_ctrl, null_test, PROB_LIKELY(0.999), COUNT_UNKNOWN);
909 ctrl = new IfTrueNode(null_iff);
910 null_ctrl = new IfFalseNode(null_iff);
911
912 IdealLoopTree* loop = phase->get_loop(old_ctrl);
913 phase->register_control(null_iff, loop, old_ctrl);
914 phase->register_control(ctrl, loop, null_iff);
915 phase->register_control(null_ctrl, loop, null_iff);
916
917 phase->register_new_node(null_cmp, old_ctrl);
918 phase->register_new_node(null_test, old_ctrl);
919 }
920 }
921
922 Node* ShenandoahBarrierC2Support::clone_null_check(Node*& c, Node* val, Node* unc_ctrl, PhaseIdealLoop* phase) {
923 IdealLoopTree *loop = phase->get_loop(c);
924 Node* iff = unc_ctrl->in(0);
925 assert(iff->is_If(), "broken");
926 Node* new_iff = iff->clone();
927 new_iff->set_req(0, c);
928 phase->register_control(new_iff, loop, c);
929 Node* iffalse = new IfFalseNode(new_iff->as_If());
930 phase->register_control(iffalse, loop, new_iff);
931 Node* iftrue = new IfTrueNode(new_iff->as_If());
932 phase->register_control(iftrue, loop, new_iff);
933 c = iftrue;
934 const Type *t = phase->igvn().type(val);
935 assert(val->Opcode() == Op_CastPP, "expect cast to non null here");
936 Node* uncasted_val = val->in(1);
937 val = new CastPPNode(uncasted_val, t);
938 val->init_req(0, c);
939 phase->register_new_node(val, c);
940 return val;
941 }
942
943 void ShenandoahBarrierC2Support::fix_null_check(Node* unc, Node* unc_ctrl, Node* new_unc_ctrl,
944 Unique_Node_List& uses, PhaseIdealLoop* phase) {
945 IfNode* iff = unc_ctrl->in(0)->as_If();
946 Node* proj = iff->proj_out(0);
947 assert(proj != unc_ctrl, "bad projection");
948 Node* use = proj->unique_ctrl_out();
949
950 assert(use == unc || use->is_Region(), "what else?");
951
952 uses.clear();
953 if (use == unc) {
954 phase->set_idom(use, new_unc_ctrl, phase->dom_depth(use));
955 for (uint i = 1; i < unc->req(); i++) {
956 Node* n = unc->in(i);
957 if (phase->has_ctrl(n) && phase->get_ctrl(n) == proj) {
958 uses.push(n);
959 }
960 }
961 } else {
962 assert(use->is_Region(), "what else?");
963 uint idx = 1;
964 for (; use->in(idx) != proj; idx++);
965 for (DUIterator_Fast imax, i = use->fast_outs(imax); i < imax; i++) {
966 Node* u = use->fast_out(i);
967 if (u->is_Phi() && phase->get_ctrl(u->in(idx)) == proj) {
968 uses.push(u->in(idx));
969 }
970 }
971 }
972 for(uint next = 0; next < uses.size(); next++ ) {
973 Node *n = uses.at(next);
974 assert(phase->get_ctrl(n) == proj, "bad control");
975 phase->set_ctrl_and_loop(n, new_unc_ctrl);
976 if (n->in(0) == proj) {
977 phase->igvn().replace_input_of(n, 0, new_unc_ctrl);
978 }
979 for (uint i = 0; i < n->req(); i++) {
980 Node* m = n->in(i);
981 if (m != NULL && phase->has_ctrl(m) && phase->get_ctrl(m) == proj) {
982 uses.push(m);
983 }
984 }
985 }
986
987 phase->igvn().rehash_node_delayed(use);
988 int nb = use->replace_edge(proj, new_unc_ctrl);
989 assert(nb == 1, "only use expected");
990 }
991
// Emit the in-collection-set test for val and split control accordingly.
// On return, ctrl is the projection taken when val IS in the collection set
// (the unlikely path) and not_cset_ctrl the projection taken when it is not.
// raw_mem is the raw memory state used for the cset table load.
void ShenandoahBarrierC2Support::test_in_cset(Node*& ctrl, Node*& not_cset_ctrl, Node* val, Node* raw_mem, PhaseIdealLoop* phase) {
  Node* old_ctrl = ctrl;
  PhaseIterGVN& igvn = phase->igvn();

  // Compute the index into the in-cset fast-test table:
  // region index = raw address >> region-size-bytes shift.
  Node* raw_val = new CastP2XNode(old_ctrl, val);
  Node* cset_idx = new URShiftXNode(raw_val, igvn.intcon(ShenandoahHeapRegion::region_size_bytes_shift_jint()));
  Node* cset_addr = igvn.makecon(TypeRawPtr::make(ShenandoahHeap::in_cset_fast_test_addr()));
  // Raw (top base) address arithmetic into the table.
  Node* cset_load_addr = new AddPNode(phase->C->top(), cset_addr, cset_idx);
  // Load the per-region byte from the table; the branch below tests it
  // against zero (non-zero is treated as "in cset").
  Node* cset_load = new LoadBNode(old_ctrl, raw_mem, cset_load_addr,
                                  DEBUG_ONLY(phase->C->get_adr_type(Compile::AliasIdxRaw)) NOT_DEBUG(NULL),
                                  TypeInt::BYTE, MemNode::unordered);
  Node* cset_cmp = new CmpINode(cset_load, igvn.zerocon(T_INT));
  Node* cset_bool = new BoolNode(cset_cmp, BoolTest::ne);

  // The in-cset (true) path is expected to be rarely taken.
  IfNode* cset_iff = new IfNode(old_ctrl, cset_bool, PROB_UNLIKELY(0.999), COUNT_UNKNOWN);
  ctrl = new IfTrueNode(cset_iff);
  not_cset_ctrl = new IfFalseNode(cset_iff);

  IdealLoopTree *loop = phase->get_loop(old_ctrl);
  phase->register_control(cset_iff, loop, old_ctrl);
}
1024
1025 void ShenandoahBarrierC2Support::call_lrb_stub(Node*& ctrl, Node*& val, Node* load_addr, Node*& result_mem, Node* raw_mem, bool is_native, PhaseIdealLoop* phase) {
1026 IdealLoopTree*loop = phase->get_loop(ctrl);
1027 const TypePtr* obj_type = phase->igvn().type(val)->is_oopptr();
1028
1029 // The slow path stub consumes and produces raw memory in addition
1030 // to the existing memory edges
1031 Node* base = find_bottom_mem(ctrl, phase);
1032 MergeMemNode* mm = MergeMemNode::make(base);
1033 mm->set_memory_at(Compile::AliasIdxRaw, raw_mem);
1034 phase->register_new_node(mm, ctrl);
1035
1036 address target = LP64_ONLY(UseCompressedOops) NOT_LP64(false) ?
1037 CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow) :
1038 CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier);
1039
1040 address calladdr = is_native ? CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native)
1041 : target;
1042 const char* name = is_native ? "load_reference_barrier_native" : "load_reference_barrier";
1043 Node* call = new CallLeafNode(ShenandoahBarrierSetC2::shenandoah_load_reference_barrier_Type(), calladdr, name, TypeRawPtr::BOTTOM);
1044
1045 call->init_req(TypeFunc::Control, ctrl);
1046 call->init_req(TypeFunc::I_O, phase->C->top());
1047 call->init_req(TypeFunc::Memory, mm);
1048 call->init_req(TypeFunc::FramePtr, phase->C->top());
1049 call->init_req(TypeFunc::ReturnAdr, phase->C->top());
1050 call->init_req(TypeFunc::Parms, val);
1051 call->init_req(TypeFunc::Parms+1, load_addr);
1052 phase->register_control(call, loop, ctrl);
1053 ctrl = new ProjNode(call, TypeFunc::Control);
1054 phase->register_control(ctrl, loop, call);
1055 result_mem = new ProjNode(call, TypeFunc::Memory);
1056 phase->register_new_node(result_mem, call);
1057 val = new ProjNode(call, TypeFunc::Parms);
1058 phase->register_new_node(val, call);
1059 val = new CheckCastPPNode(ctrl, val, obj_type);
1060 phase->register_new_node(val, ctrl);
1061 }
1062
1063 void ShenandoahBarrierC2Support::fix_ctrl(Node* barrier, Node* region, const MemoryGraphFixer& fixer, Unique_Node_List& uses, Unique_Node_List& uses_to_ignore, uint last, PhaseIdealLoop* phase) {
1064 Node* ctrl = phase->get_ctrl(barrier);
1065 Node* init_raw_mem = fixer.find_mem(ctrl, barrier);
1066
1067 // Update the control of all nodes that should be after the
1068 // barrier control flow
1069 uses.clear();
1070 // Every node that is control dependent on the barrier's input
1071 // control will be after the expanded barrier. The raw memory (if
1072 // its memory is control dependent on the barrier's input control)
1073 // must stay above the barrier.
1074 uses_to_ignore.clear();
1075 if (phase->has_ctrl(init_raw_mem) && phase->get_ctrl(init_raw_mem) == ctrl && !init_raw_mem->is_Phi()) {
1076 uses_to_ignore.push(init_raw_mem);
1077 }
1078 for (uint next = 0; next < uses_to_ignore.size(); next++) {
1079 Node *n = uses_to_ignore.at(next);
1080 for (uint i = 0; i < n->req(); i++) {
1146 Node* ctrl = phase->get_ctrl(barrier);
1147 IdealLoopTree* loop = phase->get_loop(ctrl);
1148 if (loop->_head->is_OuterStripMinedLoop()) {
1149 // Expanding a barrier here will break loop strip mining
1150 // verification. Transform the loop so the loop nest doesn't
1151 // appear as strip mined.
1152 OuterStripMinedLoopNode* outer = loop->_head->as_OuterStripMinedLoop();
1153 hide_strip_mined_loop(outer, outer->unique_ctrl_out()->as_CountedLoop(), phase);
1154 }
1155 }
1156
1157 Node_Stack stack(0);
1158 Node_List clones;
1159 for (int i = state->load_reference_barriers_count() - 1; i >= 0; i--) {
1160 ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
1161 if (lrb->is_redundant()) {
1162 continue;
1163 }
1164
1165 Node* ctrl = phase->get_ctrl(lrb);
1166 Node* val = lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn);
1167
1168 CallStaticJavaNode* unc = NULL;
1169 Node* unc_ctrl = NULL;
1170 Node* uncasted_val = val;
1171
1172 for (DUIterator_Fast imax, i = lrb->fast_outs(imax); i < imax; i++) {
1173 Node* u = lrb->fast_out(i);
1174 if (u->Opcode() == Op_CastPP &&
1175 u->in(0) != NULL &&
1176 phase->is_dominator(u->in(0), ctrl)) {
1177 const Type* u_t = phase->igvn().type(u);
1178
1179 if (u_t->meet(TypePtr::NULL_PTR) != u_t &&
1180 u->in(0)->Opcode() == Op_IfTrue &&
1181 u->in(0)->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none) &&
1182 u->in(0)->in(0)->is_If() &&
1183 u->in(0)->in(0)->in(1)->Opcode() == Op_Bool &&
1184 u->in(0)->in(0)->in(1)->as_Bool()->_test._test == BoolTest::ne &&
1185 u->in(0)->in(0)->in(1)->in(1)->Opcode() == Op_CmpP &&
1186 u->in(0)->in(0)->in(1)->in(1)->in(1) == val &&
1187 u->in(0)->in(0)->in(1)->in(1)->in(2)->bottom_type() == TypePtr::NULL_PTR) {
1188 IdealLoopTree* loop = phase->get_loop(ctrl);
1189 IdealLoopTree* unc_loop = phase->get_loop(u->in(0));
1190
1191 if (!unc_loop->is_member(loop)) {
1192 continue;
1193 }
1194
1195 Node* branch = no_branches(ctrl, u->in(0), false, phase);
1196 assert(branch == NULL || branch == NodeSentinel, "was not looking for a branch");
1197 if (branch == NodeSentinel) {
1198 continue;
1199 }
1200
1201 phase->igvn().replace_input_of(u, 1, val);
1202 phase->igvn().replace_input_of(lrb, ShenandoahLoadReferenceBarrierNode::ValueIn, u);
1203 phase->set_ctrl(u, u->in(0));
1204 phase->set_ctrl(lrb, u->in(0));
1205 unc = u->in(0)->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
1206 unc_ctrl = u->in(0);
1207 val = u;
1208
1209 for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) {
1210 Node* u = val->fast_out(j);
1211 if (u == lrb) continue;
1212 phase->igvn().rehash_node_delayed(u);
1213 int nb = u->replace_edge(val, lrb);
1214 --j; jmax -= nb;
1215 }
1216
1217 RegionNode* r = new RegionNode(3);
1218 IfNode* iff = unc_ctrl->in(0)->as_If();
1219
1220 Node* ctrl_use = unc_ctrl->unique_ctrl_out();
1221 Node* unc_ctrl_clone = unc_ctrl->clone();
1222 phase->register_control(unc_ctrl_clone, loop, iff);
1223 Node* c = unc_ctrl_clone;
1224 Node* new_cast = clone_null_check(c, val, unc_ctrl_clone, phase);
1225 r->init_req(1, new_cast->in(0)->in(0)->as_If()->proj_out(0));
1226
1227 phase->igvn().replace_input_of(unc_ctrl, 0, c->in(0));
1228 phase->set_idom(unc_ctrl, c->in(0), phase->dom_depth(unc_ctrl));
1229 phase->lazy_replace(c, unc_ctrl);
1230 c = NULL;;
1231 phase->igvn().replace_input_of(val, 0, unc_ctrl_clone);
1232 phase->set_ctrl(val, unc_ctrl_clone);
1233
1234 IfNode* new_iff = new_cast->in(0)->in(0)->as_If();
1235 fix_null_check(unc, unc_ctrl_clone, r, uses, phase);
1236 Node* iff_proj = iff->proj_out(0);
1237 r->init_req(2, iff_proj);
1238 phase->register_control(r, phase->ltree_root(), iff);
1239
1240 Node* new_bol = new_iff->in(1)->clone();
1241 Node* new_cmp = new_bol->in(1)->clone();
1242 assert(new_cmp->Opcode() == Op_CmpP, "broken");
1243 assert(new_cmp->in(1) == val->in(1), "broken");
1244 new_bol->set_req(1, new_cmp);
1245 new_cmp->set_req(1, lrb);
1246 phase->register_new_node(new_bol, new_iff->in(0));
1247 phase->register_new_node(new_cmp, new_iff->in(0));
1248 phase->igvn().replace_input_of(new_iff, 1, new_bol);
1249 phase->igvn().replace_input_of(new_cast, 1, lrb);
1250
1251 for (DUIterator_Fast imax, i = lrb->fast_outs(imax); i < imax; i++) {
1252 Node* u = lrb->fast_out(i);
1253 if (u == new_cast || u == new_cmp) {
1254 continue;
1255 }
1256 phase->igvn().rehash_node_delayed(u);
1257 int nb = u->replace_edge(lrb, new_cast);
1258 assert(nb > 0, "no update?");
1259 --i; imax -= nb;
1260 }
1261
1262 for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
1263 Node* u = val->fast_out(i);
1264 if (u == lrb) {
1265 continue;
1266 }
1267 phase->igvn().rehash_node_delayed(u);
1268 int nb = u->replace_edge(val, new_cast);
1269 assert(nb > 0, "no update?");
1270 --i; imax -= nb;
1271 }
1272
1273 ctrl = unc_ctrl_clone;
1274 phase->set_ctrl_and_loop(lrb, ctrl);
1275 break;
1276 }
1277 }
1278 }
1279 if ((ctrl->is_Proj() && ctrl->in(0)->is_CallJava()) || ctrl->is_CallJava()) {
1280 CallNode* call = ctrl->is_Proj() ? ctrl->in(0)->as_CallJava() : ctrl->as_CallJava();
1281 if (call->entry_point() == OptoRuntime::rethrow_stub()) {
1282 // The rethrow call may have too many projections to be
1283 // properly handled here. Given there's no reason for a
1284 // barrier to depend on the call, move it above the call
1285 stack.push(lrb, 0);
1286 do {
1287 Node* n = stack.node();
1288 uint idx = stack.index();
1289 if (idx < n->req()) {
1290 Node* in = n->in(idx);
1291 stack.set_index(idx+1);
1292 if (in != NULL) {
1293 if (phase->has_ctrl(in)) {
1294 if (phase->is_dominator(call, phase->get_ctrl(in))) {
1295 #ifdef ASSERT
1296 for (uint i = 0; i < stack.size(); i++) {
1297 assert(stack.node_at(i) != in, "node shouldn't have been seen yet");
1298 }
1399 // verification. Transform the loop so the loop nest doesn't
1400 // appear as strip mined.
1401 OuterStripMinedLoopNode* outer = loop->_head->as_OuterStripMinedLoop();
1402 hide_strip_mined_loop(outer, outer->unique_ctrl_out()->as_CountedLoop(), phase);
1403 }
1404 }
1405
1406 // Expand load-reference-barriers
1407 MemoryGraphFixer fixer(Compile::AliasIdxRaw, true, phase);
1408 Unique_Node_List uses_to_ignore;
1409 for (int i = state->load_reference_barriers_count() - 1; i >= 0; i--) {
1410 ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
1411 if (lrb->is_redundant()) {
1412 phase->igvn().replace_node(lrb, lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn));
1413 continue;
1414 }
1415 uint last = phase->C->unique();
1416 Node* ctrl = phase->get_ctrl(lrb);
1417 Node* val = lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn);
1418
1419
1420 Node* orig_ctrl = ctrl;
1421
1422 Node* raw_mem = fixer.find_mem(ctrl, lrb);
1423 Node* init_raw_mem = raw_mem;
1424 Node* raw_mem_for_ctrl = fixer.find_mem(ctrl, NULL);
1425
1426 IdealLoopTree *loop = phase->get_loop(ctrl);
1427 CallStaticJavaNode* unc = lrb->pin_and_expand_null_check(phase->igvn());
1428 Node* unc_ctrl = NULL;
1429 if (unc != NULL) {
1430 if (val->in(ShenandoahLoadReferenceBarrierNode::Control) != ctrl) {
1431 unc = NULL;
1432 } else {
1433 unc_ctrl = val->in(ShenandoahLoadReferenceBarrierNode::Control);
1434 }
1435 }
1436
1437 Node* uncasted_val = val;
1438 if (unc != NULL) {
1439 uncasted_val = val->in(1);
1440 }
1441
1442 Node* heap_stable_ctrl = NULL;
1443 Node* null_ctrl = NULL;
1444
1445 assert(val->bottom_type()->make_oopptr(), "need oop");
1446 assert(val->bottom_type()->make_oopptr()->const_oop() == NULL, "expect non-constant");
1447
1448 enum { _heap_stable = 1, _not_cset, _evac_path, _null_path, PATH_LIMIT };
1449 Node* region = new RegionNode(PATH_LIMIT);
1450 Node* val_phi = new PhiNode(region, uncasted_val->bottom_type()->is_oopptr());
1451 Node* raw_mem_phi = PhiNode::make(region, raw_mem, Type::MEMORY, TypeRawPtr::BOTTOM);
1452
1453 // Stable path.
1454 test_gc_state(ctrl, raw_mem, heap_stable_ctrl, phase, ShenandoahHeap::HAS_FORWARDED);
1455 IfNode* heap_stable_iff = heap_stable_ctrl->in(0)->as_If();
1456
1457 // Heap stable case
1458 region->init_req(_heap_stable, heap_stable_ctrl);
1459 val_phi->init_req(_heap_stable, uncasted_val);
1460 raw_mem_phi->init_req(_heap_stable, raw_mem);
1461
1462 Node* reg2_ctrl = NULL;
1463 // Null case
1464 test_null(ctrl, val, null_ctrl, phase);
1465 if (null_ctrl != NULL) {
1466 reg2_ctrl = null_ctrl->in(0);
1467 region->init_req(_null_path, null_ctrl);
1468 val_phi->init_req(_null_path, uncasted_val);
1469 raw_mem_phi->init_req(_null_path, raw_mem);
1470 } else {
1471 region->del_req(_null_path);
1472 val_phi->del_req(_null_path);
1473 raw_mem_phi->del_req(_null_path);
1474 }
1475
1476 // Test for in-cset.
1477 // Wires !in_cset(obj) to slot 2 of region and phis
1478 Node* not_cset_ctrl = NULL;
1479 test_in_cset(ctrl, not_cset_ctrl, val, raw_mem, phase);
1480 if (not_cset_ctrl != NULL) {
1481 if (reg2_ctrl == NULL) reg2_ctrl = not_cset_ctrl->in(0);
1482 region->init_req(_not_cset, not_cset_ctrl);
1483 val_phi->init_req(_not_cset, uncasted_val);
1484 raw_mem_phi->init_req(_not_cset, raw_mem);
1485 }
1486
1487 // Resolve object when orig-value is in cset.
1488 // Make the unconditional resolve for fwdptr.
1489 Node* new_val = uncasted_val;
1490 if (unc_ctrl != NULL) {
1491 // Clone the null check in this branch to allow implicit null check
1492 new_val = clone_null_check(ctrl, val, unc_ctrl, phase);
1493 fix_null_check(unc, unc_ctrl, ctrl->in(0)->as_If()->proj_out(0), uses, phase);
1494
1495 IfNode* iff = unc_ctrl->in(0)->as_If();
1496 phase->igvn().replace_input_of(iff, 1, phase->igvn().intcon(1));
1497 }
1498
1499 // Call lrb-stub and wire up that path in slots 4
1500 Node* result_mem = NULL;
1501
1502 Node* fwd = new_val;
1503 Node* addr;
1504 if (ShenandoahSelfFixing) {
1505 VectorSet visited(Thread::current()->resource_area());
1506 addr = get_load_addr(phase, visited, lrb);
1507 } else {
1508 addr = phase->igvn().zerocon(T_OBJECT);
1509 }
1510 if (addr->Opcode() == Op_AddP) {
1511 Node* orig_base = addr->in(AddPNode::Base);
1512 Node* base = new CheckCastPPNode(ctrl, orig_base, orig_base->bottom_type(), true);
1513 phase->register_new_node(base, ctrl);
1514 if (addr->in(AddPNode::Base) == addr->in((AddPNode::Address))) {
1515 // Field access
1516 addr = addr->clone();
1517 addr->set_req(AddPNode::Base, base);
1518 addr->set_req(AddPNode::Address, base);
1519 phase->register_new_node(addr, ctrl);
1520 } else {
1521 Node* addr2 = addr->in(AddPNode::Address);
1522 if (addr2->Opcode() == Op_AddP && addr2->in(AddPNode::Base) == addr2->in(AddPNode::Address) &&
1523 addr2->in(AddPNode::Base) == orig_base) {
1524 addr2 = addr2->clone();
1525 addr2->set_req(AddPNode::Base, base);
1526 addr2->set_req(AddPNode::Address, base);
1527 phase->register_new_node(addr2, ctrl);
1528 addr = addr->clone();
1529 addr->set_req(AddPNode::Base, base);
1530 addr->set_req(AddPNode::Address, addr2);
1531 phase->register_new_node(addr, ctrl);
1532 }
1533 }
1534 }
1535 call_lrb_stub(ctrl, fwd, addr, result_mem, raw_mem, lrb->is_native(), phase);
1536 region->init_req(_evac_path, ctrl);
1537 val_phi->init_req(_evac_path, fwd);
1538 raw_mem_phi->init_req(_evac_path, result_mem);
1539
1540 phase->register_control(region, loop, heap_stable_iff);
1541 Node* out_val = val_phi;
1542 phase->register_new_node(val_phi, region);
1543 phase->register_new_node(raw_mem_phi, region);
1544
1545 fix_ctrl(lrb, region, fixer, uses, uses_to_ignore, last, phase);
1546
1547 ctrl = orig_ctrl;
1548
1549 if (unc != NULL) {
1550 for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
1551 Node* u = val->fast_out(i);
1552 Node* c = phase->ctrl_or_self(u);
1553 if (u != lrb && (c != ctrl || is_dominator_same_ctrl(c, lrb, u, phase))) {
1554 phase->igvn().rehash_node_delayed(u);
1555 int nb = u->replace_edge(val, out_val);
1556 --i, imax -= nb;
1557 }
1558 }
1559 if (val->outcnt() == 0) {
1560 phase->igvn()._worklist.push(val);
1561 }
1562 }
1563 phase->igvn().replace_node(lrb, out_val);
1564
1565 follow_barrier_uses(out_val, ctrl, uses, phase);
1566
1567 for(uint next = 0; next < uses.size(); next++ ) {
1568 Node *n = uses.at(next);
1569 assert(phase->get_ctrl(n) == ctrl, "bad control");
1570 assert(n != init_raw_mem, "should leave input raw mem above the barrier");
1571 phase->set_ctrl(n, region);
1572 follow_barrier_uses(n, ctrl, uses, phase);
1573 }
1574
1575 // The slow path call produces memory: hook the raw memory phi
1576 // from the expanded load reference barrier with the rest of the graph
1577 // which may require adding memory phis at every post dominated
1578 // region and at enclosing loop heads. Use the memory state
1579 // collected in memory_nodes to fix the memory graph. Update that
1580 // memory state as we go.
1581 fixer.fix_mem(ctrl, region, init_raw_mem, raw_mem_for_ctrl, raw_mem_phi, uses);
1582 }
3170 }
3171
3172 bool ShenandoahLoadReferenceBarrierNode::is_redundant() {
// Decide whether this load-reference barrier can be elided entirely.
// Returns true only when a DFS over the barrier's transitive uses finds no
// use that requires a properly-forwarded oop; returns false conservatively
// otherwise.
//
// Worklist state for the iterative DFS: 'visited' guards against revisiting
// (and thus against cycles), 'stack' holds the frontier, seeded with the
// barrier node itself.
3173 Unique_Node_List visited;
3174 Node_Stack stack(0);
3175 stack.push(this, 0);
3176
3177 // Check if the barrier is actually useful: go over nodes looking for useful uses
3178 // (e.g. memory accesses). Stop once we detected a required use. Otherwise, walk
3179 // until we ran out of nodes, and then declare the barrier redundant.
3180 while (stack.size() > 0) {
3181 Node* n = stack.node();
// Already examined this node (possibly via another path) — drop it.
3182 if (visited.member(n)) {
3183 stack.pop();
3184 continue;
3185 }
3186 visited.push(n);
// Set to true when 'n' is transparent to the barrier question and its own
// users must be inspected as well.
3187 bool visit_users = false;
// Classify the use by opcode. Calls and atomic (CAS/exchange) accesses are
// grouped together here; NOTE(review): their handling falls through to code
// not shown in this excerpt — presumably they count as uses that make the
// barrier required. Confirm against the full switch body.
3188 switch (n->Opcode()) {
3189 case Op_CallStaticJava:
3190 case Op_CallDynamicJava:
3191 case Op_CallLeaf:
3192 case Op_CallLeafNoFP:
3193 case Op_CompareAndSwapL:
3194 case Op_CompareAndSwapI:
3195 case Op_CompareAndSwapB:
3196 case Op_CompareAndSwapS:
3197 case Op_CompareAndSwapN:
3198 case Op_CompareAndSwapP:
3199 case Op_CompareAndExchangeL:
3200 case Op_CompareAndExchangeI:
3201 case Op_CompareAndExchangeB:
3202 case Op_CompareAndExchangeS:
3203 case Op_CompareAndExchangeN:
3204 case Op_CompareAndExchangeP:
3205 case Op_WeakCompareAndSwapL:
3206 case Op_WeakCompareAndSwapI:
3207 case Op_WeakCompareAndSwapB:
3208 case Op_WeakCompareAndSwapS:
3209 case Op_WeakCompareAndSwapN:
3310 #else
3311 // Default to have excess barriers, rather than miss some.
3312 return false;
3313 #endif
3314 }
3315 }
3316
// Done with 'n'; if it was transparent, push its users so the search
// continues through them.
3317 stack.pop();
3318 if (visit_users) {
3319 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
3320 Node* user = n->fast_out(i);
// Defensive NULL check on the out-edge; fast_out entries are normally
// non-NULL — TODO confirm whether this guard is actually reachable.
3321 if (user != NULL) {
3322 stack.push(user, 0);
3323 }
3324 }
3325 }
3326 }
3327
// Exhausted all transitive uses without finding one that needs the
// barrier's result.
3328 // No need for barrier found.
3329 return true;
3330 }
3331
3332 CallStaticJavaNode* ShenandoahLoadReferenceBarrierNode::pin_and_expand_null_check(PhaseIterGVN& igvn) {
3333 Node* val = in(ValueIn);
3334
3335 const Type* val_t = igvn.type(val);
3336
3337 if (val_t->meet(TypePtr::NULL_PTR) != val_t &&
3338 val->Opcode() == Op_CastPP &&
3339 val->in(0) != NULL &&
3340 val->in(0)->Opcode() == Op_IfTrue &&
3341 val->in(0)->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none) &&
3342 val->in(0)->in(0)->is_If() &&
3343 val->in(0)->in(0)->in(1)->Opcode() == Op_Bool &&
3344 val->in(0)->in(0)->in(1)->as_Bool()->_test._test == BoolTest::ne &&
3345 val->in(0)->in(0)->in(1)->in(1)->Opcode() == Op_CmpP &&
3346 val->in(0)->in(0)->in(1)->in(1)->in(1) == val->in(1) &&
3347 val->in(0)->in(0)->in(1)->in(1)->in(2)->bottom_type() == TypePtr::NULL_PTR) {
3348 assert(val->in(0)->in(0)->in(1)->in(1)->in(1) == val->in(1), "");
3349 CallStaticJavaNode* unc = val->in(0)->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
3350 return unc;
3351 }
3352 return NULL;
3353 }
|