892 assert(is_heap_state_test(heap_stable_iff, flags), "Should match the shape");
893 }
894
895 void ShenandoahBarrierC2Support::test_null(Node*& ctrl, Node* val, Node*& null_ctrl, PhaseIdealLoop* phase) {
896 const Type* val_t = phase->igvn().type(val);
897 if (val_t->meet(TypePtr::NULL_PTR) == val_t) {
898 IdealLoopTree* loop = phase->get_loop(ctrl);
899 Node* null_cmp = new CmpPNode(val, phase->igvn().zerocon(T_OBJECT));
900 phase->register_new_node(null_cmp, ctrl);
901 Node* null_test = new BoolNode(null_cmp, BoolTest::ne);
902 phase->register_new_node(null_test, ctrl);
903 IfNode* null_iff = new IfNode(ctrl, null_test, PROB_LIKELY(0.999), COUNT_UNKNOWN);
904 phase->register_control(null_iff, loop, ctrl);
905 ctrl = new IfTrueNode(null_iff);
906 phase->register_control(ctrl, loop, null_iff);
907 null_ctrl = new IfFalseNode(null_iff);
908 phase->register_control(null_ctrl, loop, null_iff);
909 }
910 }
911
912 Node* ShenandoahBarrierC2Support::clone_null_check(Node*& c, Node* val, Node* unc_ctrl, PhaseIdealLoop* phase) {
913 IdealLoopTree *loop = phase->get_loop(c);
914 Node* iff = unc_ctrl->in(0);
915 assert(iff->is_If(), "broken");
916 Node* new_iff = iff->clone();
917 new_iff->set_req(0, c);
918 phase->register_control(new_iff, loop, c);
919 Node* iffalse = new IfFalseNode(new_iff->as_If());
920 phase->register_control(iffalse, loop, new_iff);
921 Node* iftrue = new IfTrueNode(new_iff->as_If());
922 phase->register_control(iftrue, loop, new_iff);
923 c = iftrue;
924 const Type *t = phase->igvn().type(val);
925 assert(val->Opcode() == Op_CastPP, "expect cast to non null here");
926 Node* uncasted_val = val->in(1);
927 val = new CastPPNode(uncasted_val, t);
928 val->init_req(0, c);
929 phase->register_new_node(val, c);
930 return val;
931 }
932
// After a null check has been cloned, migrate everything hanging off the old
// non-trapping projection of the original check to the new control
// new_unc_ctrl. `unc` is the uncommon trap call, `unc_ctrl` its projection.
933 void ShenandoahBarrierC2Support::fix_null_check(Node* unc, Node* unc_ctrl, Node* new_unc_ctrl,
934 Unique_Node_List& uses, PhaseIdealLoop* phase) {
935 IfNode* iff = unc_ctrl->in(0)->as_If();
// proj is the projection opposite to unc_ctrl: the path that does not trap.
936 Node* proj = iff->proj_out(0);
937 assert(proj != unc_ctrl, "bad projection");
938 Node* use = proj->unique_ctrl_out();
939
940 assert(use == unc || use->is_Region(), "what else?");
941
// Seed the worklist with nodes whose control is the old projection.
942 uses.clear();
943 if (use == unc) {
// The trap call hangs directly off proj: collect its inputs pinned there.
944 phase->set_idom(use, new_unc_ctrl, phase->dom_depth(use));
945 for (uint i = 1; i < unc->req(); i++) {
946 Node* n = unc->in(i);
947 if (phase->has_ctrl(n) && phase->get_ctrl(n) == proj) {
948 uses.push(n);
949 }
950 }
951 } else {
952 assert(use->is_Region(), "what else?");
// Locate which region input comes from proj; the matching phi inputs are the
// nodes that must be re-pinned.
953 uint idx = 1;
954 for (; use->in(idx) != proj; idx++);
955 for (DUIterator_Fast imax, i = use->fast_outs(imax); i < imax; i++) {
956 Node* u = use->fast_out(i);
957 if (u->is_Phi() && phase->get_ctrl(u->in(idx)) == proj) {
958 uses.push(u->in(idx));
959 }
960 }
961 }
// Transitive closure: move each collected node (and any of its inputs still
// controlled by proj) under new_unc_ctrl. The worklist grows as we walk.
962 for(uint next = 0; next < uses.size(); next++ ) {
963 Node *n = uses.at(next);
964 assert(phase->get_ctrl(n) == proj, "bad control");
965 phase->set_ctrl_and_loop(n, new_unc_ctrl);
// Also fix an explicit control input, if the node has one.
966 if (n->in(0) == proj) {
967 phase->igvn().replace_input_of(n, 0, new_unc_ctrl);
968 }
969 for (uint i = 0; i < n->req(); i++) {
970 Node* m = n->in(i);
971 if (m != NULL && phase->has_ctrl(m) && phase->get_ctrl(m) == proj) {
972 uses.push(m);
973 }
974 }
975 }
976
// Finally redirect the single control use of the old projection itself.
977 phase->igvn().rehash_node_delayed(use);
978 int nb = use->replace_edge(proj, new_unc_ctrl);
979 assert(nb == 1, "only use expected");
980 }
981
// Emit the fast collection-set membership test: index the global in-cset byte
// map by (val >> region_size_bytes_shift) and branch on the loaded byte being
// zero. NOTE(review): the projection wiring after the If is not visible in
// this excerpt — presumably ctrl/not_cset_ctrl are set from the If's
// projections; confirm against the full source.
982 void ShenandoahBarrierC2Support::in_cset_fast_test(Node*& ctrl, Node*& not_cset_ctrl, Node* val, Node* raw_mem, PhaseIdealLoop* phase) {
983 IdealLoopTree *loop = phase->get_loop(ctrl);
// Reinterpret the oop as a machine word so it can be shifted.
984 Node* raw_rbtrue = new CastP2XNode(ctrl, val);
985 phase->register_new_node(raw_rbtrue, ctrl);
// Region index = address >> log2(region size in bytes).
986 Node* cset_offset = new URShiftXNode(raw_rbtrue, phase->igvn().intcon(ShenandoahHeapRegion::region_size_bytes_shift_jint()));
987 phase->register_new_node(cset_offset, ctrl);
// Base of the global in-cset byte map; a constant, pinned at the root.
988 Node* in_cset_fast_test_base_addr = phase->igvn().makecon(TypeRawPtr::make(ShenandoahHeap::in_cset_fast_test_addr()));
989 phase->set_ctrl(in_cset_fast_test_base_addr, phase->C->root());
990 Node* in_cset_fast_test_adr = new AddPNode(phase->C->top(), in_cset_fast_test_base_addr, cset_offset);
991 phase->register_new_node(in_cset_fast_test_adr, ctrl);
992 uint in_cset_fast_test_idx = Compile::AliasIdxRaw;
993 const TypePtr* in_cset_fast_test_adr_type = NULL; // debug-mode-only argument
994 debug_only(in_cset_fast_test_adr_type = phase->C->get_adr_type(in_cset_fast_test_idx));
// Load one byte from the map off raw memory; non-zero means "in cset".
995 Node* in_cset_fast_test_load = new LoadBNode(ctrl, raw_mem, in_cset_fast_test_adr, in_cset_fast_test_adr_type, TypeInt::BYTE, MemNode::unordered);
996 phase->register_new_node(in_cset_fast_test_load, ctrl);
997 Node* in_cset_fast_test_cmp = new CmpINode(in_cset_fast_test_load, phase->igvn().zerocon(T_INT));
998 phase->register_new_node(in_cset_fast_test_cmp, ctrl);
// Test is "byte == 0", i.e. the object is NOT in the collection set.
999 Node* in_cset_fast_test_test = new BoolNode(in_cset_fast_test_cmp, BoolTest::eq);
1000 phase->register_new_node(in_cset_fast_test_test, ctrl);
1001 IfNode* in_cset_fast_test_iff = new IfNode(ctrl, in_cset_fast_test_test, PROB_UNLIKELY(0.999), COUNT_UNKNOWN);
1009 }
1010
1011 void ShenandoahBarrierC2Support::call_lrb_stub(Node*& ctrl, Node*& val, Node* load_addr, Node*& result_mem, Node* raw_mem, bool is_native, PhaseIdealLoop* phase) {
1012 IdealLoopTree*loop = phase->get_loop(ctrl);
1013 const TypePtr* obj_type = phase->igvn().type(val)->is_oopptr();
1014
1015 // The slow path stub consumes and produces raw memory in addition
1016 // to the existing memory edges
1017 Node* base = find_bottom_mem(ctrl, phase);
1018 MergeMemNode* mm = MergeMemNode::make(base);
1019 mm->set_memory_at(Compile::AliasIdxRaw, raw_mem);
1020 phase->register_new_node(mm, ctrl);
1021
1022 address target = LP64_ONLY(UseCompressedOops) NOT_LP64(false) ?
1023 CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow) :
1024 CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier);
1025
1026 address calladdr = is_native ? CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native)
1027 : target;
1028 const char* name = is_native ? "load_reference_barrier_native" : "load_reference_barrier";
1029 Node* call = new CallLeafNode(ShenandoahBarrierSetC2::shenandoah_load_reference_barrier_Type(), calladdr, name, TypeRawPtr::BOTTOM);
1030
1031 call->init_req(TypeFunc::Control, ctrl);
1032 call->init_req(TypeFunc::I_O, phase->C->top());
1033 call->init_req(TypeFunc::Memory, mm);
1034 call->init_req(TypeFunc::FramePtr, phase->C->top());
1035 call->init_req(TypeFunc::ReturnAdr, phase->C->top());
1036 call->init_req(TypeFunc::Parms, val);
1037 call->init_req(TypeFunc::Parms+1, load_addr);
1038 phase->register_control(call, loop, ctrl);
1039 ctrl = new ProjNode(call, TypeFunc::Control);
1040 phase->register_control(ctrl, loop, call);
1041 result_mem = new ProjNode(call, TypeFunc::Memory);
1042 phase->register_new_node(result_mem, call);
1043 val = new ProjNode(call, TypeFunc::Parms);
1044 phase->register_new_node(val, call);
1045 val = new CheckCastPPNode(ctrl, val, obj_type);
1046 phase->register_new_node(val, ctrl);
1047 }
1048
1049 void ShenandoahBarrierC2Support::fix_ctrl(Node* barrier, Node* region, const MemoryGraphFixer& fixer, Unique_Node_List& uses, Unique_Node_List& uses_to_ignore, uint last, PhaseIdealLoop* phase) {
1050 Node* ctrl = phase->get_ctrl(barrier);
1051 Node* init_raw_mem = fixer.find_mem(ctrl, barrier);
1052
1053 // Update the control of all nodes that should be after the
1054 // barrier control flow
1055 uses.clear();
1056 // Every node that is control dependent on the barrier's input
1057 // control will be after the expanded barrier. The raw memory (if
1058 // its memory is control dependent on the barrier's input control)
1059 // must stay above the barrier.
1060 uses_to_ignore.clear();
1061 if (phase->has_ctrl(init_raw_mem) && phase->get_ctrl(init_raw_mem) == ctrl && !init_raw_mem->is_Phi()) {
1062 uses_to_ignore.push(init_raw_mem);
1063 }
1064 for (uint next = 0; next < uses_to_ignore.size(); next++) {
1065 Node *n = uses_to_ignore.at(next);
1066 for (uint i = 0; i < n->req(); i++) {
1132 Node* ctrl = phase->get_ctrl(barrier);
1133 IdealLoopTree* loop = phase->get_loop(ctrl);
1134 if (loop->_head->is_OuterStripMinedLoop()) {
1135 // Expanding a barrier here will break loop strip mining
1136 // verification. Transform the loop so the loop nest doesn't
1137 // appear as strip mined.
1138 OuterStripMinedLoopNode* outer = loop->_head->as_OuterStripMinedLoop();
1139 hide_strip_mined_loop(outer, outer->unique_ctrl_out()->as_CountedLoop(), phase);
1140 }
1141 }
1142
1143 Node_Stack stack(0);
1144 Node_List clones;
1145 for (int i = state->load_reference_barriers_count() - 1; i >= 0; i--) {
1146 ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
1147 if (lrb->is_redundant()) {
1148 continue;
1149 }
1150
1151 Node* ctrl = phase->get_ctrl(lrb);
1152 Node* val = lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn);
1153
1154 CallStaticJavaNode* unc = NULL;
1155 Node* unc_ctrl = NULL;
1156 Node* uncasted_val = val;
1157
1158 for (DUIterator_Fast imax, i = lrb->fast_outs(imax); i < imax; i++) {
1159 Node* u = lrb->fast_out(i);
1160 if (u->Opcode() == Op_CastPP &&
1161 u->in(0) != NULL &&
1162 phase->is_dominator(u->in(0), ctrl)) {
1163 const Type* u_t = phase->igvn().type(u);
1164
1165 if (u_t->meet(TypePtr::NULL_PTR) != u_t &&
1166 u->in(0)->Opcode() == Op_IfTrue &&
1167 u->in(0)->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none) &&
1168 u->in(0)->in(0)->is_If() &&
1169 u->in(0)->in(0)->in(1)->Opcode() == Op_Bool &&
1170 u->in(0)->in(0)->in(1)->as_Bool()->_test._test == BoolTest::ne &&
1171 u->in(0)->in(0)->in(1)->in(1)->Opcode() == Op_CmpP &&
1172 u->in(0)->in(0)->in(1)->in(1)->in(1) == val &&
1173 u->in(0)->in(0)->in(1)->in(1)->in(2)->bottom_type() == TypePtr::NULL_PTR) {
1174 IdealLoopTree* loop = phase->get_loop(ctrl);
1175 IdealLoopTree* unc_loop = phase->get_loop(u->in(0));
1176
1177 if (!unc_loop->is_member(loop)) {
1178 continue;
1179 }
1180
1181 Node* branch = no_branches(ctrl, u->in(0), false, phase);
1182 assert(branch == NULL || branch == NodeSentinel, "was not looking for a branch");
1183 if (branch == NodeSentinel) {
1184 continue;
1185 }
1186
1187 phase->igvn().replace_input_of(u, 1, val);
1188 phase->igvn().replace_input_of(lrb, ShenandoahLoadReferenceBarrierNode::ValueIn, u);
1189 phase->set_ctrl(u, u->in(0));
1190 phase->set_ctrl(lrb, u->in(0));
1191 unc = u->in(0)->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
1192 unc_ctrl = u->in(0);
1193 val = u;
1194
1195 for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) {
1196 Node* u = val->fast_out(j);
1197 if (u == lrb) continue;
1198 phase->igvn().rehash_node_delayed(u);
1199 int nb = u->replace_edge(val, lrb);
1200 --j; jmax -= nb;
1201 }
1202
1203 RegionNode* r = new RegionNode(3);
1204 IfNode* iff = unc_ctrl->in(0)->as_If();
1205
1206 Node* ctrl_use = unc_ctrl->unique_ctrl_out();
1207 Node* unc_ctrl_clone = unc_ctrl->clone();
1208 phase->register_control(unc_ctrl_clone, loop, iff);
1209 Node* c = unc_ctrl_clone;
1210 Node* new_cast = clone_null_check(c, val, unc_ctrl_clone, phase);
1211 r->init_req(1, new_cast->in(0)->in(0)->as_If()->proj_out(0));
1212
1213 phase->igvn().replace_input_of(unc_ctrl, 0, c->in(0));
1214 phase->set_idom(unc_ctrl, c->in(0), phase->dom_depth(unc_ctrl));
1215 phase->lazy_replace(c, unc_ctrl);
1216 c = NULL;;
1217 phase->igvn().replace_input_of(val, 0, unc_ctrl_clone);
1218 phase->set_ctrl(val, unc_ctrl_clone);
1219
1220 IfNode* new_iff = new_cast->in(0)->in(0)->as_If();
1221 fix_null_check(unc, unc_ctrl_clone, r, uses, phase);
1222 Node* iff_proj = iff->proj_out(0);
1223 r->init_req(2, iff_proj);
1224 phase->register_control(r, phase->ltree_root(), iff);
1225
1226 Node* new_bol = new_iff->in(1)->clone();
1227 Node* new_cmp = new_bol->in(1)->clone();
1228 assert(new_cmp->Opcode() == Op_CmpP, "broken");
1229 assert(new_cmp->in(1) == val->in(1), "broken");
1230 new_bol->set_req(1, new_cmp);
1231 new_cmp->set_req(1, lrb);
1232 phase->register_new_node(new_bol, new_iff->in(0));
1233 phase->register_new_node(new_cmp, new_iff->in(0));
1234 phase->igvn().replace_input_of(new_iff, 1, new_bol);
1235 phase->igvn().replace_input_of(new_cast, 1, lrb);
1236
1237 for (DUIterator_Fast imax, i = lrb->fast_outs(imax); i < imax; i++) {
1238 Node* u = lrb->fast_out(i);
1239 if (u == new_cast || u == new_cmp) {
1240 continue;
1241 }
1242 phase->igvn().rehash_node_delayed(u);
1243 int nb = u->replace_edge(lrb, new_cast);
1244 assert(nb > 0, "no update?");
1245 --i; imax -= nb;
1246 }
1247
1248 for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
1249 Node* u = val->fast_out(i);
1250 if (u == lrb) {
1251 continue;
1252 }
1253 phase->igvn().rehash_node_delayed(u);
1254 int nb = u->replace_edge(val, new_cast);
1255 assert(nb > 0, "no update?");
1256 --i; imax -= nb;
1257 }
1258
1259 ctrl = unc_ctrl_clone;
1260 phase->set_ctrl_and_loop(lrb, ctrl);
1261 break;
1262 }
1263 }
1264 }
1265 if ((ctrl->is_Proj() && ctrl->in(0)->is_CallJava()) || ctrl->is_CallJava()) {
1266 CallNode* call = ctrl->is_Proj() ? ctrl->in(0)->as_CallJava() : ctrl->as_CallJava();
1267 if (call->entry_point() == OptoRuntime::rethrow_stub()) {
1268 // The rethrow call may have too many projections to be
1269 // properly handled here. Given there's no reason for a
1270 // barrier to depend on the call, move it above the call
1271 stack.push(lrb, 0);
1272 do {
1273 Node* n = stack.node();
1274 uint idx = stack.index();
1275 if (idx < n->req()) {
1276 Node* in = n->in(idx);
1277 stack.set_index(idx+1);
1278 if (in != NULL) {
1279 if (phase->has_ctrl(in)) {
1280 if (phase->is_dominator(call, phase->get_ctrl(in))) {
1281 #ifdef ASSERT
1282 for (uint i = 0; i < stack.size(); i++) {
1283 assert(stack.node_at(i) != in, "node shouldn't have been seen yet");
1284 }
1385 // verification. Transform the loop so the loop nest doesn't
1386 // appear as strip mined.
1387 OuterStripMinedLoopNode* outer = loop->_head->as_OuterStripMinedLoop();
1388 hide_strip_mined_loop(outer, outer->unique_ctrl_out()->as_CountedLoop(), phase);
1389 }
1390 }
1391
1392 // Expand load-reference-barriers
1393 MemoryGraphFixer fixer(Compile::AliasIdxRaw, true, phase);
1394 Unique_Node_List uses_to_ignore;
1395 for (int i = state->load_reference_barriers_count() - 1; i >= 0; i--) {
1396 ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
1397 if (lrb->is_redundant()) {
1398 phase->igvn().replace_node(lrb, lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn));
1399 continue;
1400 }
1401 uint last = phase->C->unique();
1402 Node* ctrl = phase->get_ctrl(lrb);
1403 Node* val = lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn);
1404
1405
1406 Node* orig_ctrl = ctrl;
1407
1408 Node* raw_mem = fixer.find_mem(ctrl, lrb);
1409 Node* init_raw_mem = raw_mem;
1410 Node* raw_mem_for_ctrl = fixer.find_mem(ctrl, NULL);
1411
1412 IdealLoopTree *loop = phase->get_loop(ctrl);
1413 CallStaticJavaNode* unc = lrb->pin_and_expand_null_check(phase->igvn());
1414 Node* unc_ctrl = NULL;
1415 if (unc != NULL) {
1416 if (val->in(ShenandoahLoadReferenceBarrierNode::Control) != ctrl) {
1417 unc = NULL;
1418 } else {
1419 unc_ctrl = val->in(ShenandoahLoadReferenceBarrierNode::Control);
1420 }
1421 }
1422
1423 Node* uncasted_val = val;
1424 if (unc != NULL) {
1425 uncasted_val = val->in(1);
1426 }
1427
1428 Node* heap_stable_ctrl = NULL;
1429 Node* null_ctrl = NULL;
1430
1431 assert(val->bottom_type()->make_oopptr(), "need oop");
1432 assert(val->bottom_type()->make_oopptr()->const_oop() == NULL, "expect non-constant");
1433
1434 enum { _heap_stable = 1, _not_cset, _evac_path, _null_path, PATH_LIMIT };
1435 Node* region = new RegionNode(PATH_LIMIT);
1436 Node* val_phi = new PhiNode(region, uncasted_val->bottom_type()->is_oopptr());
1437 Node* raw_mem_phi = PhiNode::make(region, raw_mem, Type::MEMORY, TypeRawPtr::BOTTOM);
1438
1439 // Stable path.
1440 test_heap_state(ctrl, raw_mem, heap_stable_ctrl, phase, ShenandoahHeap::HAS_FORWARDED);
1441 IfNode* heap_stable_iff = heap_stable_ctrl->in(0)->as_If();
1442
1443 // Heap stable case
1444 region->init_req(_heap_stable, heap_stable_ctrl);
1445 val_phi->init_req(_heap_stable, uncasted_val);
1446 raw_mem_phi->init_req(_heap_stable, raw_mem);
1447
1448 Node* reg2_ctrl = NULL;
1449 // Null case
1450 test_null(ctrl, val, null_ctrl, phase);
1451 if (null_ctrl != NULL) {
1452 reg2_ctrl = null_ctrl->in(0);
1453 region->init_req(_null_path, null_ctrl);
1454 val_phi->init_req(_null_path, uncasted_val);
1455 raw_mem_phi->init_req(_null_path, raw_mem);
1456 } else {
1457 region->del_req(_null_path);
1458 val_phi->del_req(_null_path);
1459 raw_mem_phi->del_req(_null_path);
1460 }
1461
1462 // Test for in-cset.
1463 // Wires !in_cset(obj) to slot 2 of region and phis
1464 Node* not_cset_ctrl = NULL;
1465 in_cset_fast_test(ctrl, not_cset_ctrl, uncasted_val, raw_mem, phase);
1466 if (not_cset_ctrl != NULL) {
1467 if (reg2_ctrl == NULL) reg2_ctrl = not_cset_ctrl->in(0);
1468 region->init_req(_not_cset, not_cset_ctrl);
1469 val_phi->init_req(_not_cset, uncasted_val);
1470 raw_mem_phi->init_req(_not_cset, raw_mem);
1471 }
1472
1473 // Resolve object when orig-value is in cset.
1474 // Make the unconditional resolve for fwdptr.
1475 Node* new_val = uncasted_val;
1476 if (unc_ctrl != NULL) {
1477 // Clone the null check in this branch to allow implicit null check
1478 new_val = clone_null_check(ctrl, val, unc_ctrl, phase);
1479 fix_null_check(unc, unc_ctrl, ctrl->in(0)->as_If()->proj_out(0), uses, phase);
1480
1481 IfNode* iff = unc_ctrl->in(0)->as_If();
1482 phase->igvn().replace_input_of(iff, 1, phase->igvn().intcon(1));
1483 }
1484
1485 // Call lrb-stub and wire up that path in slots 4
1486 Node* result_mem = NULL;
1487
1488 Node* fwd = new_val;
1489 Node* addr;
1490 if (ShenandoahSelfFixing) {
1491 VectorSet visited(Thread::current()->resource_area());
1492 addr = get_load_addr(phase, visited, lrb);
1493 } else {
1494 addr = phase->igvn().zerocon(T_OBJECT);
1495 }
1496 if (addr->Opcode() == Op_AddP) {
1497 Node* orig_base = addr->in(AddPNode::Base);
1498 Node* base = new CheckCastPPNode(ctrl, orig_base, orig_base->bottom_type(), true);
1499 phase->register_new_node(base, ctrl);
1500 if (addr->in(AddPNode::Base) == addr->in((AddPNode::Address))) {
1501 // Field access
1502 addr = addr->clone();
1503 addr->set_req(AddPNode::Base, base);
1504 addr->set_req(AddPNode::Address, base);
1505 phase->register_new_node(addr, ctrl);
1506 } else {
1507 Node* addr2 = addr->in(AddPNode::Address);
1508 if (addr2->Opcode() == Op_AddP && addr2->in(AddPNode::Base) == addr2->in(AddPNode::Address) &&
1509 addr2->in(AddPNode::Base) == orig_base) {
1510 addr2 = addr2->clone();
1511 addr2->set_req(AddPNode::Base, base);
1512 addr2->set_req(AddPNode::Address, base);
1513 phase->register_new_node(addr2, ctrl);
1514 addr = addr->clone();
1515 addr->set_req(AddPNode::Base, base);
1516 addr->set_req(AddPNode::Address, addr2);
1517 phase->register_new_node(addr, ctrl);
1518 }
1519 }
1520 }
1521 call_lrb_stub(ctrl, fwd, addr, result_mem, raw_mem, lrb->is_native(), phase);
1522 region->init_req(_evac_path, ctrl);
1523 val_phi->init_req(_evac_path, fwd);
1524 raw_mem_phi->init_req(_evac_path, result_mem);
1525
1526 phase->register_control(region, loop, heap_stable_iff);
1527 Node* out_val = val_phi;
1528 phase->register_new_node(val_phi, region);
1529 phase->register_new_node(raw_mem_phi, region);
1530
1531 fix_ctrl(lrb, region, fixer, uses, uses_to_ignore, last, phase);
1532
1533 ctrl = orig_ctrl;
1534
1535 if (unc != NULL) {
1536 for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
1537 Node* u = val->fast_out(i);
1538 Node* c = phase->ctrl_or_self(u);
1539 if (u != lrb && (c != ctrl || is_dominator_same_ctrl(c, lrb, u, phase))) {
1540 phase->igvn().rehash_node_delayed(u);
1541 int nb = u->replace_edge(val, out_val);
1542 --i, imax -= nb;
1543 }
1544 }
1545 if (val->outcnt() == 0) {
1546 phase->igvn()._worklist.push(val);
1547 }
1548 }
1549 phase->igvn().replace_node(lrb, out_val);
1550
1551 follow_barrier_uses(out_val, ctrl, uses, phase);
1552
1553 for(uint next = 0; next < uses.size(); next++ ) {
1554 Node *n = uses.at(next);
1555 assert(phase->get_ctrl(n) == ctrl, "bad control");
1556 assert(n != init_raw_mem, "should leave input raw mem above the barrier");
1557 phase->set_ctrl(n, region);
1558 follow_barrier_uses(n, ctrl, uses, phase);
1559 }
1560
1561 // The slow path call produces memory: hook the raw memory phi
1562 // from the expanded load reference barrier with the rest of the graph
1563 // which may require adding memory phis at every post dominated
1564 // region and at enclosing loop heads. Use the memory state
1565 // collected in memory_nodes to fix the memory graph. Update that
1566 // memory state as we go.
1567 fixer.fix_mem(ctrl, region, init_raw_mem, raw_mem_for_ctrl, raw_mem_phi, uses);
1568 }
3156 }
3157
// Decide whether this load-reference barrier can be elided: depth-first walk
// of the barrier's transitive uses looking for one that actually needs a
// to-space invariant value (calls, atomics, ...). Returns true only if the
// walk exhausts all uses without finding such a consumer.
3158 bool ShenandoahLoadReferenceBarrierNode::is_redundant() {
3159 Unique_Node_List visited;
3160 Node_Stack stack(0);
3161 stack.push(this, 0);
3162
3163 // Check if the barrier is actually useful: go over nodes looking for useful uses
3164 // (e.g. memory accesses). Stop once we detected a required use. Otherwise, walk
3165 // until we ran out of nodes, and then declare the barrier redundant.
3166 while (stack.size() > 0) {
3167 Node* n = stack.node();
// Each node is examined at most once; revisits are popped immediately.
3168 if (visited.member(n)) {
3169 stack.pop();
3170 continue;
3171 }
3172 visited.push(n);
3173 bool visit_users = false;
// Opcodes below are consumers that require the barriered (to-space) value.
3174 switch (n->Opcode()) {
3175 case Op_CallStaticJava:
3176 case Op_CallDynamicJava:
3177 case Op_CallLeaf:
3178 case Op_CallLeafNoFP:
3179 case Op_CompareAndSwapL:
3180 case Op_CompareAndSwapI:
3181 case Op_CompareAndSwapB:
3182 case Op_CompareAndSwapS:
3183 case Op_CompareAndSwapN:
3184 case Op_CompareAndSwapP:
3185 case Op_CompareAndExchangeL:
3186 case Op_CompareAndExchangeI:
3187 case Op_CompareAndExchangeB:
3188 case Op_CompareAndExchangeS:
3189 case Op_CompareAndExchangeN:
3190 case Op_CompareAndExchangeP:
3191 case Op_WeakCompareAndSwapL:
3192 case Op_WeakCompareAndSwapI:
3193 case Op_WeakCompareAndSwapB:
3194 case Op_WeakCompareAndSwapS:
3195 case Op_WeakCompareAndSwapN:
3296 #else
3297 // Default to have excess barriers, rather than miss some.
3298 return false;
3299 #endif
3300 }
3301 }
3302
// Done with this node; queue its users if the opcode was classified benign.
3303 stack.pop();
3304 if (visit_users) {
3305 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
3306 Node* user = n->fast_out(i);
3307 if (user != NULL) {
3308 stack.push(user, 0);
3309 }
3310 }
3311 }
3312 }
3313
3314 // No need for barrier found.
3315 return true;
3316 }
3317
3318 CallStaticJavaNode* ShenandoahLoadReferenceBarrierNode::pin_and_expand_null_check(PhaseIterGVN& igvn) {
3319 Node* val = in(ValueIn);
3320
3321 const Type* val_t = igvn.type(val);
3322
3323 if (val_t->meet(TypePtr::NULL_PTR) != val_t &&
3324 val->Opcode() == Op_CastPP &&
3325 val->in(0) != NULL &&
3326 val->in(0)->Opcode() == Op_IfTrue &&
3327 val->in(0)->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none) &&
3328 val->in(0)->in(0)->is_If() &&
3329 val->in(0)->in(0)->in(1)->Opcode() == Op_Bool &&
3330 val->in(0)->in(0)->in(1)->as_Bool()->_test._test == BoolTest::ne &&
3331 val->in(0)->in(0)->in(1)->in(1)->Opcode() == Op_CmpP &&
3332 val->in(0)->in(0)->in(1)->in(1)->in(1) == val->in(1) &&
3333 val->in(0)->in(0)->in(1)->in(1)->in(2)->bottom_type() == TypePtr::NULL_PTR) {
3334 assert(val->in(0)->in(0)->in(1)->in(1)->in(1) == val->in(1), "");
3335 CallStaticJavaNode* unc = val->in(0)->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
3336 return unc;
3337 }
3338 return NULL;
3339 }
|
892 assert(is_heap_state_test(heap_stable_iff, flags), "Should match the shape");
893 }
894
// Emit an explicit null test for val. On return, ctrl is the non-null path
// and null_ctrl the null path; null_ctrl is left untouched when the type
// system already proves val cannot be null.
895 void ShenandoahBarrierC2Support::test_null(Node*& ctrl, Node* val, Node*& null_ctrl, PhaseIdealLoop* phase) {
896 const Type* val_t = phase->igvn().type(val);
// Only emit the check if NULL is part of val's type.
897 if (val_t->meet(TypePtr::NULL_PTR) == val_t) {
898 IdealLoopTree* loop = phase->get_loop(ctrl);
899 Node* null_cmp = new CmpPNode(val, phase->igvn().zerocon(T_OBJECT));
900 phase->register_new_node(null_cmp, ctrl);
901 Node* null_test = new BoolNode(null_cmp, BoolTest::ne);
902 phase->register_new_node(null_test, ctrl);
// Branch is heavily biased toward the non-null case.
903 IfNode* null_iff = new IfNode(ctrl, null_test, PROB_LIKELY(0.999), COUNT_UNKNOWN);
904 phase->register_control(null_iff, loop, ctrl);
// True (non-null) projection continues as the main control flow.
905 ctrl = new IfTrueNode(null_iff);
906 phase->register_control(ctrl, loop, null_iff);
907 null_ctrl = new IfFalseNode(null_iff);
908 phase->register_control(null_ctrl, loop, null_iff);
909 }
910 }
911
// Emit the fast collection-set membership test: index the global in-cset byte
// map by (val >> region_size_bytes_shift) and branch on the loaded byte being
// zero. NOTE(review): the projection wiring after the If is not visible in
// this excerpt — presumably ctrl/not_cset_ctrl are set from the If's
// projections; confirm against the full source.
912 void ShenandoahBarrierC2Support::in_cset_fast_test(Node*& ctrl, Node*& not_cset_ctrl, Node* val, Node* raw_mem, PhaseIdealLoop* phase) {
913 IdealLoopTree *loop = phase->get_loop(ctrl);
// Reinterpret the oop as a machine word so it can be shifted.
914 Node* raw_rbtrue = new CastP2XNode(ctrl, val);
915 phase->register_new_node(raw_rbtrue, ctrl);
// Region index = address >> log2(region size in bytes).
916 Node* cset_offset = new URShiftXNode(raw_rbtrue, phase->igvn().intcon(ShenandoahHeapRegion::region_size_bytes_shift_jint()));
917 phase->register_new_node(cset_offset, ctrl);
// Base of the global in-cset byte map; a constant, pinned at the root.
918 Node* in_cset_fast_test_base_addr = phase->igvn().makecon(TypeRawPtr::make(ShenandoahHeap::in_cset_fast_test_addr()));
919 phase->set_ctrl(in_cset_fast_test_base_addr, phase->C->root());
920 Node* in_cset_fast_test_adr = new AddPNode(phase->C->top(), in_cset_fast_test_base_addr, cset_offset);
921 phase->register_new_node(in_cset_fast_test_adr, ctrl);
922 uint in_cset_fast_test_idx = Compile::AliasIdxRaw;
923 const TypePtr* in_cset_fast_test_adr_type = NULL; // debug-mode-only argument
924 debug_only(in_cset_fast_test_adr_type = phase->C->get_adr_type(in_cset_fast_test_idx));
// Load one byte from the map off raw memory; non-zero means "in cset".
925 Node* in_cset_fast_test_load = new LoadBNode(ctrl, raw_mem, in_cset_fast_test_adr, in_cset_fast_test_adr_type, TypeInt::BYTE, MemNode::unordered);
926 phase->register_new_node(in_cset_fast_test_load, ctrl);
927 Node* in_cset_fast_test_cmp = new CmpINode(in_cset_fast_test_load, phase->igvn().zerocon(T_INT));
928 phase->register_new_node(in_cset_fast_test_cmp, ctrl);
// Test is "byte == 0", i.e. the object is NOT in the collection set.
929 Node* in_cset_fast_test_test = new BoolNode(in_cset_fast_test_cmp, BoolTest::eq);
930 phase->register_new_node(in_cset_fast_test_test, ctrl);
931 IfNode* in_cset_fast_test_iff = new IfNode(ctrl, in_cset_fast_test_test, PROB_UNLIKELY(0.999), COUNT_UNKNOWN);
939 }
940
// Emit the slow-path leaf call to the load-reference-barrier runtime stub.
// On return, ctrl/val/result_mem are the call's control, result and raw
// memory projections. Unlike the older variant of this routine, the result
// oop type is encoded in the call's signature (obj_type argument below)
// rather than re-established with a CheckCastPP.
941 void ShenandoahBarrierC2Support::call_lrb_stub(Node*& ctrl, Node*& val, Node* load_addr, Node*& result_mem, Node* raw_mem, bool is_native, PhaseIdealLoop* phase) {
942 IdealLoopTree*loop = phase->get_loop(ctrl);
943 const TypePtr* obj_type = phase->igvn().type(val)->is_oopptr();
944
945 // The slow path stub consumes and produces raw memory in addition
946 // to the existing memory edges
947 Node* base = find_bottom_mem(ctrl, phase);
948 MergeMemNode* mm = MergeMemNode::make(base);
949 mm->set_memory_at(Compile::AliasIdxRaw, raw_mem);
950 phase->register_new_node(mm, ctrl);
951
// With compressed oops on 64-bit, use the narrow-oop stub variant.
952 address target = LP64_ONLY(UseCompressedOops) NOT_LP64(false) ?
953 CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow) :
954 CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier);
955
// Native barriers get a dedicated entry point.
956 address calladdr = is_native ? CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native)
957 : target;
958 const char* name = is_native ? "load_reference_barrier_native" : "load_reference_barrier";
959 Node* call = new CallLeafNode(ShenandoahBarrierSetC2::shenandoah_load_reference_barrier_Type(obj_type), calladdr, name, TypeRawPtr::BOTTOM);
960
961 call->init_req(TypeFunc::Control, ctrl);
962 call->init_req(TypeFunc::I_O, phase->C->top());
963 call->init_req(TypeFunc::Memory, mm);
964 call->init_req(TypeFunc::FramePtr, phase->C->top());
965 call->init_req(TypeFunc::ReturnAdr, phase->C->top());
966 call->init_req(TypeFunc::Parms, val);
967 call->init_req(TypeFunc::Parms+1, load_addr);
968 phase->register_control(call, loop, ctrl);
// Hand the call's projections back to the caller.
969 ctrl = new ProjNode(call, TypeFunc::Control);
970 phase->register_control(ctrl, loop, call);
971 result_mem = new ProjNode(call, TypeFunc::Memory);
972 phase->register_new_node(result_mem, call);
973 val = new ProjNode(call, TypeFunc::Parms);
974 phase->register_new_node(val, call);
975 }
976
977 void ShenandoahBarrierC2Support::fix_ctrl(Node* barrier, Node* region, const MemoryGraphFixer& fixer, Unique_Node_List& uses, Unique_Node_List& uses_to_ignore, uint last, PhaseIdealLoop* phase) {
978 Node* ctrl = phase->get_ctrl(barrier);
979 Node* init_raw_mem = fixer.find_mem(ctrl, barrier);
980
981 // Update the control of all nodes that should be after the
982 // barrier control flow
983 uses.clear();
984 // Every node that is control dependent on the barrier's input
985 // control will be after the expanded barrier. The raw memory (if
986 // its memory is control dependent on the barrier's input control)
987 // must stay above the barrier.
988 uses_to_ignore.clear();
989 if (phase->has_ctrl(init_raw_mem) && phase->get_ctrl(init_raw_mem) == ctrl && !init_raw_mem->is_Phi()) {
990 uses_to_ignore.push(init_raw_mem);
991 }
992 for (uint next = 0; next < uses_to_ignore.size(); next++) {
993 Node *n = uses_to_ignore.at(next);
994 for (uint i = 0; i < n->req(); i++) {
1060 Node* ctrl = phase->get_ctrl(barrier);
1061 IdealLoopTree* loop = phase->get_loop(ctrl);
1062 if (loop->_head->is_OuterStripMinedLoop()) {
1063 // Expanding a barrier here will break loop strip mining
1064 // verification. Transform the loop so the loop nest doesn't
1065 // appear as strip mined.
1066 OuterStripMinedLoopNode* outer = loop->_head->as_OuterStripMinedLoop();
1067 hide_strip_mined_loop(outer, outer->unique_ctrl_out()->as_CountedLoop(), phase);
1068 }
1069 }
1070
1071 Node_Stack stack(0);
1072 Node_List clones;
1073 for (int i = state->load_reference_barriers_count() - 1; i >= 0; i--) {
1074 ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
1075 if (lrb->is_redundant()) {
1076 continue;
1077 }
1078
1079 Node* ctrl = phase->get_ctrl(lrb);
1080 if ((ctrl->is_Proj() && ctrl->in(0)->is_CallJava()) || ctrl->is_CallJava()) {
1081 CallNode* call = ctrl->is_Proj() ? ctrl->in(0)->as_CallJava() : ctrl->as_CallJava();
1082 if (call->entry_point() == OptoRuntime::rethrow_stub()) {
1083 // The rethrow call may have too many projections to be
1084 // properly handled here. Given there's no reason for a
1085 // barrier to depend on the call, move it above the call
1086 stack.push(lrb, 0);
1087 do {
1088 Node* n = stack.node();
1089 uint idx = stack.index();
1090 if (idx < n->req()) {
1091 Node* in = n->in(idx);
1092 stack.set_index(idx+1);
1093 if (in != NULL) {
1094 if (phase->has_ctrl(in)) {
1095 if (phase->is_dominator(call, phase->get_ctrl(in))) {
1096 #ifdef ASSERT
1097 for (uint i = 0; i < stack.size(); i++) {
1098 assert(stack.node_at(i) != in, "node shouldn't have been seen yet");
1099 }
1200 // verification. Transform the loop so the loop nest doesn't
1201 // appear as strip mined.
1202 OuterStripMinedLoopNode* outer = loop->_head->as_OuterStripMinedLoop();
1203 hide_strip_mined_loop(outer, outer->unique_ctrl_out()->as_CountedLoop(), phase);
1204 }
1205 }
1206
1207 // Expand load-reference-barriers
// Each ShenandoahLoadReferenceBarrierNode is expanded into a 3-way diamond:
//   _heap_stable : no forwarded objects in the heap -> use val as-is
//   _not_cset    : object not in the collection set -> use val as-is
//   _evac_path   : slow path, call the LRB runtime stub
// A Region merges control, a value Phi merges the (possibly fixed-up) oop and
// a raw-memory Phi merges memory state from the slow-path call.
1208 MemoryGraphFixer fixer(Compile::AliasIdxRaw, true, phase);
1209 Unique_Node_List uses_to_ignore;
// Iterate in reverse so removal during expansion doesn't disturb indexing.
1210 for (int i = state->load_reference_barriers_count() - 1; i >= 0; i--) {
1211 ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
1212 if (lrb->is_redundant()) {
// Redundant barrier: short-circuit it to its input value, no expansion.
1213 phase->igvn().replace_node(lrb, lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn));
1214 continue;
1215 }
// Remember the node-count watermark: nodes created past `last` are the
// ones added by this expansion (used by fix_ctrl below).
1216 uint last = phase->C->unique();
1217 Node* ctrl = phase->get_ctrl(lrb);
1218 Node* val = lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn);
1219 
1220 Node* orig_ctrl = ctrl;
1221 
// Raw memory state at the barrier (for the stub call) and at the control
// point (for re-wiring memory after expansion).
1222 Node* raw_mem = fixer.find_mem(ctrl, lrb);
1223 Node* init_raw_mem = raw_mem;
1224 Node* raw_mem_for_ctrl = fixer.find_mem(ctrl, NULL);
1225 
1226 IdealLoopTree* loop = phase->get_loop(ctrl);
1227 
1228 assert(val->bottom_type()->make_oopptr(), "need oop");
1229 assert(val->bottom_type()->make_oopptr()->const_oop() == NULL, "expect non-constant");
1230 
1231 enum { _heap_stable = 1, _not_cset, _evac_path, PATH_LIMIT };
1232 Node* region = new RegionNode(PATH_LIMIT);
1233 Node* val_phi = new PhiNode(region, val->bottom_type()->is_oopptr());
1234 Node* raw_mem_phi = PhiNode::make(region, raw_mem, Type::MEMORY, TypeRawPtr::BOTTOM);
1235 
1236 // Stable path.
// Tests the gc-state byte for HAS_FORWARDED; heap_stable_ctrl is the
// projection taken when no forwarded objects exist.
1237 Node* heap_stable_ctrl = NULL;
1238 test_heap_state(ctrl, raw_mem, heap_stable_ctrl, phase, ShenandoahHeap::HAS_FORWARDED);
1239 IfNode* heap_stable_iff = heap_stable_ctrl->in(0)->as_If();
1240 
1241 // Heap stable case
1242 region->init_req(_heap_stable, heap_stable_ctrl);
1243 val_phi->init_req(_heap_stable, val);
1244 raw_mem_phi->init_req(_heap_stable, raw_mem);
1245 
1246 // Test for in-cset.
1247 // Wires !in_cset(obj) to slot 2 of region and phis
// not_cset_ctrl may stay NULL if the test was statically folded away.
1248 Node* not_cset_ctrl = NULL;
1249 in_cset_fast_test(ctrl, not_cset_ctrl, val, raw_mem, phase);
1250 if (not_cset_ctrl != NULL) {
1251 region->init_req(_not_cset, not_cset_ctrl);
1252 val_phi->init_req(_not_cset, val);
1253 raw_mem_phi->init_req(_not_cset, raw_mem);
1254 }
1255 
1256 // Call lrb-stub and wire up that path in slots 4
1257 Node* result_mem = NULL;
1258 
// With self-fixing enabled the stub gets the load address so it can CAS the
// forwarded pointer back into the field; otherwise pass a null address.
1259 Node* addr;
1260 if (ShenandoahSelfFixing) {
1261 VectorSet visited(Thread::current()->resource_area());
1262 addr = get_load_addr(phase, visited, lrb);
1263 } else {
1264 addr = phase->igvn().zerocon(T_OBJECT);
1265 }
1266 if (addr->Opcode() == Op_AddP) {
// Pin the address computation below the cset check by re-basing the AddP
// chain on a control-dependent CheckCastPP of the original base.
1267 Node* orig_base = addr->in(AddPNode::Base);
1268 Node* base = new CheckCastPPNode(ctrl, orig_base, orig_base->bottom_type(), true);
1269 phase->register_new_node(base, ctrl);
1270 if (addr->in(AddPNode::Base) == addr->in((AddPNode::Address))) {
1271 // Field access
1272 addr = addr->clone();
1273 addr->set_req(AddPNode::Base, base);
1274 addr->set_req(AddPNode::Address, base);
1275 phase->register_new_node(addr, ctrl);
1276 } else {
// Two-level AddP (array element access): re-base both levels.
1277 Node* addr2 = addr->in(AddPNode::Address);
1278 if (addr2->Opcode() == Op_AddP && addr2->in(AddPNode::Base) == addr2->in(AddPNode::Address) &&
1279 addr2->in(AddPNode::Base) == orig_base) {
1280 addr2 = addr2->clone();
1281 addr2->set_req(AddPNode::Base, base);
1282 addr2->set_req(AddPNode::Address, base);
1283 phase->register_new_node(addr2, ctrl);
1284 addr = addr->clone();
1285 addr->set_req(AddPNode::Base, base);
1286 addr->set_req(AddPNode::Address, addr2);
1287 phase->register_new_node(addr, ctrl);
1288 }
1289 }
1290 }
// Slow path: runtime stub returns the (possibly evacuated) oop in val and
// the post-call raw memory in result_mem; ctrl is updated to the fallthrough.
1291 call_lrb_stub(ctrl, val, addr, result_mem, raw_mem, lrb->is_native(), phase);
1292 region->init_req(_evac_path, ctrl);
1293 val_phi->init_req(_evac_path, val);
1294 raw_mem_phi->init_req(_evac_path, result_mem);
1295 
1296 phase->register_control(region, loop, heap_stable_iff);
1297 Node* out_val = val_phi;
1298 phase->register_new_node(val_phi, region);
1299 phase->register_new_node(raw_mem_phi, region);
1300 
// Re-wire control for data nodes pinned at the barrier (those created after
// `last` are the expansion's own nodes and are skipped via uses_to_ignore).
1301 fix_ctrl(lrb, region, fixer, uses, uses_to_ignore, last, phase);
1302 
1303 ctrl = orig_ctrl;
1304 
1305 phase->igvn().replace_node(lrb, out_val);
1306 
// Migrate barrier users whose control was the barrier's to the merge region.
1307 follow_barrier_uses(out_val, ctrl, uses, phase);
1308 
1309 for(uint next = 0; next < uses.size(); next++ ) {
1310 Node *n = uses.at(next);
1311 assert(phase->get_ctrl(n) == ctrl, "bad control");
1312 assert(n != init_raw_mem, "should leave input raw mem above the barrier");
1313 phase->set_ctrl(n, region);
1314 follow_barrier_uses(n, ctrl, uses, phase);
1315 }
1316 
1317 // The slow path call produces memory: hook the raw memory phi
1318 // from the expanded load reference barrier with the rest of the graph
1319 // which may require adding memory phis at every post dominated
1320 // region and at enclosing loop heads. Use the memory state
1321 // collected in memory_nodes to fix the memory graph. Update that
1322 // memory state as we go.
1323 fixer.fix_mem(ctrl, region, init_raw_mem, raw_mem_for_ctrl, raw_mem_phi, uses);
1324 }
2912 }
2913
// Decide whether this load-reference-barrier can be elided. Walks the
// barrier's transitive users looking for a "required" use (calls, atomic
// ops, ... — see the opcode switch). Returns true (redundant) only when the
// whole walk completes without finding one; any required use, or (per the
// visible #else arm) a non-ASSERT build hitting the default arm, returns
// false to err on the side of keeping the barrier.
2914 bool ShenandoahLoadReferenceBarrierNode::is_redundant() {
2915 Unique_Node_List visited;
2916 Node_Stack stack(0);
2917 stack.push(this, 0);
2918 
2919 // Check if the barrier is actually useful: go over nodes looking for useful uses
2920 // (e.g. memory accesses). Stop once we detected a required use. Otherwise, walk
2921 // until we ran out of nodes, and then declare the barrier redundant.
2922 while (stack.size() > 0) {
2923 Node* n = stack.node();
// Each node is examined once; revisits are popped without inspection.
2924 if (visited.member(n)) {
2925 stack.pop();
2926 continue;
2927 }
2928 visited.push(n);
2929 bool visit_users = false;
2930 switch (n->Opcode()) {
2931 case Op_CallStaticJava:
2932 // Uncommon traps don't need barriers, values are handled during deoptimization. It also affects
2933 // optimizing null-checks into implicit null-checks.
2934 if (n->as_CallStaticJava()->uncommon_trap_request() != 0) {
2935 break;
2936 }
// Intentional fallthrough: a non-uncommon-trap static call is treated
// like the other call/atomic opcodes below.
2937 case Op_CallDynamicJava:
2938 case Op_CallLeaf:
2939 case Op_CallLeafNoFP:
2940 case Op_CompareAndSwapL:
2941 case Op_CompareAndSwapI:
2942 case Op_CompareAndSwapB:
2943 case Op_CompareAndSwapS:
2944 case Op_CompareAndSwapN:
2945 case Op_CompareAndSwapP:
2946 case Op_CompareAndExchangeL:
2947 case Op_CompareAndExchangeI:
2948 case Op_CompareAndExchangeB:
2949 case Op_CompareAndExchangeS:
2950 case Op_CompareAndExchangeN:
2951 case Op_CompareAndExchangeP:
2952 case Op_WeakCompareAndSwapL:
2953 case Op_WeakCompareAndSwapI:
2954 case Op_WeakCompareAndSwapB:
2955 case Op_WeakCompareAndSwapS:
2956 case Op_WeakCompareAndSwapN:
// NOTE(review): DISCONTINUITY — original lines 2957-3056 are missing from
// this excerpt: the remaining cases of the switch, the bodies that return
// false / set visit_users, and the #ifdef ASSERT arm that pairs with the
// #else below. Do not restructure this function from this view alone.
3057 #else
3058 // Default to have excess barriers, rather than miss some.
3059 return false;
3060 #endif
3061 }
3062 }
3063 
// Node fully handled: pop it and, when flagged, schedule its users.
3064 stack.pop();
3065 if (visit_users) {
3066 for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
3067 Node* user = n->fast_out(i);
3068 if (user != NULL) {
3069 stack.push(user, 0);
3070 }
3071 }
3072 }
3073 }
3074 
3075 // No need for barrier found.
3076 return true;
3077 }
|