/*
 * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include "precompiled.hpp"
#include "opto/castnode.hpp"
#include "opto/compile.hpp"
#include "opto/escape.hpp"
#include "opto/graphKit.hpp"
#include "opto/loopnode.hpp"
#include "opto/machnode.hpp"
#include "opto/macro.hpp"
#include "opto/memnode.hpp"
#include "opto/movenode.hpp"
#include "opto/node.hpp"
#include "opto/phase.hpp"
#include "opto/phaseX.hpp"
#include "opto/rootnode.hpp"
#include "opto/type.hpp"
#include "utilities/copy.hpp"
#include "utilities/growableArray.hpp"
#include "utilities/macros.hpp"
#include "gc/z/zBarrierSet.hpp"
#include "gc/z/c2/zBarrierSetC2.hpp"
#include "gc/z/zThreadLocalData.hpp"
#include "gc/z/zBarrierSetRuntime.hpp"

ZBarrierSetC2State::ZBarrierSetC2State(Arena* comp_arena) :
    _load_barrier_nodes(new (comp_arena) GrowableArray<LoadBarrierNode*>(comp_arena, 8, 0, NULL)) {}

int ZBarrierSetC2State::load_barrier_count() const {
  return _load_barrier_nodes->length();
}

void ZBarrierSetC2State::add_load_barrier_node(LoadBarrierNode * n) {
  assert(!_load_barrier_nodes->contains(n), "duplicate entry in expand list");
  _load_barrier_nodes->append(n);
}

void ZBarrierSetC2State::remove_load_barrier_node(LoadBarrierNode * n) {
  // This function may be called twice for a node, so check
  // that the node is in the array before attempting to remove it
  if (_load_barrier_nodes->contains(n)) {
    _load_barrier_nodes->remove(n);
  }
}

LoadBarrierNode* ZBarrierSetC2State::load_barrier_node(int idx) const {
  return _load_barrier_nodes->at(idx);
}

void* ZBarrierSetC2::create_barrier_state(Arena* comp_arena) const {
  return new(comp_arena) ZBarrierSetC2State(comp_arena);
}

ZBarrierSetC2State* ZBarrierSetC2::state() const {
  return reinterpret_cast<ZBarrierSetC2State*>(Compile::current()->barrier_set_state());
}

bool ZBarrierSetC2::is_gc_barrier_node(Node* node) const {
  // 1. This step follows potential oop projections of a load barrier before expansion
  if (node->is_Proj()) {
    node = node->in(0);
  }

  // 2. This step checks for unexpanded load barriers
  if (node->is_LoadBarrier()) {
    return true;
  }

  // 3. This step checks for the phi corresponding to an optimized load barrier expansion
  if (node->is_Phi()) {
    PhiNode* phi = node->as_Phi();
    Node* n = phi->in(1);
    if (n != NULL && n->is_LoadBarrierSlowReg()) {
      return true;
    }
  }

  return false;
}

void ZBarrierSetC2::register_potential_barrier_node(Node* node) const {
  if (node->is_LoadBarrier()) {
    state()->add_load_barrier_node(node->as_LoadBarrier());
  }
}

void ZBarrierSetC2::unregister_potential_barrier_node(Node* node) const {
  if (node->is_LoadBarrier()) {
    state()->remove_load_barrier_node(node->as_LoadBarrier());
  }
}

void ZBarrierSetC2::eliminate_useless_gc_barriers(Unique_Node_List &useful, Compile* C) const {
  // Remove useless LoadBarrier nodes
  ZBarrierSetC2State* s = state();
  for (int i = s->load_barrier_count()-1; i >= 0; i--) {
    LoadBarrierNode* n = s->load_barrier_node(i);
    if (!useful.member(n)) {
      unregister_potential_barrier_node(n);
    }
  }
}

void ZBarrierSetC2::enqueue_useful_gc_barrier(PhaseIterGVN* igvn, Node* node) const {
  if (node->is_LoadBarrier() && !node->as_LoadBarrier()->has_true_uses()) {
    igvn->_worklist.push(node);
  }
}

// == LoadBarrierNode ==
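//
// A LoadBarrierNode is a macro node with five inputs (Control, Memory, Oop,
// Address and Similar) and a tuple of three outputs (Control, Memory and Oop,
// see bottom_type() below). Oop is the loaded oop that the barrier checks and
// possibly heals, Address is the field the oop was loaded from, and Similar
// (top when unused) points to the Oop projection of a dominating barrier on
// the same address.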

LoadBarrierNode::LoadBarrierNode(Compile* C,
                                 Node* c,
                                 Node* mem,
                                 Node* val,
                                 Node* adr,
                                 bool weak) :
    MultiNode(Number_of_Inputs),
    _weak(weak) {
  init_req(Control, c);
  init_req(Memory, mem);
  init_req(Oop, val);
  init_req(Address, adr);
  init_req(Similar, C->top());

  init_class_id(Class_LoadBarrier);
  BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
  bs->register_potential_barrier_node(this);
}

uint LoadBarrierNode::size_of() const {
  return sizeof(*this);
}

bool LoadBarrierNode::cmp(const Node& n) const {
  ShouldNotReachHere();
  return false;
}

const Type *LoadBarrierNode::bottom_type() const {
  const Type** floadbarrier = (const Type **)(Compile::current()->type_arena()->Amalloc_4((Number_of_Outputs)*sizeof(Type*)));
  Node* in_oop = in(Oop);
  floadbarrier[Control] = Type::CONTROL;
  floadbarrier[Memory] = Type::MEMORY;
  floadbarrier[Oop] = in_oop == NULL ? Type::TOP : in_oop->bottom_type();
  return TypeTuple::make(Number_of_Outputs, floadbarrier);
}

const TypePtr* LoadBarrierNode::adr_type() const {
  ShouldNotReachHere();
  return NULL;
}

const Type *LoadBarrierNode::Value(PhaseGVN *phase) const {
  const Type** floadbarrier = (const Type **)(phase->C->type_arena()->Amalloc_4((Number_of_Outputs)*sizeof(Type*)));
  const Type* val_t = phase->type(in(Oop));
  floadbarrier[Control] = Type::CONTROL;
  floadbarrier[Memory]  = Type::MEMORY;
  floadbarrier[Oop]     = val_t;
  return TypeTuple::make(Number_of_Outputs, floadbarrier);
}

bool LoadBarrierNode::is_dominator(PhaseIdealLoop* phase, bool linear_only, Node *d, Node *n) {
  if (phase != NULL) {
    return phase->is_dominator(d, n);
  }

  for (int i = 0; i < 10 && n != NULL; i++) {
    n = IfNode::up_one_dom(n, linear_only);
    if (n == d) {
      return true;
    }
  }

  return false;
}

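// Look for a barrier that makes this one redundant. Without a PhaseIdealLoop,
// the dominance check falls back to the bounded linear walk in is_dominator()
// above. The search tries, in order: the barrier already linked through the
// Similar edge, other non-weak barriers on the same Oop value, and (when
// look_for_similar is set) barriers on the same Address, provided no
// safepoint can intervene on the control path between the two barriers.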
LoadBarrierNode* LoadBarrierNode::has_dominating_barrier(PhaseIdealLoop* phase, bool linear_only, bool look_for_similar) {
  if (is_weak()) {
    // Weak barriers can't be eliminated
    return NULL;
  }

  Node* val = in(LoadBarrierNode::Oop);
  if (in(Similar)->is_Proj() && in(Similar)->in(0)->is_LoadBarrier()) {
    LoadBarrierNode* lb = in(Similar)->in(0)->as_LoadBarrier();
    assert(lb->in(Address) == in(Address), "");
    // The load barrier on the Similar edge dominates, so if it now has the same Oop input it can replace this barrier.
    if (lb->in(Oop) == in(Oop)) {
      return lb;
    }
    // Follow the chain of load barriers through their Similar edges
    while (!lb->in(Similar)->is_top()) {
      lb = lb->in(Similar)->in(0)->as_LoadBarrier();
      assert(lb->in(Address) == in(Address), "");
    }
    if (lb != in(Similar)->in(0)) {
      return lb;
    }
  }
  for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
    Node* u = val->fast_out(i);
    if (u != this && u->is_LoadBarrier() && u->in(Oop) == val && u->as_LoadBarrier()->has_true_uses()) {
      Node* this_ctrl = in(LoadBarrierNode::Control);
      Node* other_ctrl = u->in(LoadBarrierNode::Control);
      if (is_dominator(phase, linear_only, other_ctrl, this_ctrl)) {
        return u->as_LoadBarrier();
      }
    }
  }

  if (can_be_eliminated()) {
    return NULL;
  }

  if (!look_for_similar) {
    return NULL;
  }

  Node* addr = in(LoadBarrierNode::Address);
  for (DUIterator_Fast imax, i = addr->fast_outs(imax); i < imax; i++) {
    Node* u = addr->fast_out(i);
    if (u != this && u->is_LoadBarrier() && u->as_LoadBarrier()->has_true_uses()) {
      Node* this_ctrl = in(LoadBarrierNode::Control);
      Node* other_ctrl = u->in(LoadBarrierNode::Control);
      if (is_dominator(phase, linear_only, other_ctrl, this_ctrl)) {
        ResourceMark rm;
        Unique_Node_List wq;
        wq.push(in(LoadBarrierNode::Control));
        bool ok = true;
        bool dom_found = false;
        for (uint next = 0; next < wq.size(); ++next) {
          Node *n = wq.at(next);
          if (n->is_top()) {
            return NULL;
          }
          assert(n->is_CFG(), "");
          if (n->is_SafePoint()) {
            ok = false;
            break;
          }
          if (n == u) {
            dom_found = true;
            continue;
          }
          if (n->is_Region()) {
            for (uint i = 1; i < n->req(); i++) {
              Node* m = n->in(i);
              if (m != NULL) {
                wq.push(m);
              }
            }
          } else {
            Node* m = n->in(0);
            if (m != NULL) {
              wq.push(m);
            }
          }
        }
        if (ok) {
          assert(dom_found, "");
          return u->as_LoadBarrier();
        }
        break;
      }
    }
  }

  return NULL;
}

void LoadBarrierNode::push_dominated_barriers(PhaseIterGVN* igvn) const {
  // A change to this barrier may affect dominated barriers, so re-push those
  assert(!is_weak(), "sanity");
  Node* val = in(LoadBarrierNode::Oop);

  for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
    Node* u = val->fast_out(i);
    if (u != this && u->is_LoadBarrier() && u->in(Oop) == val) {
      Node* this_ctrl = in(Control);
      Node* other_ctrl = u->in(Control);
      if (is_dominator(NULL, false, this_ctrl, other_ctrl)) {
        igvn->_worklist.push(u);
      }
    }
  }

  Node* addr = in(LoadBarrierNode::Address);
  for (DUIterator_Fast imax, i = addr->fast_outs(imax); i < imax; i++) {
    Node* u = addr->fast_out(i);
    if (u != this && u->is_LoadBarrier() && u->in(Similar)->is_top()) {
      Node* this_ctrl = in(Control);
      Node* other_ctrl = u->in(Control);
      if (is_dominator(NULL, false, this_ctrl, other_ctrl)) {
        igvn->_worklist.push(u);
      }
    }
  }
}

Node *LoadBarrierNode::Identity(PhaseGVN *phase) {
  LoadBarrierNode* dominating_barrier = has_dominating_barrier(NULL, true, false);
  if (dominating_barrier != NULL) {
    assert(!is_weak(), "Weak barriers can't be eliminated");
    assert(dominating_barrier->in(Oop) == in(Oop), "");
    return dominating_barrier;
  }

  return this;
}

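// Ideal() tries, in order: to trim a dead control region, to narrow the
// Memory input of a MergeMem to the raw slice, to record a dominating barrier
// on the Similar edge, to eliminate the barrier outright when it is dominated
// or has no true uses, to clear a Similar edge that no longer points at a
// barrier, and finally to re-push dependent barriers so they get reprocessed.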
Node *LoadBarrierNode::Ideal(PhaseGVN *phase, bool can_reshape) {
  if (remove_dead_region(phase, can_reshape)) {
    return this;
  }

  Node *val = in(Oop);
  Node *mem = in(Memory);
  Node *ctrl = in(Control);

  assert(val->Opcode() != Op_LoadN, "");
  assert(val->Opcode() != Op_DecodeN, "");

  if (mem->is_MergeMem()) {
    Node *new_mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw);
    set_req(Memory, new_mem);
    if (mem->outcnt() == 0 && can_reshape) {
      phase->is_IterGVN()->_worklist.push(mem);
    }
    return this;
  }

  LoadBarrierNode *dominating_barrier = NULL;
  if (!is_weak()) {
    dominating_barrier = has_dominating_barrier(NULL, !can_reshape, !phase->C->major_progress());
    if (dominating_barrier != NULL && dominating_barrier->in(Oop) != in(Oop)) {
      assert(in(Address) == dominating_barrier->in(Address), "");
      set_req(Similar, dominating_barrier->proj_out(Oop));
      return this;
    }
  }

  bool eliminate = can_reshape && (dominating_barrier != NULL || !has_true_uses());
  if (eliminate) {
    if (can_reshape) {
      PhaseIterGVN* igvn = phase->is_IterGVN();
      Node* out_ctrl = proj_out_or_null(Control);
      Node* out_res = proj_out_or_null(Oop);

      if (out_ctrl != NULL) {
        igvn->replace_node(out_ctrl, ctrl);
      }

      // This transformation may leave the Similar edge of other load barriers invalid
      fix_similar_in_uses(igvn);
      if (out_res != NULL) {
        if (dominating_barrier != NULL) {
          assert(!is_weak(), "Sanity");
          igvn->replace_node(out_res, dominating_barrier->proj_out(Oop));
        } else {
          igvn->replace_node(out_res, val);
        }
      }
    }
    return new ConINode(TypeInt::ZERO);
  }

  // If the Similar edge is no longer a load barrier, clear it
  Node* similar = in(Similar);
  if (!similar->is_top() && !(similar->is_Proj() && similar->in(0)->is_LoadBarrier())) {
    set_req(Similar, phase->C->top());
    return this;
  }

  if (can_reshape && !is_weak()) {
    // If this barrier is linked through the Similar edge by a
    // dominated barrier and both barriers have the same Oop field,
    // the dominated barrier can go away, so push it for reprocessing.
    // We also want to avoid having a barrier depend, through its Similar
    // edge, on a dominating barrier that itself depends on yet another
    // barrier through its own Similar edge; the first barrier should
    // rather depend on the third one directly.
    PhaseIterGVN* igvn = phase->is_IterGVN();
    Node* out_res = proj_out(Oop);
    for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
      Node* u = out_res->fast_out(i);
      if (u->is_LoadBarrier() && u->in(Similar) == out_res &&
          (u->in(Oop) == val || !u->in(Similar)->is_top())) {
        assert(!u->as_LoadBarrier()->is_weak(), "Sanity");
        igvn->_worklist.push(u);
      }
    }
    push_dominated_barriers(igvn);
  }

  return NULL;
}

uint LoadBarrierNode::match_edge(uint idx) const {
  ShouldNotReachHere();
  return 0;
}

void LoadBarrierNode::fix_similar_in_uses(PhaseIterGVN* igvn) {
  Node* out_res = proj_out_or_null(Oop);
  if (out_res == NULL) {
    return;
  }

  for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
    Node* u = out_res->fast_out(i);
    if (u->is_LoadBarrier() && u->in(Similar) == out_res) {
      igvn->replace_input_of(u, Similar, igvn->C->top());
      --i;
      --imax;
    }
  }
}

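// A "true use" is any use of the Oop projection other than the Similar edge
// of another load barrier. A barrier whose result is only referenced through
// Similar edges produces no value that anyone consumes and can be removed.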
bool LoadBarrierNode::has_true_uses() const {
  Node* out_res = proj_out_or_null(Oop);
  if (out_res != NULL) {
    for (DUIterator_Fast imax, i = out_res->fast_outs(imax); i < imax; i++) {
      Node *u = out_res->fast_out(i);
      if (!u->is_LoadBarrier() || u->in(Similar) != out_res) {
        return true;
      }
    }
  }
  return false;
}

static bool barrier_needed(C2Access& access) {
  return ZBarrierSet::barrier_needed(access.decorators(), access.type());
}

Node* ZBarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  Node* p = BarrierSetC2::load_at_resolved(access, val_type);
  if (!barrier_needed(access)) {
    return p;
  }

  bool weak = (access.decorators() & ON_WEAK_OOP_REF) != 0;
  assert(access.is_parse_access(), "entry not supported at optimization time");
  if (p->isa_Load()) {
    p->as_Load()->set_barrier(weak);
  }
  return p;
}

Node* ZBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* val_type) const {
  Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, val_type);
  LoadStoreNode* lsn = result->as_LoadStore();
  if (barrier_needed(access)) {
    lsn->set_has_barrier();
  }
  return lsn;
}

Node* ZBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicParseAccess& access, Node* expected_val,
                                                     Node* new_val, const Type* value_type) const {
  Node* result = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
  LoadStoreNode* lsn = result->as_LoadStore();
  if (barrier_needed(access)) {
    lsn->set_has_barrier();
  }
  return lsn;
}

Node* ZBarrierSetC2::atomic_xchg_at_resolved(C2AtomicParseAccess& access, Node* new_val, const Type* val_type) const {
  Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, val_type);
  LoadStoreNode* lsn = result->as_LoadStore();
  if (barrier_needed(access)) {
    lsn->set_has_barrier();
  }
  return lsn;
}

// == Macro Expansion ==

// Optimized, low-spill load barrier variant, using a stub specialized on the register used
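//
// Schematically, the expansion replaces the barrier with the following check
// (pseudocode, not the exact node shapes built below):
//
//   if ((oop & thread->address_bad_mask) != 0) {   // unlikely
//     oop = LoadBarrierSlowReg(adr);               // slow path, heals the oop
//   }
//
// with a Region/Phi pair merging the fast and slow paths afterwards.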
void ZBarrierSetC2::expand_loadbarrier_node(PhaseMacroExpand* phase, LoadBarrierNode* barrier) const {
  PhaseIterGVN &igvn = phase->igvn();
  float unlikely = PROB_UNLIKELY(0.999);

  Node* in_ctrl = barrier->in(LoadBarrierNode::Control);
  Node* in_mem = barrier->in(LoadBarrierNode::Memory);
  Node* in_val = barrier->in(LoadBarrierNode::Oop);
  Node* in_adr = barrier->in(LoadBarrierNode::Address);

  Node* out_ctrl = barrier->proj_out_or_null(LoadBarrierNode::Control);
  Node* out_res = barrier->proj_out(LoadBarrierNode::Oop);

  assert(barrier->in(LoadBarrierNode::Oop) != NULL, "oop to loadbarrier node cannot be null");

  Node* jthread = igvn.transform(new ThreadLocalNode());
  Node* adr = phase->basic_plus_adr(jthread, in_bytes(ZThreadLocalData::address_bad_mask_offset()));
  Node* bad_mask = igvn.transform(LoadNode::make(igvn, in_ctrl, in_mem, adr,
                                                 TypeRawPtr::BOTTOM, TypeX_X, TypeX_X->basic_type(),
                                                 MemNode::unordered));
  Node* cast = igvn.transform(new CastP2XNode(in_ctrl, in_val));
  Node* obj_masked = igvn.transform(new AndXNode(cast, bad_mask));
  Node* cmp = igvn.transform(new CmpXNode(obj_masked, igvn.zerocon(TypeX_X->basic_type())));
  Node *bol = igvn.transform(new BoolNode(cmp, BoolTest::ne))->as_Bool();
  IfNode* iff = igvn.transform(new IfNode(in_ctrl, bol, unlikely, COUNT_UNKNOWN))->as_If();
  Node* then = igvn.transform(new IfTrueNode(iff));
  Node* elsen = igvn.transform(new IfFalseNode(iff));

  Node* new_loadp = igvn.transform(new LoadBarrierSlowRegNode(then, in_mem, in_adr, in_val->adr_type(),
                                                              (const TypePtr*) in_val->bottom_type(), MemNode::unordered, barrier->is_weak()));

  // Create the final region/phi pair to converge control/data paths to downstream code
  Node* result_region = igvn.transform(new RegionNode(3));
  result_region->set_req(1, then);
  result_region->set_req(2, elsen);

  Node* result_phi = igvn.transform(new PhiNode(result_region, TypeInstPtr::BOTTOM));
  result_phi->set_req(1, new_loadp);
  result_phi->set_req(2, barrier->in(LoadBarrierNode::Oop));

  if (out_ctrl != NULL) {
    igvn.replace_node(out_ctrl, result_region);
  }
  igvn.replace_node(out_res, result_phi);

  assert(barrier->outcnt() == 0, "LoadBarrier macro node has non-null outputs after expansion!");

  igvn.remove_dead_node(barrier);
  if (out_ctrl != NULL) {
    igvn.remove_dead_node(out_ctrl);
  }
  igvn.remove_dead_node(out_res);

  assert(is_gc_barrier_node(result_phi), "sanity");
  assert(step_over_gc_barrier(result_phi) == in_val, "sanity");

  phase->C->print_method(PHASE_BARRIER_EXPANSION, 4, barrier->_idx);
}

bool ZBarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const {
  ZBarrierSetC2State* s = state();
  if (s->load_barrier_count() > 0) {
    PhaseMacroExpand macro(igvn);

    int skipped = 0;
    while (s->load_barrier_count() > skipped) {
      int load_barrier_count = s->load_barrier_count();
      LoadBarrierNode * n = s->load_barrier_node(load_barrier_count-1-skipped);
      if (igvn.type(n) == Type::TOP || (n->in(0) != NULL && n->in(0)->is_top())) {
        // Node is unreachable, so don't try to expand it
        s->remove_load_barrier_node(n);
        continue;
      }
      if (!n->can_be_eliminated()) {
        skipped++;
        continue;
      }
      expand_loadbarrier_node(&macro, n);
      assert(s->load_barrier_count() < load_barrier_count, "must have deleted a node from load barrier list");
      if (C->failing()) {
        return true;
      }
    }
    while (s->load_barrier_count() > 0) {
      int load_barrier_count = s->load_barrier_count();
      LoadBarrierNode* n = s->load_barrier_node(load_barrier_count - 1);
      assert(!(igvn.type(n) == Type::TOP || (n->in(0) != NULL && n->in(0)->is_top())), "should have been processed already");
      assert(!n->can_be_eliminated(), "should have been processed already");
      expand_loadbarrier_node(&macro, n);
      assert(s->load_barrier_count() < load_barrier_count, "must have deleted a node from load barrier list");
      if (C->failing()) {
        return true;
      }
    }
    igvn.set_delay_transform(false);
    igvn.optimize();
    if (C->failing()) {
      return true;
    }
  }

  return false;
}

Node* ZBarrierSetC2::step_over_gc_barrier(Node* c) const {
  Node* node = c;

  // 1. This step follows potential oop projections of a load barrier before expansion
  if (node->is_Proj()) {
    node = node->in(0);
  }

  // 2. This step checks for unexpanded load barriers
  if (node->is_LoadBarrier()) {
    return node->in(LoadBarrierNode::Oop);
  }

  // 3. This step checks for the phi corresponding to an optimized load barrier expansion
  if (node->is_Phi()) {
    PhiNode* phi = node->as_Phi();
    Node* n = phi->in(1);
    if (n != NULL && n->is_LoadBarrierSlowReg()) {
      assert(c == node, "projections from step 1 should only be seen before macro expansion");
      return phi->in(2);
    }
  }

  return c;
}

bool ZBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const {
  return type == T_OBJECT || type == T_ARRAY;
}

bool ZBarrierSetC2::final_graph_reshaping(Compile* compile, Node* n, uint opcode) const {
  switch (opcode) {
    case Op_LoadBarrier:
      assert(0, "There should be no load barriers left");
    case Op_ZGetAndSetP:
    case Op_ZCompareAndExchangeP:
    case Op_ZCompareAndSwapP:
    case Op_ZWeakCompareAndSwapP:
    case Op_LoadBarrierSlowReg:
#ifdef ASSERT
      if (VerifyOptoOopOffsets) {
        MemNode *mem = n->as_Mem();
        // Check to see if address types have grounded out somehow.
        const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr();
        if (tp != NULL) {
          ciInstanceKlass *k = tp->klass()->as_instance_klass();
          assert(k->contains_field_offset(tp->offset()), "oop offset is not sane");
        }
      }
#endif
      return true;
    default:
      return false;
  }
}

bool ZBarrierSetC2::matcher_find_shared_visit(Matcher* matcher, Matcher::MStack& mstack, Node* n, uint opcode, bool& mem_op, int& mem_addr_idx) const {
  switch(opcode) {
    case Op_CallLeaf:
      if (n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr() ||
          n->as_Call()->entry_point() == ZBarrierSetRuntime::load_barrier_on_weak_oop_field_preloaded_addr()) {
        mem_op = true;
        mem_addr_idx = TypeFunc::Parms + 1;
        return true;
      }
      return false;
    default:
      return false;
  }
}

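// The Z LoadStore variants carry an extra keep-alive edge (the Oop projection
// of the load barrier, see insert_barrier_before_unsafe() below). The matcher
// only understands two-input Binary packs, so the extra edge is folded into
// BinaryNode pairs here, mirroring what Matcher::find_shared_post_visit does
// for the regular CAS nodes.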
bool ZBarrierSetC2::matcher_find_shared_post_visit(Matcher* matcher, Node* n, uint opcode) const {
  switch(opcode) {
    case Op_ZCompareAndExchangeP:
    case Op_ZCompareAndSwapP:
    case Op_ZWeakCompareAndSwapP: {
      Node *mem = n->in(MemNode::Address);
      Node *keepalive = n->in(5);
      Node *pair1 = new BinaryNode(mem, keepalive);

      Node *newval = n->in(MemNode::ValueIn);
      Node *oldval = n->in(LoadStoreConditionalNode::ExpectedIn);
      Node *pair2 = new BinaryNode(oldval, newval);

      n->set_req(MemNode::Address, pair1);
      n->set_req(MemNode::ValueIn, pair2);
      n->del_req(5);
      n->del_req(LoadStoreConditionalNode::ExpectedIn);
      return true;
    }
    case Op_ZGetAndSetP: {
      Node *keepalive = n->in(4);
      Node *newval = n->in(MemNode::ValueIn);
      Node *pair = new BinaryNode(newval, keepalive);
      n->set_req(MemNode::ValueIn, pair);
      n->del_req(4);
      return true;
    }

    default:
      return false;
  }
}

// == Verification ==

#ifdef ASSERT

static bool look_for_barrier(Node* n, bool post_parse, VectorSet& visited) {
  if (visited.test_set(n->_idx)) {
    return true;
  }

  for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
    Node* u = n->fast_out(i);
    if (u->is_LoadBarrier()) {
    } else if ((u->is_Phi() || u->is_CMove()) && !post_parse) {
      if (!look_for_barrier(u, post_parse, visited)) {
        return false;
      }
    } else if (u->Opcode() == Op_EncodeP || u->Opcode() == Op_DecodeN) {
      if (!look_for_barrier(u, post_parse, visited)) {
        return false;
      }
    } else if (u->Opcode() != Op_SCMemProj) {
      tty->print("bad use"); u->dump();
      return false;
    }
  }

  return true;
}

void ZBarrierSetC2::verify_gc_barriers(Compile* compile, CompilePhase phase) const {
  switch(phase) {
    case BarrierSetC2::BeforeOptimize:
    case BarrierSetC2::BeforeLateInsertion:
      assert(state()->load_barrier_count() == 0, "No barriers inserted yet");
      break;
    case BarrierSetC2::BeforeMacroExpand:
      // Barrier placement should be set by now.
      verify_gc_barriers(false /*post_parse*/);
      break;
    case BarrierSetC2::BeforeCodeGen:
      // Barriers have been fully expanded.
      assert(state()->load_barrier_count() == 0, "No more macro barriers");
      break;
    default:
      assert(0, "Phase without verification");
  }
}

// post_parse implies that there might be load barriers without uses after parsing.
// That only applies when adding barriers at parse time.
void ZBarrierSetC2::verify_gc_barriers(bool post_parse) const {
  ZBarrierSetC2State* s = state();
  Compile* C = Compile::current();
  ResourceMark rm;
  VectorSet visited(Thread::current()->resource_area());

  for (int i = 0; i < s->load_barrier_count(); i++) {
    LoadBarrierNode* n = s->load_barrier_node(i);

    // If a dominating barrier on the same address exists, this barrier
    // must not be applied to the value from the same load; otherwise the
    // value is not reloaded before it's used the second time.
    assert(n->in(LoadBarrierNode::Similar)->is_top() ||
           (n->in(LoadBarrierNode::Similar)->in(0)->is_LoadBarrier() &&
            n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Address) == n->in(LoadBarrierNode::Address) &&
            n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Oop) != n->in(LoadBarrierNode::Oop)),
           "broken similar edge");

    assert(n->as_LoadBarrier()->has_true_uses(),
           "found unneeded load barrier");

    // Several load barrier nodes chained through their Similar edges
    // break the code that removes the barriers in final graph reshaping.
    assert(n->in(LoadBarrierNode::Similar)->is_top() ||
           (n->in(LoadBarrierNode::Similar)->in(0)->is_LoadBarrier() &&
            n->in(LoadBarrierNode::Similar)->in(0)->in(LoadBarrierNode::Similar)->is_top()),
           "chain of Similar load barriers");

    if (!n->in(LoadBarrierNode::Similar)->is_top()) {
      ResourceMark rm;
      Unique_Node_List wq;
      Node* other = n->in(LoadBarrierNode::Similar)->in(0);
      wq.push(n);
      for (uint next = 0; next < wq.size(); ++next) {
        Node *nn = wq.at(next);
        assert(nn->is_CFG(), "");
        assert(!nn->is_SafePoint(), "");

        if (nn == other) {
          continue;
        }

        if (nn->is_Region()) {
          for (uint i = 1; i < nn->req(); i++) {
            Node* m = nn->in(i);
            if (m != NULL) {
              wq.push(m);
            }
          }
        } else {
          Node* m = nn->in(0);
          if (m != NULL) {
            wq.push(m);
          }
        }
      }
    }
  }
}

#endif // end verification code

// This code clones all uses of a load that is between a call and its catch
// blocks down to each use.

static int fixup_uses_in_catch(PhaseIdealLoop *phase, Node *start_ctrl, Node *node) {

  if (!phase->has_ctrl(node)) {
    // This node is floating - doesn't need to be cloned.
    assert(node != start_ctrl, "check");
    return 0; // Unwind - 0 successors
  }

  Node* ctrl = phase->get_ctrl(node);
  if (ctrl != start_ctrl) {
    // We are in a successor block - the node is ok.
    return 0; // Unwind - 0 successors
  }

  // Process successor nodes
  int outcnt = node->outcnt();
  for (int i = 0; i < outcnt; i++) {
    Node* n = node->raw_out(0);
    assert(!n->is_LoadBarrier(), "Sanity");
    // Calling recursively, visiting leaves first
    fixup_uses_in_catch(phase, start_ctrl, n);
  }

  // Now all successors are outside
  // - Clone this node to each successor
  int no_succs = node->outcnt();
  for (DUIterator_Fast jmax, i = node->fast_outs(jmax); i < jmax; i++) {
    Node* use = node->fast_out(i);
    Node* clone = node->clone();
    assert(clone->outcnt() == 0, "");

    assert(use->find_edge(node) != -1, "check");
    phase->igvn().rehash_node_delayed(use);
    use->replace_edge(node, clone);

    Node* new_ctrl;
    if (use->is_block_start()) {
      new_ctrl = use;
    } else if (use->is_CFG()) {
      new_ctrl = use->in(0);
      assert(new_ctrl != NULL, "");
    } else {
      new_ctrl = phase->get_ctrl(use);
    }

    phase->set_ctrl(clone, new_ctrl);

    if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr("  Clone op %i as %i to control %i", node->_idx, clone->_idx, new_ctrl->_idx);
    phase->igvn().register_new_node_with_optimizer(clone);
    --i, --jmax;
  }
  assert(node->outcnt() == 0, "must be empty now");

  // Node node is dead.
  phase->igvn().remove_dead_node(node);

  return no_succs; // unwind - returning number of nodes that were cloned
}

// Clone a load to a specific catch_proj
static Node* clone_load_to_catchproj(PhaseIdealLoop* phase, Node* load, Node* catch_proj) {
  Node* cloned_load = load->clone();
  cloned_load->set_req(0, catch_proj);      // set explicit control
  phase->set_ctrl(cloned_load, catch_proj); // update ctrl
  if (phase->C->directive()->ZTraceLoadBarriersOption) tty->print_cr("  Clone LOAD %i as %i to control %i", load->_idx, cloned_load->_idx, catch_proj->_idx);
  phase->igvn().register_new_node_with_optimizer(cloned_load);
  return cloned_load;
}

static Node* get_dominating_region(PhaseIdealLoop* phase, Node* node, Node* stop) {
  Node* region = node;
  while (!region->isa_Region()) {
    Node *up = phase->idom(region);
    assert(up != region, "Must not loop");
    assert(up != stop,   "Must not find original control");
    region = up;
  }
  return region;
}

// Clone this load to each catch block
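//
// Schematically, a load L pinned between a call and its Catch:
//
//   call -> proj -> Catch -> CatchProj_0 ... CatchProj_n
//            L (control on the call proj)
//
// becomes one clone of L per CatchProj that actually needs it; uses that are
// only reached after control from several CatchProjs has merged get a Phi
// (or a chain of Phis) over the clones instead.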
static void call_catch_cleanup_one(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl) {
  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
  phase->igvn().set_delay_transform(true);

  // Verify preconditions
  assert(ctrl->isa_Proj() && ctrl->in(0)->isa_Call(), "Must be a call proj");
  assert(ctrl->raw_out(0)->isa_Catch(), "Must be a catch");

  if (ctrl->raw_out(0)->isa_Catch()->outcnt() == 1) {
    if (trace) tty->print_cr("Cleaning up catch: Skipping load %i, call with single catch", load->_idx);
    return;
  }

  // Process the load's successor nodes - if any of them is between
  // the call and the catch blocks, it needs to be cloned too.
  // This is done recursively
  int outcnt = load->outcnt();
  int index = 0;
  for (int i = 0; i < outcnt; i++) {
    Node* n = load->raw_out(index);
    assert(!n->is_LoadBarrier(), "Sanity");
    int removed = fixup_uses_in_catch(phase, ctrl, n);
    if (removed == 0) { // if no successor was cloned, progress to next out.
      index++;
    }
  }

  // Now all the load's uses have been cloned down.
  // The only thing left is to clone the loads themselves, but they must
  // end up first in the catch blocks.

  // We clone the loads to the catch blocks only when needed.
  // An array is used to map the catch blocks to each lazily cloned load.
  // In that way no extra unnecessary loads are cloned.

  // Any use dominated by the original block must have a phi and a region added

  Node* catch_node = ctrl->raw_out(0);
  int number_of_catch_projs = catch_node->outcnt();
  Node** proj_to_load_mapping = NEW_RESOURCE_ARRAY(Node*, number_of_catch_projs);
  Copy::zero_to_bytes(proj_to_load_mapping, sizeof(Node*) * number_of_catch_projs);

  for (unsigned int i = 0; i < load->outcnt(); i++) {
    Node* load_use_control = NULL;
    Node* load_use = load->raw_out(i);

    if (phase->has_ctrl(load_use)) {
      load_use_control = phase->get_ctrl(load_use);
    } else {
      load_use_control = load_use->in(0);
    }
    assert(load_use_control != NULL, "sanity");
    if (trace) tty->print_cr("  Handling use: %i, with control: %i", load_use->_idx, load_use_control->_idx);

    // Sometimes the load's use is a phi. For those we need to determine
    // from which catch block the use is defined.
    bool load_use_is_phi = false;
    unsigned int load_use_phi_index = 0;
    Node* phi_ctrl = NULL;
    if (load_use->is_Phi()) {
      // Find the phi input that matches the load
      for (unsigned int u = 1; u < load_use->req(); u++) {
        if (load_use->in(u) == load) {
          load_use_is_phi = true;
          load_use_phi_index = u;
          assert(load_use->in(0)->is_Region(), "Region or broken");
          phi_ctrl = load_use->in(0)->in(u);
          assert(phi_ctrl->is_CFG(), "check");
          assert(phi_ctrl != load,   "check");
          break;
        }
      }
      assert(load_use_is_phi,        "must find");
      assert(load_use_phi_index > 0, "sanity");
    }

    // For each load use, see which catch proj dominates; create the load clone lazily and reconnect
    bool found_dominating_catchproj = false;
    for (int c = 0; c < number_of_catch_projs; c++) {
      Node* catchproj = catch_node->raw_out(c);
      assert(catchproj != NULL && catchproj->isa_CatchProj(), "Sanity");

      if (!phase->is_dominator(catchproj, load_use_control)) {
        if (load_use_is_phi && phase->is_dominator(catchproj, phi_ctrl)) {
          // The load's use is local to the catchproj.
          // fall out and replace the load with the catch-local load clone.
        } else {
          continue;
        }
      }
      assert(!found_dominating_catchproj, "Max one should match");

      // Clone loads to catch projs
      Node* load_clone = proj_to_load_mapping[c];
      if (load_clone == NULL) {
        load_clone = clone_load_to_catchproj(phase, load, catchproj);
        proj_to_load_mapping[c] = load_clone;
      }
      phase->igvn().rehash_node_delayed(load_use);

      if (load_use_is_phi) {
        // phis are special - the load is defined from a specific control flow
        load_use->set_req(load_use_phi_index, load_clone);
      } else {
        // Multiple edges can be replaced at once - on calls for example
        load_use->replace_edge(load, load_clone);
      }
      --i; // more than one edge may have been removed; the rest are handled in later iterations

      // We could break the for-loop after finding a dominating match.
      // But keep iterating to catch any bad idom early.
      found_dominating_catchproj = true;
    }

    // We found no single catchproj that dominates the use - the use is at a point
    // where control flow from multiple catch projs has merged. We will have to create
    // phi nodes before the use and tie the output from the cloned loads together. It
    // can be a single phi or a number of chained phis, depending on control flow
    if (!found_dominating_catchproj) {

      // Use phi-control if use is a phi
      if (load_use_is_phi) {
        load_use_control = phi_ctrl;
      }
      assert(phase->is_dominator(ctrl, load_use_control), "Common use but no dominator");

      // Clone a load on all paths
      for (int c = 0; c < number_of_catch_projs; c++) {
        Node* catchproj = catch_node->raw_out(c);
        Node* load_clone = proj_to_load_mapping[c];
        if (load_clone == NULL) {
          load_clone = clone_load_to_catchproj(phase, load, catchproj);
          proj_to_load_mapping[c] = load_clone;
        }
      }

      // Move up the dominator tree from the use until the dominance frontier is reached
      Node* next_region = get_dominating_region(phase, load_use_control, ctrl);
      while (phase->idom(next_region) != catch_node) {
        next_region = phase->idom(next_region);
      }
      assert(phase->is_dominator(catch_node, next_region), "Sanity");

      // Create a phi node that will collect all cloned loads and feed it to the use.
      PhiNode* next_phi = new PhiNode(next_region, load->type());
      phase->igvn().rehash_node_delayed(load_use);
      load_use->replace_edge(load, next_phi);

      int dominators_of_region = 0;
      do {
        // New phi, connect to region and add all loads as in.
        Node* region = next_region;
        assert(region->isa_Region() && region->req() > 2, "Catch dead region nodes");
        PhiNode* new_phi = next_phi;

        if (trace) tty->print_cr("Created Phi %i on load %i with control %i", new_phi->_idx, load->_idx, region->_idx);

        // Need to add all cloned loads to the phi, taking care that the right path is matched
        dominators_of_region = 0; // reset for new region
        for (unsigned int reg_i = 1; reg_i < region->req(); reg_i++) {
          Node* region_pred = region->in(reg_i);
          assert(region_pred->is_CFG(), "check");
          bool pred_has_dominator = false;
          for (int c = 0; c < number_of_catch_projs; c++) {
            Node* catchproj = catch_node->raw_out(c);
            if (phase->is_dominator(catchproj, region_pred)) {
              new_phi->set_req(reg_i, proj_to_load_mapping[c]);
              if (trace) tty->print_cr(" - Phi in(%i) set to load %i", reg_i, proj_to_load_mapping[c]->_idx);
              pred_has_dominator = true;
              dominators_of_region++;
              break;
            }
          }

          // Sometimes we need to chain several phis.
          if (!pred_has_dominator) {
            assert(dominators_of_region <= 1, "More than one region can't require extra phi");
            if (trace) tty->print_cr(" - Region %i pred %i not dominated by catch proj", region->_idx, region_pred->_idx);
            // Continue the search on this region_pred
            // - walk up to the next region
            // - create a new phi and connect it to the first new_phi
            next_region = get_dominating_region(phase, region_pred, ctrl);
            next_phi = new PhiNode(next_region, load->type());
            new_phi->set_req(reg_i, next_phi);
          }
        }

        new_phi->set_req(0, region);
        phase->igvn().register_new_node_with_optimizer(new_phi);
        phase->set_ctrl(new_phi, region);

        assert(dominators_of_region != 0, "Must have found one this iteration");
      } while (dominators_of_region == 1);

      --i;
    }
  } // end of loop over uses

  assert(load->outcnt() == 0, "All uses should be handled");
  phase->igvn().remove_dead_node(load);
  phase->C->print_method(PHASE_CALL_CATCH_CLEANUP, 4, load->_idx);

  // Now we should be home
  phase->igvn().set_delay_transform(false);
}

// Sort out the loads that are between a call and its catch blocks
static void process_catch_cleanup_candidate(PhaseIdealLoop* phase, LoadNode* load) {
  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;

  Node* ctrl = phase->get_ctrl(load);
  if (!ctrl->is_Proj() || (ctrl->in(0) == NULL) || !ctrl->in(0)->isa_Call()) {
    return;
  }

  Node* catch_node = ctrl->isa_Proj()->raw_out(0);
  if (catch_node->is_Catch()) {
    if (catch_node->outcnt() > 1) {
      call_catch_cleanup_one(phase, load, ctrl);
    } else {
      if (trace) tty->print_cr("Call catch cleanup with only one catch: load %i ", load->_idx);
    }
  }
}

void ZBarrierSetC2::barrier_insertion_phase(PhaseIterGVN& igvn) const {
  PhaseIdealLoop ideal_loop(igvn, LoopOptsBarrierInsertion);
  igvn.C->clear_major_progress();
}

void ZBarrierSetC2::barrier_insertion(PhaseIdealLoop* phase) const {
  // First make sure all loads between a call and its catch blocks are moved to the catch blocks
  clean_catch_blocks(phase);

  // Then expand barriers on all loads
  insert_load_barriers(phase);

  // Handle all Unsafe that need barriers.
  insert_barriers_on_unsafe(phase);
}

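// A conditional update whose expected value is NULL can skip the load
// barrier: NULL is never a bad oop, so a stale (unhealed) pointer in the
// field can never compare equal to it and the operation simply fails, as
// it should.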
static bool can_simplify_cas(LoadStoreNode* node) {
  if (node->isa_LoadStoreConditional()) {
    Node *expected_in = node->as_LoadStoreConditional()->in(LoadStoreConditionalNode::ExpectedIn);
    return (expected_in->get_ptr_type() == TypePtr::NULL_PTR);
  } else {
    return false;
  }
}

static void insert_barrier_before_unsafe(PhaseIdealLoop* phase, LoadStoreNode* old_node) {

  Compile *C = phase->C;
  PhaseIterGVN &igvn = phase->igvn();
  LoadStoreNode* zclone = NULL;
  bool is_weak = false;

  Node *in_ctrl = old_node->in(MemNode::Control);
  Node *in_mem  = old_node->in(MemNode::Memory);
  Node *in_adr  = old_node->in(MemNode::Address);
  Node *in_val  = old_node->in(MemNode::ValueIn);
  const TypePtr *adr_type = old_node->adr_type();
  const TypePtr* load_type = TypeOopPtr::BOTTOM; // The type for the load we are adding

  switch (old_node->Opcode()) {
    case Op_CompareAndExchangeP: {
      zclone = new ZCompareAndExchangePNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
              adr_type, old_node->get_ptr_type(), ((CompareAndExchangeNode*)old_node)->order());
      load_type = old_node->bottom_type()->is_ptr();
      break;
    }
    case Op_WeakCompareAndSwapP: {
      if (can_simplify_cas(old_node)) {
        break;
      }
      is_weak  = true;
      zclone = new ZWeakCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
              ((CompareAndSwapNode*)old_node)->order());
      adr_type = TypePtr::BOTTOM;
      break;
    }
    case Op_CompareAndSwapP: {
      if (can_simplify_cas(old_node)) {
        break;
      }
      zclone = new ZCompareAndSwapPNode(in_ctrl, in_mem, in_adr, in_val, old_node->in(LoadStoreConditionalNode::ExpectedIn),
              ((CompareAndSwapNode*)old_node)->order());
      adr_type = TypePtr::BOTTOM;
      break;
    }
    case Op_GetAndSetP: {
      zclone = new ZGetAndSetPNode(in_ctrl, in_mem, in_adr, in_val, old_node->adr_type(), old_node->get_ptr_type());
      load_type = old_node->bottom_type()->is_ptr();
      break;
    }
  }
  if (zclone != NULL) {
    igvn.register_new_node_with_optimizer(zclone, old_node);

    // Make load
    LoadPNode *load = new LoadPNode(NULL, in_mem, in_adr, adr_type, load_type, MemNode::unordered,
                                    LoadNode::DependsOnlyOnTest);
    load->set_barrier_expanded();
    igvn.register_new_node_with_optimizer(load);
    igvn.replace_node(old_node, zclone);

    Node *barrier = new LoadBarrierNode(C, NULL, in_mem, load, in_adr, is_weak);
    Node *barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop);
    Node *barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control);

    igvn.register_new_node_with_optimizer(barrier);
    igvn.register_new_node_with_optimizer(barrier_val);
    igvn.register_new_node_with_optimizer(barrier_ctrl);

    // Loop over all uses of in_ctrl and move them to barrier_ctrl
    for (DUIterator_Last imin, i = in_ctrl->last_outs(imin); i >= imin; --i) {
      Node *use = in_ctrl->last_out(i);
      uint l;
      for (l = 0; use->in(l) != in_ctrl; l++) {}
      igvn.replace_input_of(use, l, barrier_ctrl);
    }

    load->set_req(MemNode::Control, in_ctrl);
    barrier->set_req(LoadBarrierNode::Control, in_ctrl);
    zclone->add_req(barrier_val); // add req as keep alive.

    C->print_method(PHASE_ADD_UNSAFE_BARRIER, 4, zclone->_idx);
  }
}

void ZBarrierSetC2::insert_barriers_on_unsafe(PhaseIdealLoop* phase) const {
  Compile *C = phase->C;
  PhaseIterGVN &igvn = phase->igvn();
  uint new_ids = C->unique();
  VectorSet visited(Thread::current()->resource_area());
  GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
  nodeStack.push(C->root());
  visited.test_set(C->root()->_idx);

  // Traverse all nodes, visit all unsafe ops that require a barrier
  while (nodeStack.length() > 0) {
    Node *n = nodeStack.pop();

    bool is_old_node = (n->_idx < new_ids); // don't process nodes that were created during cleanup
    if (is_old_node) {
      if (n->is_LoadStore()) {
        LoadStoreNode* lsn = n->as_LoadStore();
        if (lsn->has_barrier()) {
          BasicType bt = lsn->in(MemNode::Address)->bottom_type()->basic_type();
          assert((bt == T_OBJECT || bt == T_ARRAY), "Sanity test");
          insert_barrier_before_unsafe(phase, lsn);
        }
      }
    }
    for (uint i = 0; i < n->len(); i++) {
      if (n->in(i)) {
        if (!visited.test_set(n->in(i)->_idx)) {
          nodeStack.push(n->in(i));
        }
      }
    }
  }

  igvn.optimize();
  C->print_method(PHASE_ADD_UNSAFE_BARRIER, 2);
}

// The purpose of ZBarrierSetC2::clean_catch_blocks is to prepare the IR for
// splicing in load barrier nodes.
//
// The problem is that we might have instructions between a call and its catch nodes.
// (This is usually handled in PhaseCFG::call_catch_cleanup, which clones mach nodes in
// already scheduled blocks.) We can't have loads that require barriers there,
// because we need to splice in new control flow, and that would violate the IR.
//
// clean_catch_blocks finds all loads that require a barrier and clones them, and any
// dependent instructions, down to each use. The loads must be at the beginning of their
// catch block, before any store.
//
// Sometimes the load's use is at a place dominated by all catch blocks; then we need
// a load in each catch block and a Phi at the dominated use.

void ZBarrierSetC2::clean_catch_blocks(PhaseIdealLoop* phase) const {

  Compile *C = phase->C;
  uint new_ids = C->unique();
  PhaseIterGVN &igvn = phase->igvn();
  VectorSet visited(Thread::current()->resource_area());
  GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
  nodeStack.push(C->root());
  visited.test_set(C->root()->_idx);

  // Traverse all nodes, visit all loads that require a barrier
  while (nodeStack.length() > 0) {
    Node *n = nodeStack.pop();

    bool is_old_node = (n->_idx < new_ids); // don't process nodes that were created during cleanup
    if (n->is_Load() && is_old_node) {
      LoadNode* load = n->isa_Load();
      // only care about loads that will have a barrier
      if (load->is_barrier_required()) {
        process_catch_cleanup_candidate(phase, load);
      }
    }

    for (uint i = 0; i < n->len(); i++) {
      if (n->in(i)) {
        if (!visited.test_set(n->in(i)->_idx)) {
          nodeStack.push(n->in(i));
        }
      }
    }
  }

  C->print_method(PHASE_CALL_CATCH_CLEANUP, 2);
}

class DomDepthCompareClosure : public CompareClosure<LoadNode*> {
  PhaseIdealLoop* _phase;

public:
  DomDepthCompareClosure(PhaseIdealLoop* phase) : _phase(phase) { }

  int do_compare(LoadNode* const &n1, LoadNode* const &n2) {
    int d1 = _phase->dom_depth(_phase->get_ctrl(n1));
    int d2 = _phase->dom_depth(_phase->get_ctrl(n2));
    if (d1 == d2) {
      // Compare indexes if the depth is the same; this ensures all entries are unique.
      return n1->_idx - n2->_idx;
    } else {
      return d2 - d1;
    }
  }
};

// Traverse the graph and add all loadPs to the list, sorted by dom depth
void gather_loadnodes_sorted(PhaseIdealLoop* phase, GrowableArray<LoadNode*>* loadList) {

  VectorSet visited(Thread::current()->resource_area());
  GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
  DomDepthCompareClosure ddcc(phase);

  nodeStack.push(phase->C->root());
  while (nodeStack.length() > 0) {
    Node *n = nodeStack.pop();
    if (visited.test(n->_idx)) {
      continue;
    }

    if (n->isa_Load()) {
      LoadNode *load = n->as_Load();
      if (load->is_barrier_required()) {
        assert(phase->get_ctrl(load) != NULL, "sanity");
        assert(phase->dom_depth(phase->get_ctrl(load)) != 0, "sanity");
        loadList->insert_sorted(&ddcc, load);
      }
    }

    visited.set(n->_idx);
    for (uint i = 0; i < n->req(); i++) {
      if (n->in(i)) {
        if (!visited.test(n->in(i)->_idx)) {
          nodeStack.push(n->in(i));
        }
      }
    }
  }
}

// Add LoadBarriers to all LoadPs
void ZBarrierSetC2::insert_load_barriers(PhaseIdealLoop* phase) const {

  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
  GrowableArray<LoadNode *> loadList(Thread::current()->resource_area(), 0, 0, NULL);
  gather_loadnodes_sorted(phase, &loadList);

  PhaseIterGVN &igvn = phase->igvn();
  int count = 0;

  for (GrowableArrayIterator<LoadNode *> loadIter = loadList.begin(); loadIter != loadList.end(); ++loadIter) {
    LoadNode *load = *loadIter;

    if (load->is_barrier_expanded()) {
      continue;
    }

    do {
      // Insert a barrier on a loadP.
      // If another load is found that needs to be expanded first, retry on that one.
      LoadNode* result = insert_one_loadbarrier(phase, load, phase->get_ctrl(load));
      while (result != NULL) {
        result = insert_one_loadbarrier(phase, result, phase->get_ctrl(result));
      }
    } while (!load->is_barrier_expanded());
  }

  phase->C->print_method(PHASE_INSERT_BARRIER, 2);
}

void push_antidependent_stores(PhaseIdealLoop* phase, Node_Stack& nodestack, LoadNode* start_load) {
  // Push all stores on the same memory input that can alias with the load.
  // Any load found behind such a store must be handled first.
  PhaseIterGVN &igvn = phase->igvn();
  int load_alias_idx = igvn.C->get_alias_index(start_load->adr_type());

  Node *mem = start_load->in(1);
  for (DUIterator_Fast imax, u = mem->fast_outs(imax); u < imax; u++) {
    Node *mem_use = mem->fast_out(u);

    if (mem_use == start_load) continue;
    if (!mem_use->is_Store()) continue;
    if (!phase->has_ctrl(mem_use)) continue;
    if (phase->get_ctrl(mem_use) != phase->get_ctrl(start_load)) continue;

    // add any aliasing store in this block
    StoreNode *store = mem_use->isa_Store();
    const TypePtr *adr_type = store->adr_type();
    if (igvn.C->can_alias(adr_type, load_alias_idx)) {
      nodestack.push(store, 0);
    }
  }
}

LoadNode* ZBarrierSetC2::insert_one_loadbarrier(PhaseIdealLoop* phase, LoadNode* start_load, Node* ctrl) const {
  bool trace = phase->C->directive()->ZTraceLoadBarriersOption;
  PhaseIterGVN &igvn = phase->igvn();

  // Check for other loadPs at the same loop depth that are reachable by a DFS
  // - if one is found - return it. It needs to be inserted first
  // - otherwise proceed and insert the barrier

  VectorSet visited(Thread::current()->resource_area());
  Node_Stack nodestack(100);

  nodestack.push(start_load, 0);
  push_antidependent_stores(phase, nodestack, start_load);

  while (!nodestack.is_empty()) {
    Node* n = nodestack.node(); // peek
    nodestack.pop();
    if (visited.test(n->_idx)) {
      continue;
    }

    if (n->is_Load() && n != start_load && n->as_Load()->is_barrier_required() && !n->as_Load()->is_barrier_expanded()) {
      // Found another load that needs a barrier in the same block. It must be expanded first.
      if (trace) tty->print_cr(" * Found LoadP %i on DFS", n->_idx);
      return n->as_Load(); // return node that should be expanded first
    }

    if (!phase->has_ctrl(n)) continue;
    if (phase->get_ctrl(n) != phase->get_ctrl(start_load)) continue;
    if (n->is_Phi()) continue;

    visited.set(n->_idx);
    // push all children
    for (DUIterator_Fast imax, ii = n->fast_outs(imax); ii < imax; ii++) {
      Node* c = n->fast_out(ii);
      if (c != NULL) {
        nodestack.push(c, 0);
      }
    }
  }

  insert_one_loadbarrier_inner(phase, start_load, ctrl, visited);
  return NULL;
}

void ZBarrierSetC2::insert_one_loadbarrier_inner(PhaseIdealLoop* phase, LoadNode* load, Node* ctrl, VectorSet visited2) const {
  PhaseIterGVN &igvn = phase->igvn();
  Compile* C = igvn.C;
  bool trace = C->directive()->ZTraceLoadBarriersOption;

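  // The insertion proceeds in four steps:
  //   1. Create the LoadBarrierNode and its Control/Oop projections.
  //   2. Splice barrier_ctrl into the CFG between the load's ctrl and the
  //      block's control successor, and move the visited same-block nodes
  //      under barrier_ctrl.
  //   3. Redirect all non-barrier uses of the load to the Oop projection.
  //   4. Hook the load itself up as the barrier's Oop and Control inputs.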
  // create barrier
  Node* barrier = new LoadBarrierNode(C, NULL, load->in(LoadNode::Memory), NULL, load->in(LoadNode::Address), load->is_barrier_weak());
  Node* barrier_val = new ProjNode(barrier, LoadBarrierNode::Oop);
  Node* barrier_ctrl = new ProjNode(barrier, LoadBarrierNode::Control);

  if (trace) tty->print_cr("Insert load %i with barrier: %i and ctrl : %i", load->_idx, barrier->_idx, ctrl->_idx);

  // Splice control
  // - insert the barrier control diamond between the load's ctrl and the ctrl successor on the path to the block end.
  // - If the control successor is a catch, step over to the next.
  Node* ctrl_succ = NULL;
  for (DUIterator_Fast imax, j = ctrl->fast_outs(imax); j < imax; j++) {
    Node* tmp = ctrl->fast_out(j);

    // - The CFG node is the one we are going to splice (there is only one!)
    // - Phi nodes will continue to hang from the region node!
    // - self loops should be skipped
    if (tmp->is_Phi() || tmp == ctrl) {
      continue;
    }

    if (tmp->is_CFG()) {
      assert(ctrl_succ == NULL, "There can be only one");
      ctrl_succ = tmp;
      continue;
    }
  }

  // Now splice control
  assert(ctrl_succ != load, "sanity");
  assert(ctrl_succ != NULL, "Broken IR");
  bool found = false;
  for (uint k = 0; k < ctrl_succ->req(); k++) {
    if (ctrl_succ->in(k) == ctrl) {
      assert(!found, "sanity");
      if (trace) tty->print_cr(" Move CFG ctrl_succ %i to barrier_ctrl", ctrl_succ->_idx);
      igvn.replace_input_of(ctrl_succ, k, barrier_ctrl);
      found = true;
      k--;
    }
  }

  // For all successors of ctrl - move all visited to become successors of barrier_ctrl instead
  for (DUIterator_Fast imax, r = ctrl->fast_outs(imax); r < imax; r++) {
    Node* tmp = ctrl->fast_out(r);
    if (visited2.test(tmp->_idx) && (tmp != load)) {
      if (trace) tty->print_cr(" Move ctrl_succ %i to barrier_ctrl", tmp->_idx);
      igvn.replace_input_of(tmp, 0, barrier_ctrl);
      --r; --imax;
    }
  }

  // Move the load's users to the barrier
  for (DUIterator_Fast imax, i = load->fast_outs(imax); i < imax; i++) {
    Node* u = load->fast_out(i);
    if (u->isa_LoadBarrier()) {
      continue;
    }

    // find correct input - replace with iterator?
    for (uint j = 0; j < u->req(); j++) {
      if (u->in(j) == load) {
        igvn.replace_input_of(u, j, barrier_val);
        --i; --imax; // Adjust the iterator of the *outer* loop
        break; // some nodes (calls) might have several uses from the same node
      }
    }
  }

  // Connect barrier to load and control
  barrier->set_req(LoadBarrierNode::Oop, load);
  barrier->set_req(LoadBarrierNode::Control, ctrl);

  igvn.rehash_node_delayed(load);
  igvn.register_new_node_with_optimizer(barrier);
  igvn.register_new_node_with_optimizer(barrier_val);
  igvn.register_new_node_with_optimizer(barrier_ctrl);
  load->set_barrier_expanded();

  C->print_method(PHASE_INSERT_BARRIER, 3, load->_idx);
}

// The bad_mask load from ThreadLocalData shouldn't need an anti-dependence check.
// The bad_mask address is of type TypeRawPtr, but that will alias
// InitializeNodes until the type system is expanded.
bool ZBarrierSetC2::needs_anti_dependence_check(const Node* node) const {
  MachNode* mnode = node->isa_Mach();
  if (mnode != NULL) {
    intptr_t offset = 0;
    const TypePtr *adr_type2 = NULL;
    const Node* base = mnode->get_base_and_disp(offset, adr_type2);
    if ((base != NULL) &&
        (base->is_Mach() && base->as_Mach()->ideal_Opcode() == Op_ThreadLocal) &&
        (offset == in_bytes(ZThreadLocalData::address_bad_mask_offset()))) {
      return false;
    }
  }
  return true;
}