1 /*
   2  * Copyright (c) 2015, 2019, Red Hat, Inc. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 
  27 #include "gc/shenandoah/c2/shenandoahSupport.hpp"
  28 #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp"
  29 #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp"
  30 #include "gc/shenandoah/shenandoahForwarding.hpp"
  31 #include "gc/shenandoah/shenandoahHeap.hpp"
  32 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  33 #include "gc/shenandoah/shenandoahRuntime.hpp"
  34 #include "gc/shenandoah/shenandoahThreadLocalData.hpp"
  35 #include "opto/arraycopynode.hpp"
  36 #include "opto/block.hpp"
  37 #include "opto/callnode.hpp"
  38 #include "opto/castnode.hpp"
  39 #include "opto/movenode.hpp"
  40 #include "opto/phaseX.hpp"
  41 #include "opto/rootnode.hpp"
  42 #include "opto/runtime.hpp"
  43 #include "opto/subnode.hpp"
  44 
  45 bool ShenandoahBarrierC2Support::expand(Compile* C, PhaseIterGVN& igvn) {
  46   ShenandoahBarrierSetC2State* state = ShenandoahBarrierSetC2::bsc2()->state();
  47   if ((state->enqueue_barriers_count() +
  48        state->load_reference_barriers_count()) > 0) {
  49     bool attempt_more_loopopts = ShenandoahLoopOptsAfterExpansion;
  50     C->clear_major_progress();
  51     PhaseIdealLoop ideal_loop(igvn, LoopOptsShenandoahExpand);
  52     if (C->failing()) return false;
  53     PhaseIdealLoop::verify(igvn);
  54     DEBUG_ONLY(verify_raw_mem(C->root());)
  55     if (attempt_more_loopopts) {
  56       C->set_major_progress();
  57       if (!C->optimize_loops(igvn, LoopOptsShenandoahPostExpand)) {
  58         return false;
  59       }
  60       C->clear_major_progress();
  61       if (C->range_check_cast_count() > 0) {
  62         // No more loop optimizations. Remove all range check dependent CastIINodes.
  63         C->remove_range_check_casts(igvn);
  64         igvn.optimize();
  65       }
  66     }
  67   }
  68   return true;
  69 }
  70 
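     // Matches the IR shape produced by test_gc_state() below:
     //   if ((LoadB(thread + gc_state_offset) & mask) != 0) ...
     // i.e. an If on Bool(ne) of CmpI(AndI(gc-state load, mask), 0).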
  71 bool ShenandoahBarrierC2Support::is_gc_state_test(Node* iff, int mask) {
  72   if (!UseShenandoahGC) {
  73     return false;
  74   }
  75   assert(iff->is_If(), "bad input");
  76   if (iff->Opcode() != Op_If) {
  77     return false;
  78   }
  79   Node* bol = iff->in(1);
  80   if (!bol->is_Bool() || bol->as_Bool()->_test._test != BoolTest::ne) {
  81     return false;
  82   }
  83   Node* cmp = bol->in(1);
  84   if (cmp->Opcode() != Op_CmpI) {
  85     return false;
  86   }
  87   Node* in1 = cmp->in(1);
  88   Node* in2 = cmp->in(2);
  89   if (in2->find_int_con(-1) != 0) {
  90     return false;
  91   }
  92   if (in1->Opcode() != Op_AndI) {
  93     return false;
  94   }
  95   in2 = in1->in(2);
  96   if (in2->find_int_con(-1) != mask) {
  97     return false;
  98   }
  99   in1 = in1->in(1);
 100 
 101   return is_gc_state_load(in1);
 102 }
 103 
 104 bool ShenandoahBarrierC2Support::is_heap_stable_test(Node* iff) {
 105   return is_gc_state_test(iff, ShenandoahHeap::HAS_FORWARDED);
 106 }
 107 
 108 bool ShenandoahBarrierC2Support::is_gc_state_load(Node *n) {
 109   if (!UseShenandoahGC) {
 110     return false;
 111   }
 112   if (n->Opcode() != Op_LoadB && n->Opcode() != Op_LoadUB) {
 113     return false;
 114   }
 115   Node* addp = n->in(MemNode::Address);
 116   if (!addp->is_AddP()) {
 117     return false;
 118   }
 119   Node* base = addp->in(AddPNode::Address);
 120   Node* off = addp->in(AddPNode::Offset);
 121   if (base->Opcode() != Op_ThreadLocal) {
 122     return false;
 123   }
 124   if (off->find_intptr_t_con(-1) != in_bytes(ShenandoahThreadLocalData::gc_state_offset())) {
 125     return false;
 126   }
 127   return true;
 128 }
 129 
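     // Walks control inputs backwards from start towards stop (which must
     // dominate start) and reports whether a non-leaf-call safepoint is found.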
 130 bool ShenandoahBarrierC2Support::has_safepoint_between(Node* start, Node* stop, PhaseIdealLoop *phase) {
 131   assert(phase->is_dominator(stop, start), "bad inputs");
 132   ResourceMark rm;
 133   Unique_Node_List wq;
 134   wq.push(start);
 135   for (uint next = 0; next < wq.size(); next++) {
 136     Node *m = wq.at(next);
 137     if (m == stop) {
 138       continue;
 139     }
 140     if (m->is_SafePoint() && !m->is_CallLeaf()) {
 141       return true;
 142     }
 143     if (m->is_Region()) {
 144       for (uint i = 1; i < m->req(); i++) {
 145         wq.push(m->in(i));
 146       }
 147     } else {
 148       wq.push(m->in(0));
 149     }
 150   }
 151   return false;
 152 }
 153 
 154 #ifdef ASSERT
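     // Debug-only helper for verify(): follows 'in' through casts, AddPs, phis
     // and CMoves and checks that the value is covered by a load reference
     // barrier (and, for oop stores, also by an enqueue barrier on the path).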
 155 bool ShenandoahBarrierC2Support::verify_helper(Node* in, Node_Stack& phis, VectorSet& visited, verify_type t, bool trace, Unique_Node_List& barriers_used) {
 156   assert(phis.size() == 0, "");
 157 
 158   while (true) {
 159     if (in->bottom_type() == TypePtr::NULL_PTR) {
 160       if (trace) {tty->print_cr("NULL");}
 161     } else if (!in->bottom_type()->make_ptr()->make_oopptr()) {
 162       if (trace) {tty->print_cr("Non oop");}
 163     } else {
 164       if (in->is_ConstraintCast()) {
 165         in = in->in(1);
 166         continue;
 167       } else if (in->is_AddP()) {
 168         assert(!in->in(AddPNode::Address)->is_top(), "no raw memory access");
 169         in = in->in(AddPNode::Address);
 170         continue;
 171       } else if (in->is_Con()) {
 172         if (trace) {
 173           tty->print("Found constant");
 174           in->dump();
 175         }
 176       } else if (in->Opcode() == Op_Parm) {
 177         if (trace) {
 178           tty->print("Found argument");
 179         }
 180       } else if (in->Opcode() == Op_CreateEx) {
 181         if (trace) {
 182           tty->print("Found create-exception");
 183         }
 184       } else if (in->Opcode() == Op_LoadP && in->adr_type() == TypeRawPtr::BOTTOM) {
 185         if (trace) {
 186           tty->print("Found raw LoadP (OSR argument?)");
 187         }
 188       } else if (in->Opcode() == Op_ShenandoahLoadReferenceBarrier) {
 189         if (t == ShenandoahOopStore) {
 190           uint i = 0;
 191           for (; i < phis.size(); i++) {
 192             Node* n = phis.node_at(i);
 193             if (n->Opcode() == Op_ShenandoahEnqueueBarrier) {
 194               break;
 195             }
 196           }
 197           if (i == phis.size()) {
 198             return false;
 199           }
 200         }
 201         barriers_used.push(in);
 202         if (trace) {tty->print("Found barrier"); in->dump();}
 203       } else if (in->Opcode() == Op_ShenandoahEnqueueBarrier) {
 204         if (t != ShenandoahOopStore) {
 205           in = in->in(1);
 206           continue;
 207         }
 208         if (trace) {tty->print("Found enqueue barrier"); in->dump();}
 209         phis.push(in, in->req());
 210         in = in->in(1);
 211         continue;
 212       } else if (in->is_Proj() && in->in(0)->is_Allocate()) {
 213         if (trace) {
 214           tty->print("Found alloc");
 215           in->in(0)->dump();
 216         }
 217       } else if (in->is_Proj() && (in->in(0)->Opcode() == Op_CallStaticJava || in->in(0)->Opcode() == Op_CallDynamicJava)) {
 218         if (trace) {
 219           tty->print("Found Java call");
 220         }
 221       } else if (in->is_Phi()) {
 222         if (!visited.test_set(in->_idx)) {
 223           if (trace) {tty->print("Pushed phi:"); in->dump();}
 224           phis.push(in, 2);
 225           in = in->in(1);
 226           continue;
 227         }
 228         if (trace) {tty->print("Already seen phi:"); in->dump();}
 229       } else if (in->Opcode() == Op_CMoveP || in->Opcode() == Op_CMoveN) {
 230         if (!visited.test_set(in->_idx)) {
 231           if (trace) {tty->print("Pushed cmovep:"); in->dump();}
 232           phis.push(in, CMoveNode::IfTrue);
 233           in = in->in(CMoveNode::IfFalse);
 234           continue;
 235         }
 236         if (trace) {tty->print("Already seen cmovep:"); in->dump();}
 237       } else if (in->Opcode() == Op_EncodeP || in->Opcode() == Op_DecodeN) {
 238         in = in->in(1);
 239         continue;
 240       } else {
 241         return false;
 242       }
 243     }
 244     bool cont = false;
 245     while (phis.is_nonempty()) {
 246       uint idx = phis.index();
 247       Node* phi = phis.node();
 248       if (idx >= phi->req()) {
 249         if (trace) {tty->print("Popped phi:"); phi->dump();}
 250         phis.pop();
 251         continue;
 252       }
 253       if (trace) {tty->print("Next entry(%d) for phi:", idx); phi->dump();}
 254       in = phi->in(idx);
 255       phis.set_index(idx+1);
 256       cont = true;
 257       break;
 258     }
 259     if (!cont) {
 260       break;
 261     }
 262   }
 263   return true;
 264 }
 265 
 266 void ShenandoahBarrierC2Support::report_verify_failure(const char* msg, Node* n1, Node* n2) {
 267   if (n1 != NULL) {
 268     n1->dump(+10);
 269   }
 270   if (n2 != NULL) {
 271     n2->dump(+10);
 272   }
 273   fatal("%s", msg);
 274 }
 275 
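     // Debug-only verification entry point: checks that oop-consuming nodes
     // (loads, stores, compares, load-stores, intrinsic leaf calls) see values
     // that are covered by the expected barriers.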
 276 void ShenandoahBarrierC2Support::verify(RootNode* root) {
 277   ResourceMark rm;
 278   Unique_Node_List wq;
 279   GrowableArray<Node*> barriers;
 280   Unique_Node_List barriers_used;
 281   Node_Stack phis(0);
 282   VectorSet visited(Thread::current()->resource_area());
 283   const bool trace = false;
 284   const bool verify_no_useless_barrier = false;
 285 
 286   wq.push(root);
 287   for (uint next = 0; next < wq.size(); next++) {
 288     Node *n = wq.at(next);
 289     if (n->is_Load()) {
 290       const bool trace = false;
 291       if (trace) {tty->print("Verifying"); n->dump();}
 292       if (n->Opcode() == Op_LoadRange || n->Opcode() == Op_LoadKlass || n->Opcode() == Op_LoadNKlass) {
 293         if (trace) {tty->print_cr("Load range/klass");}
 294       } else {
 295         const TypePtr* adr_type = n->as_Load()->adr_type();
 296 
 297         if (adr_type->isa_oopptr() && adr_type->is_oopptr()->offset() == oopDesc::mark_offset_in_bytes()) {
 298           if (trace) {tty->print_cr("Mark load");}
 299         } else if (adr_type->isa_instptr() &&
 300                    adr_type->is_instptr()->klass()->is_subtype_of(Compile::current()->env()->Reference_klass()) &&
 301                    adr_type->is_instptr()->offset() == java_lang_ref_Reference::referent_offset) {
 302           if (trace) {tty->print_cr("Reference.get()");}
 303         } else if (!verify_helper(n->in(MemNode::Address), phis, visited, ShenandoahLoad, trace, barriers_used)) {
 304           report_verify_failure("Shenandoah verification: Load should have barriers", n);
 305         }
 306       }
 307     } else if (n->is_Store()) {
 308       const bool trace = false;
 309 
 310       if (trace) {tty->print("Verifying"); n->dump();}
 311       if (n->in(MemNode::ValueIn)->bottom_type()->make_oopptr()) {
 312         Node* adr = n->in(MemNode::Address);
 313         bool verify = true;
 314 
 315         if (adr->is_AddP() && adr->in(AddPNode::Base)->is_top()) {
 316           adr = adr->in(AddPNode::Address);
 317           if (adr->is_AddP()) {
 318             assert(adr->in(AddPNode::Base)->is_top(), "");
 319             adr = adr->in(AddPNode::Address);
 320             if (adr->Opcode() == Op_LoadP &&
 321                 adr->in(MemNode::Address)->in(AddPNode::Base)->is_top() &&
 322                 adr->in(MemNode::Address)->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
 323                 adr->in(MemNode::Address)->in(AddPNode::Offset)->find_intptr_t_con(-1) == in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())) {
 324               if (trace) {tty->print_cr("SATB prebarrier");}
 325               verify = false;
 326             }
 327           }
 328         }
 329 
 330         if (verify && !verify_helper(n->in(MemNode::ValueIn), phis, visited, ShenandoahStoreValEnqueueBarrier ? ShenandoahOopStore : ShenandoahValue, trace, barriers_used)) {
 331           report_verify_failure("Shenandoah verification: Store should have barriers", n);
 332         }
 333       }
 334       if (!verify_helper(n->in(MemNode::Address), phis, visited, ShenandoahStore, trace, barriers_used)) {
 335         report_verify_failure("Shenandoah verification: Store (address) should have barriers", n);
 336       }
 337     } else if (n->Opcode() == Op_CmpP) {
 338       const bool trace = false;
 339 
 340       Node* in1 = n->in(1);
 341       Node* in2 = n->in(2);
 342       if (in1->bottom_type()->isa_oopptr()) {
 343         if (trace) {tty->print("Verifying"); n->dump();}
 344 
 345         bool mark_inputs = false;
 346         if (in1->bottom_type() == TypePtr::NULL_PTR || in2->bottom_type() == TypePtr::NULL_PTR ||
 347             (in1->is_Con() || in2->is_Con())) {
 348           if (trace) {tty->print_cr("Comparison against a constant");}
 349           mark_inputs = true;
 350         } else if ((in1->is_CheckCastPP() && in1->in(1)->is_Proj() && in1->in(1)->in(0)->is_Allocate()) ||
 351                    (in2->is_CheckCastPP() && in2->in(1)->is_Proj() && in2->in(1)->in(0)->is_Allocate())) {
 352           if (trace) {tty->print_cr("Comparison with newly alloc'ed object");}
 353           mark_inputs = true;
 354         } else {
 355           assert(in2->bottom_type()->isa_oopptr(), "");
 356 
 357           if (!verify_helper(in1, phis, visited, ShenandoahStore, trace, barriers_used) ||
 358               !verify_helper(in2, phis, visited, ShenandoahStore, trace, barriers_used)) {
 359             report_verify_failure("Shenandoah verification: Cmp should have barriers", n);
 360           }
 361         }
 362         if (verify_no_useless_barrier &&
 363             mark_inputs &&
 364             (!verify_helper(in1, phis, visited, ShenandoahValue, trace, barriers_used) ||
 365              !verify_helper(in2, phis, visited, ShenandoahValue, trace, barriers_used))) {
 366           phis.clear();
 367           visited.reset();
 368         }
 369       }
 370     } else if (n->is_LoadStore()) {
 371       if (n->in(MemNode::ValueIn)->bottom_type()->make_ptr() &&
 372           !verify_helper(n->in(MemNode::ValueIn), phis, visited, ShenandoahStoreValEnqueueBarrier ? ShenandoahOopStore : ShenandoahValue, trace, barriers_used)) {
 373         report_verify_failure("Shenandoah verification: LoadStore (value) should have barriers", n);
 374       }
 375 
 376       if (n->in(MemNode::Address)->bottom_type()->make_oopptr() && !verify_helper(n->in(MemNode::Address), phis, visited, ShenandoahStore, trace, barriers_used)) {
 377         report_verify_failure("Shenandoah verification: LoadStore (address) should have barriers", n);
 378       }
 379     } else if (n->Opcode() == Op_CallLeafNoFP || n->Opcode() == Op_CallLeaf) {
 380       CallNode* call = n->as_Call();
 381 
 382       static struct {
 383         const char* name;
 384         struct {
 385           int pos;
 386           verify_type t;
 387         } args[6];
 388       } calls[] = {
 389         "aescrypt_encryptBlock",
 390         { { TypeFunc::Parms, ShenandoahLoad },   { TypeFunc::Parms+1, ShenandoahStore },  { TypeFunc::Parms+2, ShenandoahLoad },
 391           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 392         "aescrypt_decryptBlock",
 393         { { TypeFunc::Parms, ShenandoahLoad },   { TypeFunc::Parms+1, ShenandoahStore },  { TypeFunc::Parms+2, ShenandoahLoad },
 394           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 395         "multiplyToLen",
 396         { { TypeFunc::Parms, ShenandoahLoad },   { TypeFunc::Parms+2, ShenandoahLoad },   { TypeFunc::Parms+4, ShenandoahStore },
 397           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 398         "squareToLen",
 399         { { TypeFunc::Parms, ShenandoahLoad },   { TypeFunc::Parms+2, ShenandoahLoad },   { -1,  ShenandoahNone},
 400           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 401         "montgomery_multiply",
 402         { { TypeFunc::Parms, ShenandoahLoad },   { TypeFunc::Parms+1, ShenandoahLoad },   { TypeFunc::Parms+2, ShenandoahLoad },
 403           { TypeFunc::Parms+6, ShenandoahStore }, { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 404         "montgomery_square",
 405         { { TypeFunc::Parms, ShenandoahLoad },   { TypeFunc::Parms+1, ShenandoahLoad },   { TypeFunc::Parms+5, ShenandoahStore },
 406           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 407         "mulAdd",
 408         { { TypeFunc::Parms, ShenandoahStore },  { TypeFunc::Parms+1, ShenandoahLoad },   { -1,  ShenandoahNone},
 409           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 410         "vectorizedMismatch",
 411         { { TypeFunc::Parms, ShenandoahLoad },   { TypeFunc::Parms+1, ShenandoahLoad },   { -1,  ShenandoahNone},
 412           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 413         "updateBytesCRC32",
 414         { { TypeFunc::Parms+1, ShenandoahLoad }, { -1,  ShenandoahNone},                  { -1,  ShenandoahNone},
 415           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 416         "updateBytesAdler32",
 417         { { TypeFunc::Parms+1, ShenandoahLoad }, { -1,  ShenandoahNone},                  { -1,  ShenandoahNone},
 418           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 419         "updateBytesCRC32C",
 420         { { TypeFunc::Parms+1, ShenandoahLoad }, { TypeFunc::Parms+3, ShenandoahLoad},    { -1,  ShenandoahNone},
 421           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 422         "counterMode_AESCrypt",
 423         { { TypeFunc::Parms, ShenandoahLoad },   { TypeFunc::Parms+1, ShenandoahStore },  { TypeFunc::Parms+2, ShenandoahLoad },
 424           { TypeFunc::Parms+3, ShenandoahStore }, { TypeFunc::Parms+5, ShenandoahStore }, { TypeFunc::Parms+6, ShenandoahStore } },
 425         "cipherBlockChaining_encryptAESCrypt",
 426         { { TypeFunc::Parms, ShenandoahLoad },   { TypeFunc::Parms+1, ShenandoahStore },  { TypeFunc::Parms+2, ShenandoahLoad },
 427           { TypeFunc::Parms+3, ShenandoahLoad },  { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 428         "cipherBlockChaining_decryptAESCrypt",
 429         { { TypeFunc::Parms, ShenandoahLoad },   { TypeFunc::Parms+1, ShenandoahStore },  { TypeFunc::Parms+2, ShenandoahLoad },
 430           { TypeFunc::Parms+3, ShenandoahLoad },  { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 431         "shenandoah_clone_barrier",
 432         { { TypeFunc::Parms, ShenandoahLoad },   { -1,  ShenandoahNone},                  { -1,  ShenandoahNone},
 433           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 434         "ghash_processBlocks",
 435         { { TypeFunc::Parms, ShenandoahStore },  { TypeFunc::Parms+1, ShenandoahLoad },   { TypeFunc::Parms+2, ShenandoahLoad },
 436           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 437         "sha1_implCompress",
 438         { { TypeFunc::Parms, ShenandoahLoad },  { TypeFunc::Parms+1, ShenandoahStore },   { -1, ShenandoahNone },
 439           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 440         "sha256_implCompress",
 441         { { TypeFunc::Parms, ShenandoahLoad },  { TypeFunc::Parms+1, ShenandoahStore },   { -1, ShenandoahNone },
 442           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 443         "sha512_implCompress",
 444         { { TypeFunc::Parms, ShenandoahLoad },  { TypeFunc::Parms+1, ShenandoahStore },   { -1, ShenandoahNone },
 445           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 446         "sha1_implCompressMB",
 447         { { TypeFunc::Parms, ShenandoahLoad },  { TypeFunc::Parms+1, ShenandoahStore },   { -1, ShenandoahNone },
 448           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 449         "sha256_implCompressMB",
 450         { { TypeFunc::Parms, ShenandoahLoad },  { TypeFunc::Parms+1, ShenandoahStore },   { -1, ShenandoahNone },
 451           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 452         "sha512_implCompressMB",
 453         { { TypeFunc::Parms, ShenandoahLoad },  { TypeFunc::Parms+1, ShenandoahStore },   { -1, ShenandoahNone },
 454           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 455         "encodeBlock",
 456         { { TypeFunc::Parms, ShenandoahLoad },  { TypeFunc::Parms+3, ShenandoahStore },   { -1, ShenandoahNone },
 457           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
 458       };
 459 
 460       if (call->is_call_to_arraycopystub()) {
 461         Node* dest = NULL;
 462         const TypeTuple* args = n->as_Call()->_tf->domain();
 463         for (uint i = TypeFunc::Parms, j = 0; i < args->cnt(); i++) {
 464           if (args->field_at(i)->isa_ptr()) {
 465             j++;
 466             if (j == 2) {
 467               dest = n->in(i);
 468               break;
 469             }
 470           }
 471         }
 472         if (!verify_helper(n->in(TypeFunc::Parms), phis, visited, ShenandoahLoad, trace, barriers_used) ||
 473             !verify_helper(dest, phis, visited, ShenandoahStore, trace, barriers_used)) {
 474           report_verify_failure("Shenandoah verification: ArrayCopy should have barriers", n);
 475         }
 476       } else if (strlen(call->_name) > 5 &&
 477                  !strcmp(call->_name + strlen(call->_name) - 5, "_fill")) {
 478         if (!verify_helper(n->in(TypeFunc::Parms), phis, visited, ShenandoahStore, trace, barriers_used)) {
 479           report_verify_failure("Shenandoah verification: _fill should have barriers", n);
 480         }
 481       } else if (!strcmp(call->_name, "shenandoah_wb_pre")) {
 482         // skip
 483       } else {
 484         const int calls_len = sizeof(calls) / sizeof(calls[0]);
 485         int i = 0;
 486         for (; i < calls_len; i++) {
 487           if (!strcmp(calls[i].name, call->_name)) {
 488             break;
 489           }
 490         }
 491         if (i != calls_len) {
 492           const uint args_len = sizeof(calls[0].args) / sizeof(calls[0].args[0]);
 493           for (uint j = 0; j < args_len; j++) {
 494             int pos = calls[i].args[j].pos;
 495             if (pos == -1) {
 496               break;
 497             }
 498             if (!verify_helper(call->in(pos), phis, visited, calls[i].args[j].t, trace, barriers_used)) {
 499               report_verify_failure("Shenandoah verification: intrinsic calls should have barriers", n);
 500             }
 501           }
 502           for (uint j = TypeFunc::Parms; j < call->req(); j++) {
 503             if (call->in(j)->bottom_type()->make_ptr() &&
 504                 call->in(j)->bottom_type()->make_ptr()->isa_oopptr()) {
 505               uint k = 0;
 506               for (; k < args_len && calls[i].args[k].pos != (int)j; k++);
 507               if (k == args_len) {
 508                 fatal("arg %d for call %s not covered", j, call->_name);
 509               }
 510             }
 511           }
 512         } else {
 513           for (uint j = TypeFunc::Parms; j < call->req(); j++) {
 514             if (call->in(j)->bottom_type()->make_ptr() &&
 515                 call->in(j)->bottom_type()->make_ptr()->isa_oopptr()) {
 516               fatal("%s not covered", call->_name);
 517             }
 518           }
 519         }
 520       }
 521     } else if (n->Opcode() == Op_ShenandoahEnqueueBarrier || n->Opcode() == Op_ShenandoahLoadReferenceBarrier) {
 522       // skip
 523     } else if (n->is_AddP()
 524                || n->is_Phi()
 525                || n->is_ConstraintCast()
 526                || n->Opcode() == Op_Return
 527                || n->Opcode() == Op_CMoveP
 528                || n->Opcode() == Op_CMoveN
 529                || n->Opcode() == Op_Rethrow
 530                || n->is_MemBar()
 531                || n->Opcode() == Op_Conv2B
 532                || n->Opcode() == Op_SafePoint
 533                || n->is_CallJava()
 534                || n->Opcode() == Op_Unlock
 535                || n->Opcode() == Op_EncodeP
 536                || n->Opcode() == Op_DecodeN) {
 537       // nothing to do
 538     } else {
 539       static struct {
 540         int opcode;
 541         struct {
 542           int pos;
 543           verify_type t;
 544         } inputs[2];
 545       } others[] = {
 546         Op_FastLock,
 547         { { 1, ShenandoahLoad },                  { -1, ShenandoahNone} },
 548         Op_Lock,
 549         { { TypeFunc::Parms, ShenandoahLoad },    { -1, ShenandoahNone} },
 550         Op_ArrayCopy,
 551         { { ArrayCopyNode::Src, ShenandoahLoad }, { ArrayCopyNode::Dest, ShenandoahStore } },
 552         Op_StrCompressedCopy,
 553         { { 2, ShenandoahLoad },                  { 3, ShenandoahStore } },
 554         Op_StrInflatedCopy,
 555         { { 2, ShenandoahLoad },                  { 3, ShenandoahStore } },
 556         Op_AryEq,
 557         { { 2, ShenandoahLoad },                  { 3, ShenandoahLoad } },
 558         Op_StrIndexOf,
 559         { { 2, ShenandoahLoad },                  { 4, ShenandoahLoad } },
 560         Op_StrComp,
 561         { { 2, ShenandoahLoad },                  { 4, ShenandoahLoad } },
 562         Op_StrEquals,
 563         { { 2, ShenandoahLoad },                  { 3, ShenandoahLoad } },
 564         Op_EncodeISOArray,
 565         { { 2, ShenandoahLoad },                  { 3, ShenandoahStore } },
 566         Op_HasNegatives,
 567         { { 2, ShenandoahLoad },                  { -1, ShenandoahNone} },
 568         Op_CastP2X,
 569         { { 1, ShenandoahLoad },                  { -1, ShenandoahNone} },
 570         Op_StrIndexOfChar,
 571         { { 2, ShenandoahLoad },                  { -1, ShenandoahNone } },
 572       };
 573 
 574       const int others_len = sizeof(others) / sizeof(others[0]);
 575       int i = 0;
 576       for (; i < others_len; i++) {
 577         if (others[i].opcode == n->Opcode()) {
 578           break;
 579         }
 580       }
 581       uint stop = n->is_Call() ? n->as_Call()->tf()->domain()->cnt() : n->req();
 582       if (i != others_len) {
 583         const uint inputs_len = sizeof(others[0].inputs) / sizeof(others[0].inputs[0]);
 584         for (uint j = 0; j < inputs_len; j++) {
 585           int pos = others[i].inputs[j].pos;
 586           if (pos == -1) {
 587             break;
 588           }
 589           if (!verify_helper(n->in(pos), phis, visited, others[i].inputs[j].t, trace, barriers_used)) {
 590             report_verify_failure("Shenandoah verification: intrinsic calls should have barriers", n);
 591           }
 592         }
 593         for (uint j = 1; j < stop; j++) {
 594           if (n->in(j) != NULL && n->in(j)->bottom_type()->make_ptr() &&
 595               n->in(j)->bottom_type()->make_ptr()->make_oopptr()) {
 596             uint k = 0;
 597             for (; k < inputs_len && others[i].inputs[k].pos != (int)j; k++);
 598             if (k == inputs_len) {
 599               fatal("arg %d for node %s not covered", j, n->Name());
 600             }
 601           }
 602         }
 603       } else {
 604         for (uint j = 1; j < stop; j++) {
 605           if (n->in(j) != NULL && n->in(j)->bottom_type()->make_ptr() &&
 606               n->in(j)->bottom_type()->make_ptr()->make_oopptr()) {
 607             fatal("%s not covered", n->Name());
 608           }
 609         }
 610       }
 611     }
 612 
 613     if (n->is_SafePoint()) {
 614       SafePointNode* sfpt = n->as_SafePoint();
 615       if (verify_no_useless_barrier && sfpt->jvms() != NULL) {
 616         for (uint i = sfpt->jvms()->scloff(); i < sfpt->jvms()->endoff(); i++) {
 617           if (!verify_helper(sfpt->in(i), phis, visited, ShenandoahLoad, trace, barriers_used)) {
 618             phis.clear();
 619             visited.reset();
 620           }
 621         }
 622       }
 623     }
 624   }
 625 
 626   if (verify_no_useless_barrier) {
 627     for (int i = 0; i < barriers.length(); i++) {
 628       Node* n = barriers.at(i);
 629       if (!barriers_used.member(n)) {
 630         tty->print("XXX useless barrier"); n->dump(-2);
 631         ShouldNotReachHere();
 632       }
 633     }
 634   }
 635 }
 636 #endif
 637 
 638 bool ShenandoahBarrierC2Support::is_dominator_same_ctrl(Node* c, Node* d, Node* n, PhaseIdealLoop* phase) {
 639   // That both nodes have the same control is not sufficient to prove
 640   // domination, verify that there's no path from d to n
 641   ResourceMark rm;
 642   Unique_Node_List wq;
 643   wq.push(d);
 644   for (uint next = 0; next < wq.size(); next++) {
 645     Node *m = wq.at(next);
 646     if (m == n) {
 647       return false;
 648     }
 649     if (m->is_Phi() && m->in(0)->is_Loop()) {
 650       assert(phase->ctrl_or_self(m->in(LoopNode::EntryControl)) != c, "following loop entry should lead to new control");
 651     } else {
 652       if (m->is_Store() || m->is_LoadStore()) {
 653         // Take anti-dependencies into account
 654         Node* mem = m->in(MemNode::Memory);
 655         for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
 656           Node* u = mem->fast_out(i);
 657           if (u->is_Load() && phase->C->can_alias(m->adr_type(), phase->C->get_alias_index(u->adr_type())) &&
 658               phase->ctrl_or_self(u) == c) {
 659             wq.push(u);
 660           }
 661         }
 662       }
 663       for (uint i = 0; i < m->req(); i++) {
 664         if (m->in(i) != NULL && phase->ctrl_or_self(m->in(i)) == c) {
 665           wq.push(m->in(i));
 666         }
 667       }
 668     }
 669   }
 670   return true;
 671 }
 672 
 673 bool ShenandoahBarrierC2Support::is_dominator(Node* d_c, Node* n_c, Node* d, Node* n, PhaseIdealLoop* phase) {
 674   if (d_c != n_c) {
 675     return phase->is_dominator(d_c, n_c);
 676   }
 677   return is_dominator_same_ctrl(d_c, d, n, phase);
 678 }
 679 
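     // Steps from 'mem' to the next memory state up the memory graph for the
     // given alias index (through projections, safepoints, membars, phis,
     // merge-mems and stores).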
 680 Node* next_mem(Node* mem, int alias) {
 681   Node* res = NULL;
 682   if (mem->is_Proj()) {
 683     res = mem->in(0);
 684   } else if (mem->is_SafePoint() || mem->is_MemBar()) {
 685     res = mem->in(TypeFunc::Memory);
 686   } else if (mem->is_Phi()) {
 687     res = mem->in(1);
 688   } else if (mem->is_MergeMem()) {
 689     res = mem->as_MergeMem()->memory_at(alias);
 690   } else if (mem->is_Store() || mem->is_LoadStore() || mem->is_ClearArray()) {
 691     assert(alias == Compile::AliasIdxRaw, "following raw memory can't lead to a barrier");
 692     res = mem->in(MemNode::Memory);
 693   } else {
 694 #ifdef ASSERT
 695     mem->dump();
 696 #endif
 697     ShouldNotReachHere();
 698   }
 699   return res;
 700 }
 701 
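     // Checks the control path from c up to dom: returns NULL if there is no
     // branching (other than uncommon traps) in between, the single extra If
     // projection if allow_one_proj is set and exactly one is found, and
     // NodeSentinel for unsupported control flow shapes.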
 702 Node* ShenandoahBarrierC2Support::no_branches(Node* c, Node* dom, bool allow_one_proj, PhaseIdealLoop* phase) {
 703   Node* iffproj = NULL;
 704   while (c != dom) {
 705     Node* next = phase->idom(c);
 706     assert(next->unique_ctrl_out() == c || c->is_Proj() || c->is_Region(), "multiple control flow out but no proj or region?");
 707     if (c->is_Region()) {
 708       ResourceMark rm;
 709       Unique_Node_List wq;
 710       wq.push(c);
 711       for (uint i = 0; i < wq.size(); i++) {
 712         Node *n = wq.at(i);
 713         if (n == next) {
 714           continue;
 715         }
 716         if (n->is_Region()) {
 717           for (uint j = 1; j < n->req(); j++) {
 718             wq.push(n->in(j));
 719           }
 720         } else {
 721           wq.push(n->in(0));
 722         }
 723       }
 724       for (uint i = 0; i < wq.size(); i++) {
 725         Node *n = wq.at(i);
 726         assert(n->is_CFG(), "");
 727         if (n->is_Multi()) {
 728           for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
 729             Node* u = n->fast_out(j);
 730             if (u->is_CFG()) {
 731               if (!wq.member(u) && !u->as_Proj()->is_uncommon_trap_proj(Deoptimization::Reason_none)) {
 732                 return NodeSentinel;
 733               }
 734             }
 735           }
 736         }
 737       }
 738     } else  if (c->is_Proj()) {
 739       if (c->is_IfProj()) {
 740         if (c->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none) != NULL) {
 741           // continue;
 742         } else {
 743           if (!allow_one_proj) {
 744             return NodeSentinel;
 745           }
 746           if (iffproj == NULL) {
 747             iffproj = c;
 748           } else {
 749             return NodeSentinel;
 750           }
 751         }
 752       } else if (c->Opcode() == Op_JumpProj) {
 753         return NodeSentinel; // unsupported
 754       } else if (c->Opcode() == Op_CatchProj) {
 755         return NodeSentinel; // unsupported
 756       } else if (c->Opcode() == Op_CProj && next->Opcode() == Op_NeverBranch) {
 757         return NodeSentinel; // unsupported
 758       } else {
 759         assert(next->unique_ctrl_out() == c, "unsupported branch pattern");
 760       }
 761     }
 762     c = next;
 763   }
 764   return iffproj;
 765 }
 766 
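     // Starting from 'mem', walks the memory graph via next_mem() until a
     // memory state whose control strictly dominates 'ctrl' is found; returns
     // NULL if the walk cycles.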
 767 Node* ShenandoahBarrierC2Support::dom_mem(Node* mem, Node* ctrl, int alias, Node*& mem_ctrl, PhaseIdealLoop* phase) {
 768   ResourceMark rm;
 769   VectorSet wq(Thread::current()->resource_area());
 770   wq.set(mem->_idx);
 771   mem_ctrl = phase->ctrl_or_self(mem);
 772   while (!phase->is_dominator(mem_ctrl, ctrl) || mem_ctrl == ctrl) {
 773     mem = next_mem(mem, alias);
 774     if (wq.test_set(mem->_idx)) {
 775       return NULL;
 776     }
 777     mem_ctrl = phase->ctrl_or_self(mem);
 778   }
 779   if (mem->is_MergeMem()) {
 780     mem = mem->as_MergeMem()->memory_at(alias);
 781     mem_ctrl = phase->ctrl_or_self(mem);
 782   }
 783   return mem;
 784 }
 785 
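     // Walks up the dominator tree from 'ctrl' and returns the closest bottom
     // (all-alias) memory state: a bottom memory Phi on a Region, a call's
     // bottom memory projection, or the memory projection of a
     // safepoint/membar/start.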
 786 Node* ShenandoahBarrierC2Support::find_bottom_mem(Node* ctrl, PhaseIdealLoop* phase) {
 787   Node* mem = NULL;
 788   Node* c = ctrl;
 789   do {
 790     if (c->is_Region()) {
 791       Node* phi_bottom = NULL;
 792       for (DUIterator_Fast imax, i = c->fast_outs(imax); i < imax && mem == NULL; i++) {
 793         Node* u = c->fast_out(i);
 794         if (u->is_Phi() && u->bottom_type() == Type::MEMORY) {
 795           if (u->adr_type() == TypePtr::BOTTOM) {
 796             mem = u;
 797           }
 798         }
 799       }
 800     } else {
 801       if (c->is_Call() && c->as_Call()->adr_type() != NULL) {
 802         CallProjections projs;
 803         c->as_Call()->extract_projections(&projs, true, false);
 804         if (projs.fallthrough_memproj != NULL) {
 805           if (projs.fallthrough_memproj->adr_type() == TypePtr::BOTTOM) {
 806             if (projs.catchall_memproj == NULL) {
 807               mem = projs.fallthrough_memproj;
 808             } else {
 809               if (phase->is_dominator(projs.fallthrough_catchproj, ctrl)) {
 810                 mem = projs.fallthrough_memproj;
 811               } else {
 812                 assert(phase->is_dominator(projs.catchall_catchproj, ctrl), "one proj must dominate barrier");
 813                 mem = projs.catchall_memproj;
 814               }
 815             }
 816           }
 817         } else {
 818           Node* proj = c->as_Call()->proj_out(TypeFunc::Memory);
 819           if (proj != NULL &&
 820               proj->adr_type() == TypePtr::BOTTOM) {
 821             mem = proj;
 822           }
 823         }
 824       } else {
 825         for (DUIterator_Fast imax, i = c->fast_outs(imax); i < imax; i++) {
 826           Node* u = c->fast_out(i);
 827           if (u->is_Proj() &&
 828               u->bottom_type() == Type::MEMORY &&
 829               u->adr_type() == TypePtr::BOTTOM) {
 830             assert(c->is_SafePoint() || c->is_MemBar() || c->is_Start(), "");
 831             assert(mem == NULL, "only one proj");
 832             mem = u;
 833           }
 834         }
 835         assert(!c->is_Call() || c->as_Call()->adr_type() != NULL || mem == NULL, "no mem projection expected");
 836       }
 837     }
 838     c = phase->idom(c);
 839   } while (mem == NULL);
 840   return mem;
 841 }
 842 
 843 void ShenandoahBarrierC2Support::follow_barrier_uses(Node* n, Node* ctrl, Unique_Node_List& uses, PhaseIdealLoop* phase) {
 844   for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
 845     Node* u = n->fast_out(i);
 846     if (!u->is_CFG() && phase->get_ctrl(u) == ctrl && (!u->is_Phi() || !u->in(0)->is_Loop() || u->in(LoopNode::LoopBackControl) != n)) {
 847       uses.push(u);
 848     }
 849   }
 850 }
 851 
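     // Replaces the outer strip mined loop head and its loop end with a plain
     // LoopNode/IfNode and clears the inner loop's strip mined bit, so the
     // loop nest no longer looks strip mined to verification.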
 852 static void hide_strip_mined_loop(OuterStripMinedLoopNode* outer, CountedLoopNode* inner, PhaseIdealLoop* phase) {
 853   OuterStripMinedLoopEndNode* le = inner->outer_loop_end();
 854   Node* new_outer = new LoopNode(outer->in(LoopNode::EntryControl), outer->in(LoopNode::LoopBackControl));
 855   phase->register_control(new_outer, phase->get_loop(outer), outer->in(LoopNode::EntryControl));
 856   Node* new_le = new IfNode(le->in(0), le->in(1), le->_prob, le->_fcnt);
 857   phase->register_control(new_le, phase->get_loop(le), le->in(0));
 858   phase->lazy_replace(outer, new_outer);
 859   phase->lazy_replace(le, new_le);
 860   inner->clear_strip_mined();
 861 }
 862 
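     // Emits the gc-state test:
     //   if ((LoadB(thread + gc_state_offset) & flags) != 0) -> ctrl
     //   else                                                -> test_fail_ctrl
     // 'ctrl' becomes the path on which the tested flags are set.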
 863 void ShenandoahBarrierC2Support::test_gc_state(Node*& ctrl, Node* raw_mem, Node*& test_fail_ctrl,
 864                                                PhaseIdealLoop* phase, int flags) {
 865   PhaseIterGVN& igvn = phase->igvn();
 866   Node* old_ctrl = ctrl;
 867 
 868   Node* thread          = new ThreadLocalNode();
 869   Node* gc_state_offset = igvn.MakeConX(in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
 870   Node* gc_state_addr   = new AddPNode(phase->C->top(), thread, gc_state_offset);
 871   Node* gc_state        = new LoadBNode(old_ctrl, raw_mem, gc_state_addr,
 872                                         DEBUG_ONLY(phase->C->get_adr_type(Compile::AliasIdxRaw)) NOT_DEBUG(NULL),
 873                                         TypeInt::BYTE, MemNode::unordered);
 874   Node* gc_state_and    = new AndINode(gc_state, igvn.intcon(flags));
 875   Node* gc_state_cmp    = new CmpINode(gc_state_and, igvn.zerocon(T_INT));
 876   Node* gc_state_bool   = new BoolNode(gc_state_cmp, BoolTest::ne);
 877 
 878   IfNode* gc_state_iff  = new IfNode(old_ctrl, gc_state_bool, PROB_UNLIKELY(0.999), COUNT_UNKNOWN);
 879   ctrl                  = new IfTrueNode(gc_state_iff);
 880   test_fail_ctrl        = new IfFalseNode(gc_state_iff);
 881 
 882   IdealLoopTree* loop = phase->get_loop(ctrl);
 883   phase->register_control(gc_state_iff,   loop, old_ctrl);
 884   phase->register_control(ctrl,           loop, gc_state_iff);
 885   phase->register_control(test_fail_ctrl, loop, gc_state_iff);
 886 
 887   phase->register_new_node(thread,        old_ctrl);
 888   phase->register_new_node(gc_state_addr, old_ctrl);
 889   phase->register_new_node(gc_state,      old_ctrl);
 890   phase->register_new_node(gc_state_and,  old_ctrl);
 891   phase->register_new_node(gc_state_cmp,  old_ctrl);
 892   phase->register_new_node(gc_state_bool, old_ctrl);
 893 
 894   phase->set_ctrl(gc_state_offset, phase->C->root());
 895 
 896   assert(is_gc_state_test(gc_state_iff, flags), "Should match the shape");
 897 }
 898 
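     // If 'val' may be NULL, emits an explicit NULL check; 'ctrl' becomes the
     // non-NULL path and 'null_ctrl' the NULL path. For provably non-NULL
     // values nothing is emitted and 'ctrl' is left unchanged.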
 899 void ShenandoahBarrierC2Support::test_null(Node*& ctrl, Node* val, Node*& null_ctrl, PhaseIdealLoop* phase) {
 900   Node* old_ctrl = ctrl;
 901   PhaseIterGVN& igvn = phase->igvn();
 902 
 903   const Type* val_t = igvn.type(val);
 904   if (val_t->meet(TypePtr::NULL_PTR) == val_t) {
 905     Node* null_cmp   = new CmpPNode(val, igvn.zerocon(T_OBJECT));
 906     Node* null_test  = new BoolNode(null_cmp, BoolTest::ne);
 907 
 908     IfNode* null_iff = new IfNode(old_ctrl, null_test, PROB_LIKELY(0.999), COUNT_UNKNOWN);
 909     ctrl             = new IfTrueNode(null_iff);
 910     null_ctrl        = new IfFalseNode(null_iff);
 911 
 912     IdealLoopTree* loop = phase->get_loop(old_ctrl);
 913     phase->register_control(null_iff,  loop, old_ctrl);
 914     phase->register_control(ctrl,      loop, null_iff);
 915     phase->register_control(null_ctrl, loop, null_iff);
 916 
 917     phase->register_new_node(null_cmp,  old_ctrl);
 918     phase->register_new_node(null_test, old_ctrl);
 919   }
 920 }
 921 
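     // Emits the collection set membership test: index the in_cset_fast_test
     // table with the region index (address >> region size shift) and branch
     // on the loaded byte; 'ctrl' becomes the in-cset path, 'not_cset_ctrl'
     // the path for objects outside the collection set.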
 922 void ShenandoahBarrierC2Support::test_in_cset(Node*& ctrl, Node*& not_cset_ctrl, Node* val, Node* raw_mem, PhaseIdealLoop* phase) {
 923   Node* old_ctrl = ctrl;
 924   PhaseIterGVN& igvn = phase->igvn();
 925 
 926   Node* raw_val        = new CastP2XNode(old_ctrl, val);
 927   Node* cset_idx       = new URShiftXNode(raw_val, igvn.intcon(ShenandoahHeapRegion::region_size_bytes_shift_jint()));
 928   Node* cset_addr      = igvn.makecon(TypeRawPtr::make(ShenandoahHeap::in_cset_fast_test_addr()));
 929   Node* cset_load_addr = new AddPNode(phase->C->top(), cset_addr, cset_idx);
 930   Node* cset_load      = new LoadBNode(old_ctrl, raw_mem, cset_load_addr,
 931                                        DEBUG_ONLY(phase->C->get_adr_type(Compile::AliasIdxRaw)) NOT_DEBUG(NULL),
 932                                        TypeInt::BYTE, MemNode::unordered);
 933   Node* cset_cmp       = new CmpINode(cset_load, igvn.zerocon(T_INT));
 934   Node* cset_bool      = new BoolNode(cset_cmp, BoolTest::ne);
 935 
 936   IfNode* cset_iff     = new IfNode(old_ctrl, cset_bool, PROB_UNLIKELY(0.999), COUNT_UNKNOWN);
 937   ctrl                 = new IfTrueNode(cset_iff);
 938   not_cset_ctrl        = new IfFalseNode(cset_iff);
 939 
 940   IdealLoopTree *loop = phase->get_loop(old_ctrl);
 941   phase->register_control(cset_iff,      loop, old_ctrl);
 942   phase->register_control(ctrl,          loop, cset_iff);
 943   phase->register_control(not_cset_ctrl, loop, cset_iff);
 944 
 945   phase->set_ctrl(cset_addr, phase->C->root());
 946 
 947   phase->register_new_node(raw_val,        old_ctrl);
 948   phase->register_new_node(cset_idx,       old_ctrl);
 949   phase->register_new_node(cset_load_addr, old_ctrl);
 950   phase->register_new_node(cset_load,      old_ctrl);
 951   phase->register_new_node(cset_cmp,       old_ctrl);
 952   phase->register_new_node(cset_bool,      old_ctrl);
 953 }
 954 
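     // Emits the slow-path leaf call to the load reference barrier runtime
     // stub (regular, narrow or native flavor) and rewires 'ctrl', 'val' and
     // 'result_mem' to the corresponding call projections.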
 955 void ShenandoahBarrierC2Support::call_lrb_stub(Node*& ctrl, Node*& val, Node* load_addr, Node*& result_mem, Node* raw_mem, bool is_native, PhaseIdealLoop* phase) {
 956   IdealLoopTree* loop = phase->get_loop(ctrl);
 957   const TypePtr* obj_type = phase->igvn().type(val)->is_oopptr();
 958 
 959   // The slow path stub consumes and produces raw memory in addition
 960   // to the existing memory edges
 961   Node* base = find_bottom_mem(ctrl, phase);
 962   MergeMemNode* mm = MergeMemNode::make(base);
 963   mm->set_memory_at(Compile::AliasIdxRaw, raw_mem);
 964   phase->register_new_node(mm, ctrl);
 965 
 966   address target = LP64_ONLY(UseCompressedOops) NOT_LP64(false) ?
 967           CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow) :
 968           CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier);
 969 
 970   address calladdr = is_native ? CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native)
 971                                : target;
 972   const char* name = is_native ? "load_reference_barrier_native" : "load_reference_barrier";
 973   Node* call = new CallLeafNode(ShenandoahBarrierSetC2::shenandoah_load_reference_barrier_Type(obj_type), calladdr, name, TypeRawPtr::BOTTOM);
 974 
 975   call->init_req(TypeFunc::Control, ctrl);
 976   call->init_req(TypeFunc::I_O, phase->C->top());
 977   call->init_req(TypeFunc::Memory, mm);
 978   call->init_req(TypeFunc::FramePtr, phase->C->top());
 979   call->init_req(TypeFunc::ReturnAdr, phase->C->top());
 980   call->init_req(TypeFunc::Parms, val);
 981   call->init_req(TypeFunc::Parms+1, load_addr);
 982   phase->register_control(call, loop, ctrl);
 983   ctrl = new ProjNode(call, TypeFunc::Control);
 984   phase->register_control(ctrl, loop, call);
 985   result_mem = new ProjNode(call, TypeFunc::Memory);
 986   phase->register_new_node(result_mem, call);
 987   val = new ProjNode(call, TypeFunc::Parms);
 988   phase->register_new_node(val, call);
 989 }
 990 
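     // After the barrier has been expanded into explicit control flow, moves
     // nodes that were control dependent on the barrier's original control
     // down to 'region' (the merge point of the expanded barrier), keeping the
     // incoming raw memory state above the barrier.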
 991 void ShenandoahBarrierC2Support::fix_ctrl(Node* barrier, Node* region, const MemoryGraphFixer& fixer, Unique_Node_List& uses, Unique_Node_List& uses_to_ignore, uint last, PhaseIdealLoop* phase) {
 992   Node* ctrl = phase->get_ctrl(barrier);
 993   Node* init_raw_mem = fixer.find_mem(ctrl, barrier);
 994 
 995   // Update the control of all nodes that should be after the
 996   // barrier control flow
 997   uses.clear();
 998   // Every node that is control dependent on the barrier's input
 999   // control will be after the expanded barrier. The raw memory (if
1000   // it is control dependent on the barrier's input control)
1001   // must stay above the barrier.
1002   uses_to_ignore.clear();
1003   if (phase->has_ctrl(init_raw_mem) && phase->get_ctrl(init_raw_mem) == ctrl && !init_raw_mem->is_Phi()) {
1004     uses_to_ignore.push(init_raw_mem);
1005   }
1006   for (uint next = 0; next < uses_to_ignore.size(); next++) {
1007     Node *n = uses_to_ignore.at(next);
1008     for (uint i = 0; i < n->req(); i++) {
1009       Node* in = n->in(i);
1010       if (in != NULL && phase->has_ctrl(in) && phase->get_ctrl(in) == ctrl) {
1011         uses_to_ignore.push(in);
1012       }
1013     }
1014   }
1015   for (DUIterator_Fast imax, i = ctrl->fast_outs(imax); i < imax; i++) {
1016     Node* u = ctrl->fast_out(i);
1017     if (u->_idx < last &&
1018         u != barrier &&
1019         !uses_to_ignore.member(u) &&
1020         (u->in(0) != ctrl || (!u->is_Region() && !u->is_Phi())) &&
1021         (ctrl->Opcode() != Op_CatchProj || u->Opcode() != Op_CreateEx)) {
1022       Node* old_c = phase->ctrl_or_self(u);
1023       Node* c = old_c;
1024       if (c != ctrl ||
1025           is_dominator_same_ctrl(old_c, barrier, u, phase) ||
1026           ShenandoahBarrierSetC2::is_shenandoah_state_load(u)) {
1027         phase->igvn().rehash_node_delayed(u);
1028         int nb = u->replace_edge(ctrl, region);
1029         if (u->is_CFG()) {
1030           if (phase->idom(u) == ctrl) {
1031             phase->set_idom(u, region, phase->dom_depth(region));
1032           }
1033         } else if (phase->get_ctrl(u) == ctrl) {
1034           assert(u != init_raw_mem, "should leave input raw mem above the barrier");
1035           uses.push(u);
1036         }
1037         assert(nb == 1, "more than 1 ctrl input?");
1038         --i, imax -= nb;
1039       }
1040     }
1041   }
1042 }
1043 
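     // 'n' and 'n_clone' live on the fallthrough and catch-all paths of a call
     // respectively; builds Phi nodes at the intermediate Regions between
     // 'ctrl' and 'c' so uses below the merge points see the right copy.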
1044 static Node* create_phis_on_call_return(Node* ctrl, Node* c, Node* n, Node* n_clone, const CallProjections& projs, PhaseIdealLoop* phase) {
1045   Node* region = NULL;
1046   while (c != ctrl) {
1047     if (c->is_Region()) {
1048       region = c;
1049     }
1050     c = phase->idom(c);
1051   }
1052   assert(region != NULL, "");
1053   Node* phi = new PhiNode(region, n->bottom_type());
1054   for (uint j = 1; j < region->req(); j++) {
1055     Node* in = region->in(j);
1056     if (phase->is_dominator(projs.fallthrough_catchproj, in)) {
1057       phi->init_req(j, n);
1058     } else if (phase->is_dominator(projs.catchall_catchproj, in)) {
1059       phi->init_req(j, n_clone);
1060     } else {
1061       phi->init_req(j, create_phis_on_call_return(ctrl, in, n, n_clone, projs, phase));
1062     }
1063   }
1064   phase->register_new_node(phi, region);
1065   return phi;
1066 }
1067 
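     // Expansion driver: first un-strip-mines loops that contain barriers and
     // moves/clones load reference barriers away from rethrow and Java call
     // projections, then expands each remaining barrier into the gc-state
     // test, in-cset test and runtime call built by the helpers above.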
1068 void ShenandoahBarrierC2Support::pin_and_expand(PhaseIdealLoop* phase) {
1069   ShenandoahBarrierSetC2State* state = ShenandoahBarrierSetC2::bsc2()->state();
1070 
1071   Unique_Node_List uses;
1072   for (int i = 0; i < state->enqueue_barriers_count(); i++) {
1073     Node* barrier = state->enqueue_barrier(i);
1074     Node* ctrl = phase->get_ctrl(barrier);
1075     IdealLoopTree* loop = phase->get_loop(ctrl);
1076     if (loop->_head->is_OuterStripMinedLoop()) {
1077       // Expanding a barrier here will break loop strip mining
1078       // verification. Transform the loop so the loop nest doesn't
1079       // appear as strip mined.
1080       OuterStripMinedLoopNode* outer = loop->_head->as_OuterStripMinedLoop();
1081       hide_strip_mined_loop(outer, outer->unique_ctrl_out()->as_CountedLoop(), phase);
1082     }
1083   }
1084 
1085   Node_Stack stack(0);
1086   Node_List clones;
1087   for (int i = state->load_reference_barriers_count() - 1; i >= 0; i--) {
1088     ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
1089     if (lrb->is_redundant()) {
1090       continue;
1091     }
1092 
1093     Node* ctrl = phase->get_ctrl(lrb);
1094     if ((ctrl->is_Proj() && ctrl->in(0)->is_CallJava()) || ctrl->is_CallJava()) {
1095       CallNode* call = ctrl->is_Proj() ? ctrl->in(0)->as_CallJava() : ctrl->as_CallJava();
1096       if (call->entry_point() == OptoRuntime::rethrow_stub()) {
1097         // The rethrow call may have too many projections to be
1098         // properly handled here. Given there's no reason for a
1099         // barrier to depend on the call, move it above the call
1100         stack.push(lrb, 0);
1101         do {
1102           Node* n = stack.node();
1103           uint idx = stack.index();
1104           if (idx < n->req()) {
1105             Node* in = n->in(idx);
1106             stack.set_index(idx+1);
1107             if (in != NULL) {
1108               if (phase->has_ctrl(in)) {
1109                 if (phase->is_dominator(call, phase->get_ctrl(in))) {
1110 #ifdef ASSERT
1111                   for (uint i = 0; i < stack.size(); i++) {
1112                     assert(stack.node_at(i) != in, "node shouldn't have been seen yet");
1113                   }
1114 #endif
1115                   stack.push(in, 0);
1116                 }
1117               } else {
1118                 assert(phase->is_dominator(in, call->in(0)), "no dependency on the call");
1119               }
1120             }
1121           } else {
1122             phase->set_ctrl(n, call->in(0));
1123             stack.pop();
1124           }
1125         } while(stack.size() > 0);
1126         continue;
1127       }
1128       CallProjections projs;
1129       call->extract_projections(&projs, false, false);
1130 
1131       Node* lrb_clone = lrb->clone();
1132       phase->register_new_node(lrb_clone, projs.catchall_catchproj);
1133       phase->set_ctrl(lrb, projs.fallthrough_catchproj);
1134 
1135       stack.push(lrb, 0);
1136       clones.push(lrb_clone);
1137 
1138       do {
1139         assert(stack.size() == clones.size(), "");
1140         Node* n = stack.node();
1141 #ifdef ASSERT
1142         if (n->is_Load()) {
1143           Node* mem = n->in(MemNode::Memory);
1144           for (DUIterator_Fast jmax, j = mem->fast_outs(jmax); j < jmax; j++) {
1145             Node* u = mem->fast_out(j);
1146             assert((!u->is_Store() && !u->is_LoadStore()) || phase->get_ctrl(u) != ctrl, "anti dependent store?");
1147           }
1148         }
1149 #endif
1150         uint idx = stack.index();
1151         Node* n_clone = clones.at(clones.size()-1);
1152         if (idx < n->outcnt()) {
1153           Node* u = n->raw_out(idx);
1154           Node* c = phase->ctrl_or_self(u);
1155           if (phase->is_dominator(call, c) && phase->is_dominator(c, projs.fallthrough_proj)) {
1156             stack.set_index(idx+1);
1157             assert(!u->is_CFG(), "");
1158             stack.push(u, 0);
1159             Node* u_clone = u->clone();
1160             int nb = u_clone->replace_edge(n, n_clone);
1161             assert(nb > 0, "should have replaced some uses");
1162             phase->register_new_node(u_clone, projs.catchall_catchproj);
1163             clones.push(u_clone);
1164             phase->set_ctrl(u, projs.fallthrough_catchproj);
1165           } else {
1166             bool replaced = false;
1167             if (u->is_Phi()) {
1168               for (uint k = 1; k < u->req(); k++) {
1169                 if (u->in(k) == n) {
1170                   if (phase->is_dominator(projs.catchall_catchproj, u->in(0)->in(k))) {
1171                     phase->igvn().replace_input_of(u, k, n_clone);
1172                     replaced = true;
1173                   } else if (!phase->is_dominator(projs.fallthrough_catchproj, u->in(0)->in(k))) {
1174                     phase->igvn().replace_input_of(u, k, create_phis_on_call_return(ctrl, u->in(0)->in(k), n, n_clone, projs, phase));
1175                     replaced = true;
1176                   }
1177                 }
1178               }
1179             } else {
1180               if (phase->is_dominator(projs.catchall_catchproj, c)) {
1181                 phase->igvn().rehash_node_delayed(u);
1182                 int nb = u->replace_edge(n, n_clone);
1183                 assert(nb > 0, "should have replaced some uses");
1184                 replaced = true;
1185               } else if (!phase->is_dominator(projs.fallthrough_catchproj, c)) {
1186                 phase->igvn().rehash_node_delayed(u);
1187                 int nb = u->replace_edge(n, create_phis_on_call_return(ctrl, c, n, n_clone, projs, phase));
1188                 assert(nb > 0, "should have replaced some uses");
1189                 replaced = true;
1190               }
1191             }
1192             if (!replaced) {
1193               stack.set_index(idx+1);
1194             }
1195           }
1196         } else {
1197           stack.pop();
1198           clones.pop();
1199         }
1200       } while (stack.size() > 0);
1201       assert(stack.size() == 0 && clones.size() == 0, "");
1202     }
1203   }
1204 
1205   for (int i = 0; i < state->load_reference_barriers_count(); i++) {
1206     ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
1207     if (lrb->is_redundant()) {
1208       continue;
1209     }
1210     Node* ctrl = phase->get_ctrl(lrb);
1211     IdealLoopTree* loop = phase->get_loop(ctrl);
1212     if (loop->_head->is_OuterStripMinedLoop()) {
1213       // Expanding a barrier here will break loop strip mining
1214       // verification. Transform the loop so the loop nest doesn't
1215       // appear as strip mined.
1216       OuterStripMinedLoopNode* outer = loop->_head->as_OuterStripMinedLoop();
1217       hide_strip_mined_loop(outer, outer->unique_ctrl_out()->as_CountedLoop(), phase);
1218     }
1219   }
1220 
1221   // Expand load-reference-barriers
1222   MemoryGraphFixer fixer(Compile::AliasIdxRaw, true, phase);
1223   Unique_Node_List uses_to_ignore;
1224   for (int i = state->load_reference_barriers_count() - 1; i >= 0; i--) {
1225     ShenandoahLoadReferenceBarrierNode* lrb = state->load_reference_barrier(i);
1226     if (lrb->is_redundant()) {
1227       phase->igvn().replace_node(lrb, lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn));
1228       continue;
1229     }
1230     uint last = phase->C->unique();
1231     Node* ctrl = phase->get_ctrl(lrb);
1232     Node* val = lrb->in(ShenandoahLoadReferenceBarrierNode::ValueIn);
1233 
1234     Node* orig_ctrl = ctrl;
1235 
1236     Node* raw_mem = fixer.find_mem(ctrl, lrb);
1237     Node* init_raw_mem = raw_mem;
1238     Node* raw_mem_for_ctrl = fixer.find_mem(ctrl, NULL);
1239 
1240     IdealLoopTree* loop = phase->get_loop(ctrl);
1241 
1242     assert(val->bottom_type()->make_oopptr(), "need oop");
1243     assert(val->bottom_type()->make_oopptr()->const_oop() == NULL, "expect non-constant");
1244 
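         // Expand the barrier into a region with three incoming paths:
         // (1) the heap is stable (no forwarded objects), (2) the value is
         // not in the collection set, (3) the evacuation slow path that
         // calls the LRB stub.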
1245     enum { _heap_stable = 1, _not_cset, _evac_path, PATH_LIMIT };
1246     Node* region = new RegionNode(PATH_LIMIT);
1247     Node* val_phi = new PhiNode(region, val->bottom_type()->is_oopptr());
1248     Node* raw_mem_phi = PhiNode::make(region, raw_mem, Type::MEMORY, TypeRawPtr::BOTTOM);
1249 
1250     // Stable path.
1251     Node* heap_stable_ctrl = NULL;
1252     test_gc_state(ctrl, raw_mem, heap_stable_ctrl, phase, ShenandoahHeap::HAS_FORWARDED);
1253     IfNode* heap_stable_iff = heap_stable_ctrl->in(0)->as_If();
1254 
1255     // Heap stable case
1256     region->init_req(_heap_stable, heap_stable_ctrl);
1257     val_phi->init_req(_heap_stable, val);
1258     raw_mem_phi->init_req(_heap_stable, raw_mem);
1259 
1260     // Test for in-cset.
1261     // Wires !in_cset(obj) to slot 2 of region and phis
1262     Node* not_cset_ctrl = NULL;
1263     test_in_cset(ctrl, not_cset_ctrl, val, raw_mem, phase);
1264     if (not_cset_ctrl != NULL) {
1265       region->init_req(_not_cset, not_cset_ctrl);
1266       val_phi->init_req(_not_cset, val);
1267       raw_mem_phi->init_req(_not_cset, raw_mem);
1268     }
1269 
1270     // Call the lrb-stub and wire up that path in slot 3 (_evac_path)
1271     Node* result_mem = NULL;
1272 
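         // With ShenandoahSelfFixing, pass the address the value was loaded
         // from to the stub so it can write the forwarded oop back to that
         // location; otherwise pass a null address.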
1273     Node* addr;
1274     if (ShenandoahSelfFixing) {
1275       VectorSet visited(Thread::current()->resource_area());
1276       addr = get_load_addr(phase, visited, lrb);
1277     } else {
1278       addr = phase->igvn().zerocon(T_OBJECT);
1279     }
1280     if (addr->Opcode() == Op_AddP) {
1281       Node* orig_base = addr->in(AddPNode::Base);
1282       Node* base = new CheckCastPPNode(ctrl, orig_base, orig_base->bottom_type(), true);
1283       phase->register_new_node(base, ctrl);
1284       if (addr->in(AddPNode::Base) == addr->in(AddPNode::Address)) {
1285         // Field access
1286         addr = addr->clone();
1287         addr->set_req(AddPNode::Base, base);
1288         addr->set_req(AddPNode::Address, base);
1289         phase->register_new_node(addr, ctrl);
1290       } else {
1291         Node* addr2 = addr->in(AddPNode::Address);
1292         if (addr2->Opcode() == Op_AddP && addr2->in(AddPNode::Base) == addr2->in(AddPNode::Address) &&
1293               addr2->in(AddPNode::Base) == orig_base) {
1294           addr2 = addr2->clone();
1295           addr2->set_req(AddPNode::Base, base);
1296           addr2->set_req(AddPNode::Address, base);
1297           phase->register_new_node(addr2, ctrl);
1298           addr = addr->clone();
1299           addr->set_req(AddPNode::Base, base);
1300           addr->set_req(AddPNode::Address, addr2);
1301           phase->register_new_node(addr, ctrl);
1302         }
1303       }
1304     }
1305     call_lrb_stub(ctrl, val, addr, result_mem, raw_mem, lrb->is_native(), phase);
1306     region->init_req(_evac_path, ctrl);
1307     val_phi->init_req(_evac_path, val);
1308     raw_mem_phi->init_req(_evac_path, result_mem);
1309 
1310     phase->register_control(region, loop, heap_stable_iff);
1311     Node* out_val = val_phi;
1312     phase->register_new_node(val_phi, region);
1313     phase->register_new_node(raw_mem_phi, region);
1314 
1315     fix_ctrl(lrb, region, fixer, uses, uses_to_ignore, last, phase);
1316 
1317     ctrl = orig_ctrl;
1318 
1319     phase->igvn().replace_node(lrb, out_val);
1320 
1321     follow_barrier_uses(out_val, ctrl, uses, phase);
1322 
1323     for(uint next = 0; next < uses.size(); next++ ) {
1324       Node *n = uses.at(next);
1325       assert(phase->get_ctrl(n) == ctrl, "bad control");
1326       assert(n != init_raw_mem, "should leave input raw mem above the barrier");
1327       phase->set_ctrl(n, region);
1328       follow_barrier_uses(n, ctrl, uses, phase);
1329     }
1330 
1331     // The slow path call produces memory: hook the raw memory phi
1332     // from the expanded load reference barrier into the rest of the graph,
1333     // which may require adding memory phis at every post-dominated
1334     // region and at enclosing loop heads. Use the memory state
1335     // collected in memory_nodes to fix the memory graph. Update that
1336     // memory state as we go.
1337     fixer.fix_mem(ctrl, region, init_raw_mem, raw_mem_for_ctrl, raw_mem_phi, uses);
1338   }
1339   // Done expanding load-reference-barriers.
1340   assert(ShenandoahBarrierSetC2::bsc2()->state()->load_reference_barriers_count() == 0, "all load reference barrier nodes should have been replaced");
1341 
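       // Expand enqueue (SATB pre-write) barriers: when marking is in
       // progress and the previous value is not null, record it in the
       // thread-local SATB queue, calling the runtime slow path when the
       // queue is full.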
1342   for (int i = state->enqueue_barriers_count() - 1; i >= 0; i--) {
1343     Node* barrier = state->enqueue_barrier(i);
1344     Node* pre_val = barrier->in(1);
1345 
1346     if (phase->igvn().type(pre_val)->higher_equal(TypePtr::NULL_PTR)) {
1347       ShouldNotReachHere();
1348       continue;
1349     }
1350 
1351     Node* ctrl = phase->get_ctrl(barrier);
1352 
1353     if (ctrl->is_Proj() && ctrl->in(0)->is_CallJava()) {
1354       assert(is_dominator(phase->get_ctrl(pre_val), ctrl->in(0)->in(0), pre_val, ctrl->in(0), phase), "can't move");
1355       ctrl = ctrl->in(0)->in(0);
1356       phase->set_ctrl(barrier, ctrl);
1357     } else if (ctrl->is_CallRuntime()) {
1358       assert(is_dominator(phase->get_ctrl(pre_val), ctrl->in(0), pre_val, ctrl, phase), "can't move");
1359       ctrl = ctrl->in(0);
1360       phase->set_ctrl(barrier, ctrl);
1361     }
1362 
1363     Node* init_ctrl = ctrl;
1364     IdealLoopTree* loop = phase->get_loop(ctrl);
1365     Node* raw_mem = fixer.find_mem(ctrl, barrier);
1366     Node* init_raw_mem = raw_mem;
1367     Node* raw_mem_for_ctrl = fixer.find_mem(ctrl, NULL);
1368     Node* heap_stable_ctrl = NULL;
1369     Node* null_ctrl = NULL;
1370     uint last = phase->C->unique();
1371 
1372     enum { _heap_stable = 1, _heap_unstable, PATH_LIMIT };
1373     Node* region = new RegionNode(PATH_LIMIT);
1374     Node* phi = PhiNode::make(region, raw_mem, Type::MEMORY, TypeRawPtr::BOTTOM);
1375 
1376     enum { _fast_path = 1, _slow_path, _null_path, PATH_LIMIT2 };
1377     Node* region2 = new RegionNode(PATH_LIMIT2);
1378     Node* phi2 = PhiNode::make(region2, raw_mem, Type::MEMORY, TypeRawPtr::BOTTOM);
1379 
1380     // Stable path.
1381     test_gc_state(ctrl, raw_mem, heap_stable_ctrl, phase, ShenandoahHeap::MARKING);
1382     region->init_req(_heap_stable, heap_stable_ctrl);
1383     phi->init_req(_heap_stable, raw_mem);
1384 
1385     // Null path
1386     Node* reg2_ctrl = NULL;
1387     test_null(ctrl, pre_val, null_ctrl, phase);
1388     if (null_ctrl != NULL) {
1389       reg2_ctrl = null_ctrl->in(0);
1390       region2->init_req(_null_path, null_ctrl);
1391       phi2->init_req(_null_path, raw_mem);
1392     } else {
1393       region2->del_req(_null_path);
1394       phi2->del_req(_null_path);
1395     }
1396 
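         // Fast path: load the thread-local SATB queue index; a zero index
         // means the buffer is full. Otherwise store pre_val at
         // buffer + index - sizeof(intptr_t) and write back the decremented
         // index.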
1397     const int index_offset = in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset());
1398     const int buffer_offset = in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset());
1399     Node* thread = new ThreadLocalNode();
1400     phase->register_new_node(thread, ctrl);
1401     Node* buffer_adr = new AddPNode(phase->C->top(), thread, phase->igvn().MakeConX(buffer_offset));
1402     phase->register_new_node(buffer_adr, ctrl);
1403     Node* index_adr = new AddPNode(phase->C->top(), thread, phase->igvn().MakeConX(index_offset));
1404     phase->register_new_node(index_adr, ctrl);
1405 
1406     BasicType index_bt = TypeX_X->basic_type();
1407     assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading SATBMarkQueue::_index with wrong size.");
1408     const TypePtr* adr_type = TypeRawPtr::BOTTOM;
1409     Node* index = new LoadXNode(ctrl, raw_mem, index_adr, adr_type, TypeX_X, MemNode::unordered);
1410     phase->register_new_node(index, ctrl);
1411     Node* index_cmp = new CmpXNode(index, phase->igvn().MakeConX(0));
1412     phase->register_new_node(index_cmp, ctrl);
1413     Node* index_test = new BoolNode(index_cmp, BoolTest::ne);
1414     phase->register_new_node(index_test, ctrl);
1415     IfNode* queue_full_iff = new IfNode(ctrl, index_test, PROB_LIKELY(0.999), COUNT_UNKNOWN);
1416     if (reg2_ctrl == NULL) reg2_ctrl = queue_full_iff;
1417     phase->register_control(queue_full_iff, loop, ctrl);
1418     Node* not_full = new IfTrueNode(queue_full_iff);
1419     phase->register_control(not_full, loop, queue_full_iff);
1420     Node* full = new IfFalseNode(queue_full_iff);
1421     phase->register_control(full, loop, queue_full_iff);
1422 
1423     ctrl = not_full;
1424 
1425     Node* next_index = new SubXNode(index, phase->igvn().MakeConX(sizeof(intptr_t)));
1426     phase->register_new_node(next_index, ctrl);
1427 
1428     Node* buffer  = new LoadPNode(ctrl, raw_mem, buffer_adr, adr_type, TypeRawPtr::NOTNULL, MemNode::unordered);
1429     phase->register_new_node(buffer, ctrl);
1430     Node *log_addr = new AddPNode(phase->C->top(), buffer, next_index);
1431     phase->register_new_node(log_addr, ctrl);
1432     Node* log_store = new StorePNode(ctrl, raw_mem, log_addr, adr_type, pre_val, MemNode::unordered);
1433     phase->register_new_node(log_store, ctrl);
1434     // update the index
1435     Node* index_update = new StoreXNode(ctrl, log_store, index_adr, adr_type, next_index, MemNode::unordered);
1436     phase->register_new_node(index_update, ctrl);
1437 
1438     // Fast-path case
1439     region2->init_req(_fast_path, ctrl);
1440     phi2->init_req(_fast_path, index_update);
1441 
1442     ctrl = full;
1443 
1444     Node* base = find_bottom_mem(ctrl, phase);
1445 
1446     MergeMemNode* mm = MergeMemNode::make(base);
1447     mm->set_memory_at(Compile::AliasIdxRaw, raw_mem);
1448     phase->register_new_node(mm, ctrl);
1449 
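         // Slow path: the queue is full. Call the runtime pre-write entry
         // with pre_val and the current thread.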
1450     Node* call = new CallLeafNode(ShenandoahBarrierSetC2::write_ref_field_pre_entry_Type(), CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre_entry), "shenandoah_wb_pre", TypeRawPtr::BOTTOM);
1451     call->init_req(TypeFunc::Control, ctrl);
1452     call->init_req(TypeFunc::I_O, phase->C->top());
1453     call->init_req(TypeFunc::Memory, mm);
1454     call->init_req(TypeFunc::FramePtr, phase->C->top());
1455     call->init_req(TypeFunc::ReturnAdr, phase->C->top());
1456     call->init_req(TypeFunc::Parms, pre_val);
1457     call->init_req(TypeFunc::Parms+1, thread);
1458     phase->register_control(call, loop, ctrl);
1459 
1460     Node* ctrl_proj = new ProjNode(call, TypeFunc::Control);
1461     phase->register_control(ctrl_proj, loop, call);
1462     Node* mem_proj = new ProjNode(call, TypeFunc::Memory);
1463     phase->register_new_node(mem_proj, call);
1464 
1465     // Slow-path case
1466     region2->init_req(_slow_path, ctrl_proj);
1467     phi2->init_req(_slow_path, mem_proj);
1468 
1469     phase->register_control(region2, loop, reg2_ctrl);
1470     phase->register_new_node(phi2, region2);
1471 
1472     region->init_req(_heap_unstable, region2);
1473     phi->init_req(_heap_unstable, phi2);
1474 
1475     phase->register_control(region, loop, heap_stable_ctrl->in(0));
1476     phase->register_new_node(phi, region);
1477 
1478     fix_ctrl(barrier, region, fixer, uses, uses_to_ignore, last, phase);
1479     for(uint next = 0; next < uses.size(); next++ ) {
1480       Node *n = uses.at(next);
1481       assert(phase->get_ctrl(n) == init_ctrl, "bad control");
1482       assert(n != init_raw_mem, "should leave input raw mem above the barrier");
1483       phase->set_ctrl(n, region);
1484       follow_barrier_uses(n, init_ctrl, uses, phase);
1485     }
1486     fixer.fix_mem(init_ctrl, region, init_raw_mem, raw_mem_for_ctrl, phi, uses);
1487 
1488     phase->igvn().replace_node(barrier, pre_val);
1489   }
1490   assert(state->enqueue_barriers_count() == 0, "all enqueue barrier nodes should have been replaced");
1491 
1492 }
1493 
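     // Walk back from a load-reference-barrier's value input to the load it
     // came from and return that load's address. Returns NULL for nodes that
     // were already visited and a null constant when the address cannot be
     // determined unambiguously.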
1494 Node* ShenandoahBarrierC2Support::get_load_addr(PhaseIdealLoop* phase, VectorSet& visited, Node* in) {
1495   if (visited.test_set(in->_idx)) {
1496     return NULL;
1497   }
1498   switch (in->Opcode()) {
1499     case Op_Proj:
1500       return get_load_addr(phase, visited, in->in(0));
1501     case Op_CastPP:
1502     case Op_CheckCastPP:
1503     case Op_DecodeN:
1504     case Op_EncodeP:
1505       return get_load_addr(phase, visited, in->in(1));
1506     case Op_LoadN:
1507     case Op_LoadP:
1508       return in->in(MemNode::Address);
1509     case Op_CompareAndExchangeN:
1510     case Op_CompareAndExchangeP:
1511     case Op_GetAndSetN:
1512     case Op_GetAndSetP:
1513     case Op_ShenandoahCompareAndExchangeP:
1514     case Op_ShenandoahCompareAndExchangeN:
1515       // These instructions would just have stored a different
1516       // value into the field. There is no point attempting to fix it here.
1517       return phase->igvn().zerocon(T_OBJECT);
1518     case Op_CMoveP:
1519     case Op_CMoveN: {
1520       Node* t = get_load_addr(phase, visited, in->in(CMoveNode::IfTrue));
1521       Node* f = get_load_addr(phase, visited, in->in(CMoveNode::IfFalse));
1522       // Handle unambiguous cases: single address reported on both branches.
1523       if (t != NULL && f == NULL) return t;
1524       if (t == NULL && f != NULL) return f;
1525       if (t != NULL && t == f)    return t;
1526       // Ambiguity.
1527       return phase->igvn().zerocon(T_OBJECT);
1528     }
1529     case Op_Phi: {
1530       Node* addr = NULL;
1531       for (uint i = 1; i < in->req(); i++) {
1532         Node* addr1 = get_load_addr(phase, visited, in->in(i));
1533         if (addr == NULL) {
1534           addr = addr1;
1535         }
1536         if (addr != addr1) {
1537           return phase->igvn().zerocon(T_OBJECT);
1538         }
1539       }
1540       return addr;
1541     }
1542     case Op_ShenandoahLoadReferenceBarrier:
1543       return get_load_addr(phase, visited, in->in(ShenandoahLoadReferenceBarrierNode::ValueIn));
1544     case Op_ShenandoahEnqueueBarrier:
1545       return get_load_addr(phase, visited, in->in(1));
1546     case Op_CallDynamicJava:
1547     case Op_CallLeaf:
1548     case Op_CallStaticJava:
1549     case Op_ConN:
1550     case Op_ConP:
1551     case Op_Parm:
1552     case Op_CreateEx:
1553       return phase->igvn().zerocon(T_OBJECT);
1554     default:
1555 #ifdef ASSERT
1556       fatal("Unknown node in get_load_addr: %s", NodeClassNames[in->Opcode()]);
1557 #endif
1558       return phase->igvn().zerocon(T_OBJECT);
1559   }
1560 
1561 }
1562 
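     // Clone the gc-state load and the AndI/CmpI/Bool chain feeding this If
     // above the loop entry so the heap-stable test becomes loop invariant
     // (unless the load is already dominated by the loop entry).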
1563 void ShenandoahBarrierC2Support::move_gc_state_test_out_of_loop(IfNode* iff, PhaseIdealLoop* phase) {
1564   IdealLoopTree *loop = phase->get_loop(iff);
1565   Node* loop_head = loop->_head;
1566   Node* entry_c = loop_head->in(LoopNode::EntryControl);
1567 
1568   Node* bol = iff->in(1);
1569   Node* cmp = bol->in(1);
1570   Node* andi = cmp->in(1);
1571   Node* load = andi->in(1);
1572 
1573   assert(is_gc_state_load(load), "broken");
1574   if (!phase->is_dominator(load->in(0), entry_c)) {
1575     Node* mem_ctrl = NULL;
1576     Node* mem = dom_mem(load->in(MemNode::Memory), loop_head, Compile::AliasIdxRaw, mem_ctrl, phase);
1577     load = load->clone();
1578     load->set_req(MemNode::Memory, mem);
1579     load->set_req(0, entry_c);
1580     phase->register_new_node(load, entry_c);
1581     andi = andi->clone();
1582     andi->set_req(1, load);
1583     phase->register_new_node(andi, entry_c);
1584     cmp = cmp->clone();
1585     cmp->set_req(1, andi);
1586     phase->register_new_node(cmp, entry_c);
1587     bol = bol->clone();
1588     bol->set_req(1, cmp);
1589     phase->register_new_node(bol, entry_c);
1590 
1591     Node* old_bol = iff->in(1);
1592     phase->igvn().replace_input_of(iff, 1, bol);
1593   }
1594 }
1595 
1596 bool ShenandoahBarrierC2Support::identical_backtoback_ifs(Node* n, PhaseIdealLoop* phase) {
1597   if (!n->is_If() || n->is_CountedLoopEnd()) {
1598     return false;
1599   }
1600   Node* region = n->in(0);
1601 
1602   if (!region->is_Region()) {
1603     return false;
1604   }
1605   Node* dom = phase->idom(region);
1606   if (!dom->is_If()) {
1607     return false;
1608   }
1609 
1610   if (!is_heap_stable_test(n) || !is_heap_stable_test(dom)) {
1611     return false;
1612   }
1613 
1614   IfNode* dom_if = dom->as_If();
1615   Node* proj_true = dom_if->proj_out(1);
1616   Node* proj_false = dom_if->proj_out(0);
1617 
1618   for (uint i = 1; i < region->req(); i++) {
1619     if (phase->is_dominator(proj_true, region->in(i))) {
1620       continue;
1621     }
1622     if (phase->is_dominator(proj_false, region->in(i))) {
1623       continue;
1624     }
1625     return false;
1626   }
1627 
1628   return true;
1629 }
1630 
1631 void ShenandoahBarrierC2Support::merge_back_to_back_tests(Node* n, PhaseIdealLoop* phase) {
1632   assert(is_heap_stable_test(n), "no other tests");
1633   if (identical_backtoback_ifs(n, phase)) {
1634     Node* n_ctrl = n->in(0);
1635     if (phase->can_split_if(n_ctrl)) {
1636       IfNode* dom_if = phase->idom(n_ctrl)->as_If();
1637       if (is_heap_stable_test(n)) {
1638         Node* gc_state_load = n->in(1)->in(1)->in(1)->in(1);
1639         assert(is_gc_state_load(gc_state_load), "broken");
1640         Node* dom_gc_state_load = dom_if->in(1)->in(1)->in(1)->in(1);
1641         assert(is_gc_state_load(dom_gc_state_load), "broken");
1642         if (gc_state_load != dom_gc_state_load) {
1643           phase->igvn().replace_node(gc_state_load, dom_gc_state_load);
1644         }
1645       }
1646       PhiNode* bolphi = PhiNode::make_blank(n_ctrl, n->in(1));
1647       Node* proj_true = dom_if->proj_out(1);
1648       Node* proj_false = dom_if->proj_out(0);
1649       Node* con_true = phase->igvn().makecon(TypeInt::ONE);
1650       Node* con_false = phase->igvn().makecon(TypeInt::ZERO);
1651 
1652       for (uint i = 1; i < n_ctrl->req(); i++) {
1653         if (phase->is_dominator(proj_true, n_ctrl->in(i))) {
1654           bolphi->init_req(i, con_true);
1655         } else {
1656           assert(phase->is_dominator(proj_false, n_ctrl->in(i)), "bad if");
1657           bolphi->init_req(i, con_false);
1658         }
1659       }
1660       phase->register_new_node(bolphi, n_ctrl);
1661       phase->igvn().replace_input_of(n, 1, bolphi);
1662       phase->do_split_if(n);
1663     }
1664   }
1665 }
1666 
1667 IfNode* ShenandoahBarrierC2Support::find_unswitching_candidate(const IdealLoopTree* loop, PhaseIdealLoop* phase) {
1668   // Find first invariant test that doesn't exit the loop
1669   LoopNode *head = loop->_head->as_Loop();
1670   IfNode* unswitch_iff = NULL;
1671   Node* n = head->in(LoopNode::LoopBackControl);
1672   int loop_has_sfpts = -1;
1673   while (n != head) {
1674     Node* n_dom = phase->idom(n);
1675     if (n->is_Region()) {
1676       if (n_dom->is_If()) {
1677         IfNode* iff = n_dom->as_If();
1678         if (iff->in(1)->is_Bool()) {
1679           BoolNode* bol = iff->in(1)->as_Bool();
1680           if (bol->in(1)->is_Cmp()) {
1681             // If the condition is invariant and not a loop exit,
1682             // we have found a candidate for unswitching.
1683             if (is_heap_stable_test(iff) &&
1684                 (loop_has_sfpts == -1 || loop_has_sfpts == 0)) {
1685               assert(!loop->is_loop_exit(iff), "both branches should be in the loop");
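                   // Only unswitch loops without (non-leaf) safepoints: the
                   // gc state may change at a safepoint, so the test would
                   // not remain invariant across it.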
1686               if (loop_has_sfpts == -1) {
1687                 for(uint i = 0; i < loop->_body.size(); i++) {
1688                   Node *m = loop->_body[i];
1689                   if (m->is_SafePoint() && !m->is_CallLeaf()) {
1690                     loop_has_sfpts = 1;
1691                     break;
1692                   }
1693                 }
1694                 if (loop_has_sfpts == -1) {
1695                   loop_has_sfpts = 0;
1696                 }
1697               }
1698               if (!loop_has_sfpts) {
1699                 unswitch_iff = iff;
1700               }
1701             }
1702           }
1703         }
1704       }
1705     }
1706     n = n_dom;
1707   }
1708   return unswitch_iff;
1709 }
1710 
1711 
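     // After barrier expansion: collect all heap-stable tests, merge
     // back-to-back tests, and, if no other loop optimization is still making
     // progress, try to unswitch innermost loops on the heap-stable test so
     // the gc state is tested once outside the loop.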
1712 void ShenandoahBarrierC2Support::optimize_after_expansion(VectorSet &visited, Node_Stack &stack, Node_List &old_new, PhaseIdealLoop* phase) {
1713   Node_List heap_stable_tests;
1714   stack.push(phase->C->start(), 0);
1715   do {
1716     Node* n = stack.node();
1717     uint i = stack.index();
1718 
1719     if (i < n->outcnt()) {
1720       Node* u = n->raw_out(i);
1721       stack.set_index(i+1);
1722       if (!visited.test_set(u->_idx)) {
1723         stack.push(u, 0);
1724       }
1725     } else {
1726       stack.pop();
1727       if (n->is_If() && is_heap_stable_test(n)) {
1728         heap_stable_tests.push(n);
1729       }
1730     }
1731   } while (stack.size() > 0);
1732 
1733   for (uint i = 0; i < heap_stable_tests.size(); i++) {
1734     Node* n = heap_stable_tests.at(i);
1735     assert(is_heap_stable_test(n), "only evacuation test");
1736     merge_back_to_back_tests(n, phase);
1737   }
1738 
1739   if (!phase->C->major_progress()) {
1740     VectorSet seen(Thread::current()->resource_area());
1741     for (uint i = 0; i < heap_stable_tests.size(); i++) {
1742       Node* n = heap_stable_tests.at(i);
1743       IdealLoopTree* loop = phase->get_loop(n);
1744       if (loop != phase->ltree_root() &&
1745           loop->_child == NULL &&
1746           !loop->_irreducible) {
1747         Node* head = loop->_head;
1748         if (head->is_Loop() &&
1749             (!head->is_CountedLoop() || head->as_CountedLoop()->is_main_loop() || head->as_CountedLoop()->is_normal_loop()) &&
1750             !seen.test_set(head->_idx)) {
1751           IfNode* iff = find_unswitching_candidate(loop, phase);
1752           if (iff != NULL) {
1753             Node* bol = iff->in(1);
1754             if (head->as_Loop()->is_strip_mined()) {
1755               head->as_Loop()->verify_strip_mined(0);
1756             }
1757             move_gc_state_test_out_of_loop(iff, phase);
1758 
1759             AutoNodeBudget node_budget(phase);
1760 
1761             if (loop->policy_unswitching(phase)) {
1762               if (head->as_Loop()->is_strip_mined()) {
1763                 OuterStripMinedLoopNode* outer = head->as_CountedLoop()->outer_loop();
1764                 hide_strip_mined_loop(outer, head->as_CountedLoop(), phase);
1765               }
1766               phase->do_unswitching(loop, old_new);
1767             } else {
1768               // Not proceeding with unswitching. Move the load back
1769               // into the loop.
1770               phase->igvn().replace_input_of(iff, 1, bol);
1771             }
1772           }
1773         }
1774       }
1775     }
1776   }
1777 }
1778 
1779 #ifdef ASSERT
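     // Verify the raw memory graph around expanded LRB stub calls: every
     // control region downstream of a call must either have all of its
     // inputs on the call's control paths or merge raw memory with a Phi
     // whose inputs match those paths.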
1780 void ShenandoahBarrierC2Support::verify_raw_mem(RootNode* root) {
1781   const bool trace = false;
1782   ResourceMark rm;
1783   Unique_Node_List nodes;
1784   Unique_Node_List controls;
1785   Unique_Node_List memories;
1786 
1787   nodes.push(root);
1788   for (uint next = 0; next < nodes.size(); next++) {
1789     Node *n  = nodes.at(next);
1790     if (ShenandoahBarrierSetC2::is_shenandoah_lrb_call(n)) {
1791       controls.push(n);
1792       if (trace) { tty->print("XXXXXX verifying"); n->dump(); }
1793       for (uint next2 = 0; next2 < controls.size(); next2++) {
1794         Node *m = controls.at(next2);
1795         for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
1796           Node* u = m->fast_out(i);
1797           if (u->is_CFG() && !u->is_Root() &&
1798               !(u->Opcode() == Op_CProj && u->in(0)->Opcode() == Op_NeverBranch && u->as_Proj()->_con == 1) &&
1799               !(u->is_Region() && u->unique_ctrl_out()->Opcode() == Op_Halt)) {
1800             if (trace) { tty->print("XXXXXX pushing control"); u->dump(); }
1801             controls.push(u);
1802           }
1803         }
1804       }
1805       memories.push(n->as_Call()->proj_out(TypeFunc::Memory));
1806       for (uint next2 = 0; next2 < memories.size(); next2++) {
1807         Node *m = memories.at(next2);
1808         assert(m->bottom_type() == Type::MEMORY, "");
1809         for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
1810           Node* u = m->fast_out(i);
1811           if (u->bottom_type() == Type::MEMORY && (u->is_Mem() || u->is_ClearArray())) {
1812             if (trace) { tty->print("XXXXXX pushing memory"); u->dump(); }
1813             memories.push(u);
1814           } else if (u->is_LoadStore()) {
1815             if (trace) { tty->print("XXXXXX pushing memory"); u->find_out_with(Op_SCMemProj)->dump(); }
1816             memories.push(u->find_out_with(Op_SCMemProj));
1817           } else if (u->is_MergeMem() && u->as_MergeMem()->memory_at(Compile::AliasIdxRaw) == m) {
1818             if (trace) { tty->print("XXXXXX pushing memory"); u->dump(); }
1819             memories.push(u);
1820           } else if (u->is_Phi()) {
1821             assert(u->bottom_type() == Type::MEMORY, "");
1822             if (u->adr_type() == TypeRawPtr::BOTTOM || u->adr_type() == TypePtr::BOTTOM) {
1823               assert(controls.member(u->in(0)), "");
1824               if (trace) { tty->print("XXXXXX pushing memory"); u->dump(); }
1825               memories.push(u);
1826             }
1827           } else if (u->is_SafePoint() || u->is_MemBar()) {
1828             for (DUIterator_Fast jmax, j = u->fast_outs(jmax); j < jmax; j++) {
1829               Node* uu = u->fast_out(j);
1830               if (uu->bottom_type() == Type::MEMORY) {
1831                 if (trace) { tty->print("XXXXXX pushing memory"); uu->dump(); }
1832                 memories.push(uu);
1833               }
1834             }
1835           }
1836         }
1837       }
1838       for (uint next2 = 0; next2 < controls.size(); next2++) {
1839         Node *m = controls.at(next2);
1840         if (m->is_Region()) {
1841           bool all_in = true;
1842           for (uint i = 1; i < m->req(); i++) {
1843             if (!controls.member(m->in(i))) {
1844               all_in = false;
1845               break;
1846             }
1847           }
1848           if (trace) { tty->print("XXX verifying %s", all_in ? "all in" : ""); m->dump(); }
1849           bool found_phi = false;
1850           for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax && !found_phi; j++) {
1851             Node* u = m->fast_out(j);
1852             if (u->is_Phi() && memories.member(u)) {
1853               found_phi = true;
1854               for (uint i = 1; i < u->req() && found_phi; i++) {
1855                 Node* k = u->in(i);
1856                 if (memories.member(k) != controls.member(m->in(i))) {
1857                   found_phi = false;
1858                 }
1859               }
1860             }
1861           }
1862           assert(found_phi || all_in, "");
1863         }
1864       }
1865       controls.clear();
1866       memories.clear();
1867     }
1868     for( uint i = 0; i < n->len(); ++i ) {
1869       Node *m = n->in(i);
1870       if (m != NULL) {
1871         nodes.push(m);
1872       }
1873     }
1874   }
1875 }
1876 #endif
1877 
1878 ShenandoahEnqueueBarrierNode::ShenandoahEnqueueBarrierNode(Node* val) : Node(NULL, val) {
1879   ShenandoahBarrierSetC2::bsc2()->state()->add_enqueue_barrier(this);
1880 }
1881 
1882 const Type* ShenandoahEnqueueBarrierNode::bottom_type() const {
1883   if (in(1) == NULL || in(1)->is_top()) {
1884     return Type::TOP;
1885   }
1886   const Type* t = in(1)->bottom_type();
1887   if (t == TypePtr::NULL_PTR) {
1888     return t;
1889   }
1890   return t->is_oopptr();
1891 }
1892 
1893 const Type* ShenandoahEnqueueBarrierNode::Value(PhaseGVN* phase) const {
1894   if (in(1) == NULL) {
1895     return Type::TOP;
1896   }
1897   const Type* t = phase->type(in(1));
1898   if (t == Type::TOP) {
1899     return Type::TOP;
1900   }
1901   if (t == TypePtr::NULL_PTR) {
1902     return t;
1903   }
1904   return t->is_oopptr();
1905 }
1906 
1907 int ShenandoahEnqueueBarrierNode::needed(Node* n) {
1908   if (n == NULL ||
1909       n->is_Allocate() ||
1910       n->Opcode() == Op_ShenandoahEnqueueBarrier ||
1911       n->bottom_type() == TypePtr::NULL_PTR ||
1912       (n->bottom_type()->make_oopptr() != NULL && n->bottom_type()->make_oopptr()->const_oop() != NULL)) {
1913     return NotNeeded;
1914   }
1915   if (n->is_Phi() ||
1916       n->is_CMove()) {
1917     return MaybeNeeded;
1918   }
1919   return Needed;
1920 }
1921 
1922 Node* ShenandoahEnqueueBarrierNode::next(Node* n) {
1923   for (;;) {
1924     if (n == NULL) {
1925       return n;
1926     } else if (n->bottom_type() == TypePtr::NULL_PTR) {
1927       return n;
1928     } else if (n->bottom_type()->make_oopptr() != NULL && n->bottom_type()->make_oopptr()->const_oop() != NULL) {
1929       return n;
1930     } else if (n->is_ConstraintCast() ||
1931                n->Opcode() == Op_DecodeN ||
1932                n->Opcode() == Op_EncodeP) {
1933       n = n->in(1);
1934     } else if (n->is_Proj()) {
1935       n = n->in(0);
1936     } else {
1937       return n;
1938     }
1939   }
1940   ShouldNotReachHere();
1941   return NULL;
1942 }
1943 
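     // An enqueue barrier is redundant if, looking through casts and
     // projections and transitively through Phi/CMove inputs, every value it
     // can observe is null, a constant oop, a fresh allocation, or already
     // covered by another enqueue barrier.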
1944 Node* ShenandoahEnqueueBarrierNode::Identity(PhaseGVN* phase) {
1945   PhaseIterGVN* igvn = phase->is_IterGVN();
1946 
1947   Node* n = next(in(1));
1948 
1949   int cont = needed(n);
1950 
1951   if (cont == NotNeeded) {
1952     return in(1);
1953   } else if (cont == MaybeNeeded) {
1954     if (igvn == NULL) {
1955       phase->record_for_igvn(this);
1956       return this;
1957     } else {
1958       ResourceMark rm;
1959       Unique_Node_List wq;
1960       uint wq_i = 0;
1961 
1962       for (;;) {
1963         if (n->is_Phi()) {
1964           for (uint i = 1; i < n->req(); i++) {
1965             Node* m = n->in(i);
1966             if (m != NULL) {
1967               wq.push(m);
1968             }
1969           }
1970         } else {
1971           assert(n->is_CMove(), "nothing else here");
1972           Node* m = n->in(CMoveNode::IfFalse);
1973           wq.push(m);
1974           m = n->in(CMoveNode::IfTrue);
1975           wq.push(m);
1976         }
1977         Node* orig_n = NULL;
1978         do {
1979           if (wq_i >= wq.size()) {
1980             return in(1);
1981           }
1982           n = wq.at(wq_i);
1983           wq_i++;
1984           orig_n = n;
1985           n = next(n);
1986           cont = needed(n);
1987           if (cont == Needed) {
1988             return this;
1989           }
1990         } while (cont != MaybeNeeded || (orig_n != n && wq.member(n)));
1991       }
1992     }
1993   }
1994 
1995   return this;
1996 }
1997 
1998 #ifdef ASSERT
1999 static bool has_never_branch(Node* root) {
2000   for (uint i = 1; i < root->req(); i++) {
2001     Node* in = root->in(i);
2002     if (in != NULL && in->Opcode() == Op_Halt && in->in(0)->is_Proj() && in->in(0)->in(0)->Opcode() == Op_NeverBranch) {
2003       return true;
2004     }
2005   }
2006   return false;
2007 }
2008 #endif
2009 
2010 void MemoryGraphFixer::collect_memory_nodes() {
2011   Node_Stack stack(0);
2012   VectorSet visited(Thread::current()->resource_area());
2013   Node_List regions;
2014 
2015   // Walk the raw memory graph and create a mapping from CFG node to
2016   // memory node. Exclude phis for now.
2017   stack.push(_phase->C->root(), 1);
2018   do {
2019     Node* n = stack.node();
2020     int opc = n->Opcode();
2021     uint i = stack.index();
2022     if (i < n->req()) {
2023       Node* mem = NULL;
2024       if (opc == Op_Root) {
2025         Node* in = n->in(i);
2026         int in_opc = in->Opcode();
2027         if (in_opc == Op_Return || in_opc == Op_Rethrow) {
2028           mem = in->in(TypeFunc::Memory);
2029         } else if (in_opc == Op_Halt) {
2030           if (in->in(0)->is_Region()) {
2031             Node* r = in->in(0);
2032             for (uint j = 1; j < r->req(); j++) {
2033               assert(r->in(j)->Opcode() != Op_NeverBranch, "");
2034             }
2035           } else {
2036             Node* proj = in->in(0);
2037             assert(proj->is_Proj(), "");
2038             Node* in = proj->in(0);
2039             assert(in->is_CallStaticJava() || in->Opcode() == Op_NeverBranch || in->Opcode() == Op_Catch || proj->is_IfProj(), "");
2040             if (in->is_CallStaticJava()) {
2041               mem = in->in(TypeFunc::Memory);
2042             } else if (in->Opcode() == Op_Catch) {
2043               Node* call = in->in(0)->in(0);
2044               assert(call->is_Call(), "");
2045               mem = call->in(TypeFunc::Memory);
2046             } else if (in->Opcode() == Op_NeverBranch) {
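                   // Infinite loop ending in a NeverBranch: pick up the raw
                   // memory state from a non-leaf safepoint on the path from
                   // the back edge up to the loop head.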
2047               Node* head = in->in(0);
2048               assert(head->is_Region() && head->req() == 3, "unexpected infinite loop graph shape");
2049               assert(_phase->is_dominator(head, head->in(1)) || _phase->is_dominator(head, head->in(2)), "no back branch?");
2050               Node* tail = _phase->is_dominator(head, head->in(1)) ? head->in(1) : head->in(2);
2051               Node* c = tail;
2052               while (c != head) {
2053                 if (c->is_SafePoint() && !c->is_CallLeaf()) {
2054                   mem = c->in(TypeFunc::Memory);
2055                 }
2056                 c = _phase->idom(c);
2057               }
2058               assert(mem != NULL, "should have found safepoint");
2059 
2060               Node* phi_mem = NULL;
2061               for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
2062                 Node* u = head->fast_out(j);
2063                 if (u->is_Phi() && u->bottom_type() == Type::MEMORY) {
2064                   if (_phase->C->get_alias_index(u->adr_type()) == _alias) {
2065                     assert(phi_mem == NULL || phi_mem->adr_type() == TypePtr::BOTTOM, "");
2066                     phi_mem = u;
2067                   } else if (u->adr_type() == TypePtr::BOTTOM) {
2068                     assert(phi_mem == NULL || _phase->C->get_alias_index(phi_mem->adr_type()) == _alias, "");
2069                     if (phi_mem == NULL) {
2070                       phi_mem = u;
2071                     }
2072                   }
2073                 }
2074               }
2075               if (phi_mem != NULL) {
2076                 mem = phi_mem;
2077               }
2078             }
2079           }
2080         } else {
2081 #ifdef ASSERT
2082           n->dump();
2083           in->dump();
2084 #endif
2085           ShouldNotReachHere();
2086         }
2087       } else {
2088         assert(n->is_Phi() && n->bottom_type() == Type::MEMORY, "");
2089         assert(n->adr_type() == TypePtr::BOTTOM || _phase->C->get_alias_index(n->adr_type()) == _alias, "");
2090         mem = n->in(i);
2091       }
2092       i++;
2093       stack.set_index(i);
2094       if (mem == NULL) {
2095         continue;
2096       }
2097       for (;;) {
2098         if (visited.test_set(mem->_idx) || mem->is_Start()) {
2099           break;
2100         }
2101         if (mem->is_Phi()) {
2102           stack.push(mem, 2);
2103           mem = mem->in(1);
2104         } else if (mem->is_Proj()) {
2105           stack.push(mem, mem->req());
2106           mem = mem->in(0);
2107         } else if (mem->is_SafePoint() || mem->is_MemBar()) {
2108           mem = mem->in(TypeFunc::Memory);
2109         } else if (mem->is_MergeMem()) {
2110           MergeMemNode* mm = mem->as_MergeMem();
2111           mem = mm->memory_at(_alias);
2112         } else if (mem->is_Store() || mem->is_LoadStore() || mem->is_ClearArray()) {
2113           assert(_alias == Compile::AliasIdxRaw, "");
2114           stack.push(mem, mem->req());
2115           mem = mem->in(MemNode::Memory);
2116         } else {
2117 #ifdef ASSERT
2118           mem->dump();
2119 #endif
2120           ShouldNotReachHere();
2121         }
2122       }
2123     } else {
2124       if (n->is_Phi()) {
2125         // Nothing
2126       } else if (!n->is_Root()) {
2127         Node* c = get_ctrl(n);
2128         _memory_nodes.map(c->_idx, n);
2129       }
2130       stack.pop();
2131     }
2132   } while(stack.is_nonempty());
2133 
2134   // Iterate over CFG nodes in rpo and propagate memory state to
2135   // compute memory state at regions, creating new phis if needed.
2136   Node_List rpo_list;
2137   visited.clear();
2138   _phase->rpo(_phase->C->root(), stack, visited, rpo_list);
2139   Node* root = rpo_list.pop();
2140   assert(root == _phase->C->root(), "");
2141 
2142   const bool trace = false;
2143 #ifdef ASSERT
2144   if (trace) {
2145     for (int i = rpo_list.size() - 1; i >= 0; i--) {
2146       Node* c = rpo_list.at(i);
2147       if (_memory_nodes[c->_idx] != NULL) {
2148         tty->print("X %d", c->_idx);  _memory_nodes[c->_idx]->dump();
2149       }
2150     }
2151   }
2152 #endif
2153   uint last = _phase->C->unique();
2154 
2155 #ifdef ASSERT
2156   uint8_t max_depth = 0;
2157   for (LoopTreeIterator iter(_phase->ltree_root()); !iter.done(); iter.next()) {
2158     IdealLoopTree* lpt = iter.current();
2159     max_depth = MAX2(max_depth, lpt->_nest);
2160   }
2161 #endif
2162 
2163   bool progress = true;
2164   int iteration = 0;
2165   Node_List dead_phis;
2166   while (progress) {
2167     progress = false;
2168     iteration++;
2169     assert(iteration <= 2+max_depth || _phase->C->has_irreducible_loop() || has_never_branch(_phase->C->root()), "");
2170     if (trace) { tty->print_cr("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); }
2171     IdealLoopTree* last_updated_ilt = NULL;
2172     for (int i = rpo_list.size() - 1; i >= 0; i--) {
2173       Node* c = rpo_list.at(i);
2174 
2175       Node* prev_mem = _memory_nodes[c->_idx];
2176       if (c->is_Region() && (_include_lsm || !c->is_OuterStripMinedLoop())) {
2177         Node* prev_region = regions[c->_idx];
2178         Node* unique = NULL;
2179         for (uint j = 1; j < c->req() && unique != NodeSentinel; j++) {
2180           Node* m = _memory_nodes[c->in(j)->_idx];
2181           assert(m != NULL || (c->is_Loop() && j == LoopNode::LoopBackControl && iteration == 1) || _phase->C->has_irreducible_loop() || has_never_branch(_phase->C->root()), "expect memory state");
2182           if (m != NULL) {
2183             if (m == prev_region && ((c->is_Loop() && j == LoopNode::LoopBackControl) || (prev_region->is_Phi() && prev_region->in(0) == c))) {
2184               assert((c->is_Loop() && j == LoopNode::LoopBackControl) || _phase->C->has_irreducible_loop(), "");
2185               // continue
2186             } else if (unique == NULL) {
2187               unique = m;
2188             } else if (m == unique) {
2189               // continue
2190             } else {
2191               unique = NodeSentinel;
2192             }
2193           }
2194         }
2195         assert(unique != NULL, "empty phi???");
2196         if (unique != NodeSentinel) {
2197           if (prev_region != NULL && prev_region->is_Phi() && prev_region->in(0) == c) {
2198             dead_phis.push(prev_region);
2199           }
2200           regions.map(c->_idx, unique);
2201         } else {
2202           Node* phi = NULL;
2203           if (prev_region != NULL && prev_region->is_Phi() && prev_region->in(0) == c && prev_region->_idx >= last) {
2204             phi = prev_region;
2205             for (uint k = 1; k < c->req(); k++) {
2206               Node* m = _memory_nodes[c->in(k)->_idx];
2207               assert(m != NULL, "expect memory state");
2208               phi->set_req(k, m);
2209             }
2210           } else {
2211             for (DUIterator_Fast jmax, j = c->fast_outs(jmax); j < jmax && phi == NULL; j++) {
2212               Node* u = c->fast_out(j);
2213               if (u->is_Phi() && u->bottom_type() == Type::MEMORY &&
2214                   (u->adr_type() == TypePtr::BOTTOM || _phase->C->get_alias_index(u->adr_type()) == _alias)) {
2215                 phi = u;
2216                 for (uint k = 1; k < c->req() && phi != NULL; k++) {
2217                   Node* m = _memory_nodes[c->in(k)->_idx];
2218                   assert(m != NULL, "expect memory state");
2219                   if (u->in(k) != m) {
2220                     phi = NULL;
2221                   }
2222                 }
2223               }
2224             }
2225             if (phi == NULL) {
2226               phi = new PhiNode(c, Type::MEMORY, _phase->C->get_adr_type(_alias));
2227               for (uint k = 1; k < c->req(); k++) {
2228                 Node* m = _memory_nodes[c->in(k)->_idx];
2229                 assert(m != NULL, "expect memory state");
2230                 phi->init_req(k, m);
2231               }
2232             }
2233           }
2234           assert(phi != NULL, "");
2235           regions.map(c->_idx, phi);
2236         }
2237         Node* current_region = regions[c->_idx];
2238         if (current_region != prev_region) {
2239           progress = true;
2240           if (prev_region == prev_mem) {
2241             _memory_nodes.map(c->_idx, current_region);
2242           }
2243         }
2244       } else if (prev_mem == NULL || prev_mem->is_Phi() || ctrl_or_self(prev_mem) != c) {
2245         Node* m = _memory_nodes[_phase->idom(c)->_idx];
2246         assert(m != NULL, "expect memory state");
2247         if (m != prev_mem) {
2248           _memory_nodes.map(c->_idx, m);
2249           progress = true;
2250         }
2251       }
2252 #ifdef ASSERT
2253       if (trace) { tty->print("X %d", c->_idx);  _memory_nodes[c->_idx]->dump(); }
2254 #endif
2255     }
2256   }
2257 
2258   // Replace existing phi with computed memory state for that region
2259   // if different (could be a new phi or a dominating memory node if
2260   // that phi was found to be useless).
2261   while (dead_phis.size() > 0) {
2262     Node* n = dead_phis.pop();
2263     n->replace_by(_phase->C->top());
2264     n->destruct();
2265   }
2266   for (int i = rpo_list.size() - 1; i >= 0; i--) {
2267     Node* c = rpo_list.at(i);
2268     if (c->is_Region() && (_include_lsm || !c->is_OuterStripMinedLoop())) {
2269       Node* n = regions[c->_idx];
2270       if (n->is_Phi() && n->_idx >= last && n->in(0) == c) {
2271         _phase->register_new_node(n, c);
2272       }
2273     }
2274   }
2275   for (int i = rpo_list.size() - 1; i >= 0; i--) {
2276     Node* c = rpo_list.at(i);
2277     if (c->is_Region() && (_include_lsm || !c->is_OuterStripMinedLoop())) {
2278       Node* n = regions[c->_idx];
2279       for (DUIterator_Fast imax, i = c->fast_outs(imax); i < imax; i++) {
2280         Node* u = c->fast_out(i);
2281         if (u->is_Phi() && u->bottom_type() == Type::MEMORY &&
2282             u != n) {
2283           if (u->adr_type() == TypePtr::BOTTOM) {
2284             fix_memory_uses(u, n, n, c);
2285           } else if (_phase->C->get_alias_index(u->adr_type()) == _alias) {
2286             _phase->lazy_replace(u, n);
2287             --i; --imax;
2288           }
2289         }
2290       }
2291     }
2292   }
2293 }
2294 
2295 Node* MemoryGraphFixer::get_ctrl(Node* n) const {
2296   Node* c = _phase->get_ctrl(n);
2297   if (n->is_Proj() && n->in(0) != NULL && n->in(0)->is_Call()) {
2298     assert(c == n->in(0), "");
2299     CallNode* call = c->as_Call();
2300     CallProjections projs;
2301     call->extract_projections(&projs, true, false);
2302     if (projs.catchall_memproj != NULL) {
2303       if (projs.fallthrough_memproj == n) {
2304         c = projs.fallthrough_catchproj;
2305       } else {
2306         assert(projs.catchall_memproj == n, "");
2307         c = projs.catchall_catchproj;
2308       }
2309     }
2310   }
2311   return c;
2312 }
2313 
2314 Node* MemoryGraphFixer::ctrl_or_self(Node* n) const {
2315   if (_phase->has_ctrl(n))
2316     return get_ctrl(n);
2317   else {
2318     assert (n->is_CFG(), "must be a CFG node");
2319     return n;
2320   }
2321 }
2322 
2323 bool MemoryGraphFixer::mem_is_valid(Node* m, Node* c) const {
2324   return m != NULL && get_ctrl(m) == c;
2325 }
2326 
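     // Find the raw memory state live at ctrl (for use by n, if given): start
     // from the memory node recorded for ctrl and walk up the dominator tree
     // until a memory node valid at the current control point is found.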
2327 Node* MemoryGraphFixer::find_mem(Node* ctrl, Node* n) const {
2328   assert(n == NULL || _phase->ctrl_or_self(n) == ctrl, "");
2329   Node* mem = _memory_nodes[ctrl->_idx];
2330   Node* c = ctrl;
2331   while (!mem_is_valid(mem, c) &&
2332          (!c->is_CatchProj() || mem == NULL || c->in(0)->in(0)->in(0) != get_ctrl(mem))) {
2333     c = _phase->idom(c);
2334     mem = _memory_nodes[c->_idx];
2335   }
2336   if (n != NULL && mem_is_valid(mem, c)) {
2337     while (!ShenandoahBarrierC2Support::is_dominator_same_ctrl(c, mem, n, _phase) && _phase->ctrl_or_self(mem) == ctrl) {
2338       mem = next_mem(mem, _alias);
2339     }
2340     if (mem->is_MergeMem()) {
2341       mem = mem->as_MergeMem()->memory_at(_alias);
2342     }
2343     if (!mem_is_valid(mem, c)) {
2344       do {
2345         c = _phase->idom(c);
2346         mem = _memory_nodes[c->_idx];
2347       } while (!mem_is_valid(mem, c) &&
2348                (!c->is_CatchProj() || mem == NULL || c->in(0)->in(0)->in(0) != get_ctrl(mem)));
2349     }
2350   }
2351   assert(mem->bottom_type() == Type::MEMORY, "");
2352   return mem;
2353 }
2354 
2355 bool MemoryGraphFixer::has_mem_phi(Node* region) const {
2356   for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
2357     Node* use = region->fast_out(i);
2358     if (use->is_Phi() && use->bottom_type() == Type::MEMORY &&
2359         (_phase->C->get_alias_index(use->adr_type()) == _alias)) {
2360       return true;
2361     }
2362   }
2363   return false;
2364 }
2365 
2366 void MemoryGraphFixer::fix_mem(Node* ctrl, Node* new_ctrl, Node* mem, Node* mem_for_ctrl, Node* new_mem, Unique_Node_List& uses) {
2367   assert(_phase->ctrl_or_self(new_mem) == new_ctrl, "");
2368   const bool trace = false;
2369   DEBUG_ONLY(if (trace) { tty->print("ZZZ control is"); ctrl->dump(); });
2370   DEBUG_ONLY(if (trace) { tty->print("ZZZ mem is"); mem->dump(); });
2371   GrowableArray<Node*> phis;
2372   if (mem_for_ctrl != mem) {
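         // Memory already flows below ctrl: walk back from mem_for_ctrl along
         // the raw memory chain to mem and splice new_mem in as the memory
         // input of the node that currently consumes mem.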
2373     Node* old = mem_for_ctrl;
2374     Node* prev = NULL;
2375     while (old != mem) {
2376       prev = old;
2377       if (old->is_Store() || old->is_ClearArray() || old->is_LoadStore()) {
2378         assert(_alias == Compile::AliasIdxRaw, "");
2379         old = old->in(MemNode::Memory);
2380       } else if (old->Opcode() == Op_SCMemProj) {
2381         assert(_alias == Compile::AliasIdxRaw, "");
2382         old = old->in(0);
2383       } else {
2384         ShouldNotReachHere();
2385       }
2386     }
2387     assert(prev != NULL, "");
2388     if (new_ctrl != ctrl) {
2389       _memory_nodes.map(ctrl->_idx, mem);
2390       _memory_nodes.map(new_ctrl->_idx, mem_for_ctrl);
2391     }
2392     uint input = (uint)MemNode::Memory;
2393     _phase->igvn().replace_input_of(prev, input, new_mem);
2394   } else {
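         // The barrier's input memory is still the current state at ctrl
         // (nothing below it uses raw memory yet): walk the CFG dominated by
         // new_ctrl and create raw memory Phis at regions where the old and
         // new memory states merge.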
2395     uses.clear();
2396     _memory_nodes.map(new_ctrl->_idx, new_mem);
2397     uses.push(new_ctrl);
2398     for(uint next = 0; next < uses.size(); next++ ) {
2399       Node *n = uses.at(next);
2400       assert(n->is_CFG(), "");
2401       DEBUG_ONLY(if (trace) { tty->print("ZZZ ctrl"); n->dump(); });
2402       for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
2403         Node* u = n->fast_out(i);
2404         if (!u->is_Root() && u->is_CFG() && u != n) {
2405           Node* m = _memory_nodes[u->_idx];
2406           if (u->is_Region() && (!u->is_OuterStripMinedLoop() || _include_lsm) &&
2407               !has_mem_phi(u) &&
2408               u->unique_ctrl_out()->Opcode() != Op_Halt) {
2409             DEBUG_ONLY(if (trace) { tty->print("ZZZ region"); u->dump(); });
2410             DEBUG_ONLY(if (trace && m != NULL) { tty->print("ZZZ mem"); m->dump(); });
2411 
2412             if (!mem_is_valid(m, u) || !m->is_Phi()) {
2413               bool push = true;
2414               bool create_phi = true;
2415               if (_phase->is_dominator(new_ctrl, u)) {
2416                 create_phi = false;
2417               }
2418               if (create_phi) {
2419                 Node* phi = new PhiNode(u, Type::MEMORY, _phase->C->get_adr_type(_alias));
2420                 _phase->register_new_node(phi, u);
2421                 phis.push(phi);
2422                 DEBUG_ONLY(if (trace) { tty->print("ZZZ new phi"); phi->dump(); });
2423                 if (!mem_is_valid(m, u)) {
2424                   DEBUG_ONLY(if (trace) { tty->print("ZZZ setting mem"); phi->dump(); });
2425                   _memory_nodes.map(u->_idx, phi);
2426                 } else {
2427                   DEBUG_ONLY(if (trace) { tty->print("ZZZ NOT setting mem"); m->dump(); });
2428                   for (;;) {
2429                     assert(m->is_Mem() || m->is_LoadStore() || m->is_Proj(), "");
2430                     Node* next = NULL;
2431                     if (m->is_Proj()) {
2432                       next = m->in(0);
2433                     } else {
2434                       assert(m->is_Mem() || m->is_LoadStore(), "");
2435                       assert(_alias == Compile::AliasIdxRaw, "");
2436                       next = m->in(MemNode::Memory);
2437                     }
2438                     if (_phase->get_ctrl(next) != u) {
2439                       break;
2440                     }
2441                     if (next->is_MergeMem()) {
2442                       assert(_phase->get_ctrl(next->as_MergeMem()->memory_at(_alias)) != u, "");
2443                       break;
2444                     }
2445                     if (next->is_Phi()) {
2446                       assert(next->adr_type() == TypePtr::BOTTOM && next->in(0) == u, "");
2447                       break;
2448                     }
2449                     m = next;
2450                   }
2451 
2452                   DEBUG_ONLY(if (trace) { tty->print("ZZZ setting to phi"); m->dump(); });
2453                   assert(m->is_Mem() || m->is_LoadStore(), "");
2454                   uint input = (uint)MemNode::Memory;
2455                   _phase->igvn().replace_input_of(m, input, phi);
2456                   push = false;
2457                 }
2458               } else {
2459                 DEBUG_ONLY(if (trace) { tty->print("ZZZ skipping region"); u->dump(); });
2460               }
2461               if (push) {
2462                 uses.push(u);
2463               }
2464             }
2465           } else if (!mem_is_valid(m, u) &&
2466                      !(u->Opcode() == Op_CProj && u->in(0)->Opcode() == Op_NeverBranch && u->as_Proj()->_con == 1)) {
2467             uses.push(u);
2468           }
2469         }
2470       }
2471     }
2472     for (int i = 0; i < phis.length(); i++) {
2473       Node* n = phis.at(i);
2474       Node* r = n->in(0);
2475       DEBUG_ONLY(if (trace) { tty->print("ZZZ fixing new phi"); n->dump(); });
2476       for (uint j = 1; j < n->req(); j++) {
2477         Node* m = find_mem(r->in(j), NULL);
2478         _phase->igvn().replace_input_of(n, j, m);
2479         DEBUG_ONLY(if (trace) { tty->print("ZZZ fixing new phi: %d", j); m->dump(); });
2480       }
2481     }
2482   }
2483   uint last = _phase->C->unique();
2484   MergeMemNode* mm = NULL;
2485   int alias = _alias;
2486   DEBUG_ONLY(if (trace) { tty->print("ZZZ raw mem is"); mem->dump(); });
2487   // Process loads first to not miss an anti-dependency: if the memory
2488   // edge of a store is updated before a load is processed then an
2489   // anti-dependency may be missed.
2490   for (DUIterator i = mem->outs(); mem->has_out(i); i++) {
2491     Node* u = mem->out(i);
2492     if (u->_idx < last && u->is_Load() && _phase->C->get_alias_index(u->adr_type()) == alias) {
2493       Node* m = find_mem(_phase->get_ctrl(u), u);
2494       if (m != mem) {
2495         DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of use"); u->dump(); });
2496         _phase->igvn().replace_input_of(u, MemNode::Memory, m);
2497         --i;
2498       }
2499     }
2500   }
2501   for (DUIterator i = mem->outs(); mem->has_out(i); i++) {
2502     Node* u = mem->out(i);
2503     if (u->_idx < last) {
2504       if (u->is_Mem()) {
2505         if (_phase->C->get_alias_index(u->adr_type()) == alias) {
2506           Node* m = find_mem(_phase->get_ctrl(u), u);
2507           if (m != mem) {
2508             DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of use"); u->dump(); });
2509             _phase->igvn().replace_input_of(u, MemNode::Memory, m);
2510             --i;
2511           }
2512         }
2513       } else if (u->is_MergeMem()) {
2514         MergeMemNode* u_mm = u->as_MergeMem();
2515         if (u_mm->memory_at(alias) == mem) {
2516           MergeMemNode* newmm = NULL;
2517           for (DUIterator_Fast jmax, j = u->fast_outs(jmax); j < jmax; j++) {
2518             Node* uu = u->fast_out(j);
2519             assert(!uu->is_MergeMem(), "chain of MergeMems?");
2520             if (uu->is_Phi()) {
2521               assert(uu->adr_type() == TypePtr::BOTTOM, "");
2522               Node* region = uu->in(0);
2523               int nb = 0;
2524               for (uint k = 1; k < uu->req(); k++) {
2525                 if (uu->in(k) == u) {
2526                   Node* m = find_mem(region->in(k), NULL);
2527                   if (m != mem) {
2528                     DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of phi %d", k); uu->dump(); });
2529                     newmm = clone_merge_mem(u, mem, m, _phase->ctrl_or_self(m), i);
2530                     if (newmm != u) {
2531                       _phase->igvn().replace_input_of(uu, k, newmm);
2532                       nb++;
2533                       --jmax;
2534                     }
2535                   }
2536                 }
2537               }
2538               if (nb > 0) {
2539                 --j;
2540               }
2541             } else {
2542               Node* m = find_mem(_phase->ctrl_or_self(uu), uu);
2543               if (m != mem) {
2544                 DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of use"); uu->dump(); });
2545                 newmm = clone_merge_mem(u, mem, m, _phase->ctrl_or_self(m), i);
2546                 if (newmm != u) {
2547                   _phase->igvn().replace_input_of(uu, uu->find_edge(u), newmm);
2548                   --j, --jmax;
2549                 }
2550               }
2551             }
2552           }
2553         }
2554       } else if (u->is_Phi()) {
2555         assert(u->bottom_type() == Type::MEMORY, "what else?");
2556         if (_phase->C->get_alias_index(u->adr_type()) == alias || u->adr_type() == TypePtr::BOTTOM) {
2557           Node* region = u->in(0);
2558           bool replaced = false;
2559           for (uint j = 1; j < u->req(); j++) {
2560             if (u->in(j) == mem) {
2561               Node* m = find_mem(region->in(j), NULL);
2562               Node* nnew = m;
2563               if (m != mem) {
2564                 if (u->adr_type() == TypePtr::BOTTOM) {
2565                   mm = allocate_merge_mem(mem, m, _phase->ctrl_or_self(m));
2566                   nnew = mm;
2567                 }
2568                 DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of phi %d", j); u->dump(); });
2569                 _phase->igvn().replace_input_of(u, j, nnew);
2570                 replaced = true;
2571               }
2572             }
2573           }
2574           if (replaced) {
2575             --i;
2576           }
2577         }
2578       } else if ((u->adr_type() == TypePtr::BOTTOM && u->Opcode() != Op_StrInflatedCopy) ||
2579                  u->adr_type() == NULL) {
2580         assert(u->adr_type() != NULL ||
2581                u->Opcode() == Op_Rethrow ||
2582                u->Opcode() == Op_Return ||
2583                u->Opcode() == Op_SafePoint ||
2584                (u->is_CallStaticJava() && u->as_CallStaticJava()->uncommon_trap_request() != 0) ||
2585                (u->is_CallStaticJava() && u->as_CallStaticJava()->_entry_point == OptoRuntime::rethrow_stub()) ||
2586                u->Opcode() == Op_CallLeaf, "");
2587         Node* m = find_mem(_phase->ctrl_or_self(u), u);
2588         if (m != mem) {
2589           mm = allocate_merge_mem(mem, m, _phase->get_ctrl(m));
2590           _phase->igvn().replace_input_of(u, u->find_edge(mem), mm);
2591           --i;
2592         }
2593       } else if (_phase->C->get_alias_index(u->adr_type()) == alias) {
2594         Node* m = find_mem(_phase->ctrl_or_self(u), u);
2595         if (m != mem) {
2596           DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of use"); u->dump(); });
2597           _phase->igvn().replace_input_of(u, u->find_edge(mem), m);
2598           --i;
2599         }
2600       } else if (u->adr_type() != TypePtr::BOTTOM &&
2601                  _memory_nodes[_phase->ctrl_or_self(u)->_idx] == u) {
2602         Node* m = find_mem(_phase->ctrl_or_self(u), u);
2603         assert(m != mem, "");
2604         // u is on the wrong slice...
2605         assert(u->is_ClearArray(), "");
2606         DEBUG_ONLY(if (trace) { tty->print("ZZZ setting memory of use"); u->dump(); });
2607         _phase->igvn().replace_input_of(u, u->find_edge(mem), m);
2608         --i;
2609       }
2610     }
2611   }
2612 #ifdef ASSERT
2613   assert(new_mem->outcnt() > 0, "");
2614   for (int i = 0; i < phis.length(); i++) {
2615     Node* n = phis.at(i);
2616     assert(n->outcnt() > 0, "new phi must have uses now");
2617   }
2618 #endif
2619 }
2620 
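     // Build a new MergeMem based on 'mem', with the slice at this fixer's alias
     // index replaced by 'rep_proj'. The new node is registered at 'rep_ctrl'.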
2621 MergeMemNode* MemoryGraphFixer::allocate_merge_mem(Node* mem, Node* rep_proj, Node* rep_ctrl) const {
2622   MergeMemNode* mm = MergeMemNode::make(mem);
2623   mm->set_memory_at(_alias, rep_proj);
2624   _phase->register_new_node(mm, rep_ctrl);
2625   return mm;
2626 }
2627 
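     // Return a MergeMem that is equivalent to 'u' except that the slice at this
     // fixer's alias index is 'rep_proj'. When 'u' has a single use it is updated
     // in place (adjusting the caller's DUIterator 'i' as needed); otherwise a
     // fresh copy is built and registered at the deeper of u's control and
     // 'rep_ctrl'.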
2628 MergeMemNode* MemoryGraphFixer::clone_merge_mem(Node* u, Node* mem, Node* rep_proj, Node* rep_ctrl, DUIterator& i) const {
2629   MergeMemNode* newmm = NULL;
2630   MergeMemNode* u_mm = u->as_MergeMem();
2631   Node* c = _phase->get_ctrl(u);
2632   if (_phase->is_dominator(c, rep_ctrl)) {
2633     c = rep_ctrl;
2634   } else {
2635     assert(_phase->is_dominator(rep_ctrl, c), "one must dominate the other");
2636   }
2637   if (u->outcnt() == 1) {
2638     if (u->req() > (uint)_alias && u->in(_alias) == mem) {
2639       _phase->igvn().replace_input_of(u, _alias, rep_proj);
2640       --i;
2641     } else {
2642       _phase->igvn().rehash_node_delayed(u);
2643       u_mm->set_memory_at(_alias, rep_proj);
2644     }
2645     newmm = u_mm;
2646     _phase->set_ctrl_and_loop(u, c);
2647   } else {
2648     // Can't simply clone u and then change one of its inputs: that
2649     // would add and then remove an edge, which messes with the
2650     // DUIterator.
2651     newmm = MergeMemNode::make(u_mm->base_memory());
2652     for (uint j = 0; j < u->req(); j++) {
2653       if (j < newmm->req()) {
2654         if (j == (uint)_alias) {
2655           newmm->set_req(j, rep_proj);
2656         } else if (newmm->in(j) != u->in(j)) {
2657           newmm->set_req(j, u->in(j));
2658         }
2659       } else if (j == (uint)_alias) {
2660         newmm->add_req(rep_proj);
2661       } else {
2662         newmm->add_req(u->in(j));
2663       }
2664     }
2665     if ((uint)_alias >= u->req()) {
2666       newmm->set_memory_at(_alias, rep_proj);
2667     }
2668     _phase->register_new_node(newmm, c);
2669   }
2670   return newmm;
2671 }
2672 
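     // A memory Phi is relevant to this fixer if it is on the alias slice being
     // fixed, or if it is a bottom-memory Phi and no other Phi on the same region
     // already covers that slice.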
2673 bool MemoryGraphFixer::should_process_phi(Node* phi) const {
2674   if (phi->adr_type() == TypePtr::BOTTOM) {
2675     Node* region = phi->in(0);
2676     for (DUIterator_Fast jmax, j = region->fast_outs(jmax); j < jmax; j++) {
2677       Node* uu = region->fast_out(j);
2678       if (uu->is_Phi() && uu != phi && uu->bottom_type() == Type::MEMORY && _phase->C->get_alias_index(uu->adr_type()) == _alias) {
2679         return false;
2680       }
2681     }
2682     return true;
2683   }
2684   return _phase->C->get_alias_index(phi->adr_type()) == _alias;
2685 }
2686 
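     // Rewire uses of 'mem' that are dominated by 'rep_ctrl' so that they see
     // 'rep_proj' instead, cloning or allocating MergeMems where a use expects
     // bottom memory. 'replacement' itself and nodes created during this pass
     // are skipped.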
2687 void MemoryGraphFixer::fix_memory_uses(Node* mem, Node* replacement, Node* rep_proj, Node* rep_ctrl) const {
2688   uint last = _phase->C->unique();
2689   MergeMemNode* mm = NULL;
2690   assert(mem->bottom_type() == Type::MEMORY, "");
2691   for (DUIterator i = mem->outs(); mem->has_out(i); i++) {
2692     Node* u = mem->out(i);
2693     if (u != replacement && u->_idx < last) {
2694       if (u->is_MergeMem()) {
2695         MergeMemNode* u_mm = u->as_MergeMem();
2696         if (u_mm->memory_at(_alias) == mem) {
2697           MergeMemNode* newmm = NULL;
2698           for (DUIterator_Fast jmax, j = u->fast_outs(jmax); j < jmax; j++) {
2699             Node* uu = u->fast_out(j);
2700             assert(!uu->is_MergeMem(), "chain of MergeMems?");
2701             if (uu->is_Phi()) {
2702               if (should_process_phi(uu)) {
2703                 Node* region = uu->in(0);
2704                 int nb = 0;
2705                 for (uint k = 1; k < uu->req(); k++) {
2706                   if (uu->in(k) == u && _phase->is_dominator(rep_ctrl, region->in(k))) {
2707                     if (newmm == NULL) {
2708                       newmm = clone_merge_mem(u, mem, rep_proj, rep_ctrl, i);
2709                     }
2710                     if (newmm != u) {
2711                       _phase->igvn().replace_input_of(uu, k, newmm);
2712                       nb++;
2713                       --jmax;
2714                     }
2715                   }
2716                 }
2717                 if (nb > 0) {
2718                   --j;
2719                 }
2720               }
2721             } else {
2722               if (rep_ctrl != uu && ShenandoahBarrierC2Support::is_dominator(rep_ctrl, _phase->ctrl_or_self(uu), replacement, uu, _phase)) {
2723                 if (newmm == NULL) {
2724                   newmm = clone_merge_mem(u, mem, rep_proj, rep_ctrl, i);
2725                 }
2726                 if (newmm != u) {
2727                   _phase->igvn().replace_input_of(uu, uu->find_edge(u), newmm);
2728                   --j, --jmax;
2729                 }
2730               }
2731             }
2732           }
2733         }
2734       } else if (u->is_Phi()) {
2735         assert(u->bottom_type() == Type::MEMORY, "what else?");
2736         Node* region = u->in(0);
2737         if (should_process_phi(u)) {
2738           bool replaced = false;
2739           for (uint j = 1; j < u->req(); j++) {
2740             if (u->in(j) == mem && _phase->is_dominator(rep_ctrl, region->in(j))) {
2741               Node* nnew = rep_proj;
2742               if (u->adr_type() == TypePtr::BOTTOM) {
2743                 if (mm == NULL) {
2744                   mm = allocate_merge_mem(mem, rep_proj, rep_ctrl);
2745                 }
2746                 nnew = mm;
2747               }
2748               _phase->igvn().replace_input_of(u, j, nnew);
2749               replaced = true;
2750             }
2751           }
2752           if (replaced) {
2753             --i;
2754           }
2756         }
2757       } else if ((u->adr_type() == TypePtr::BOTTOM && u->Opcode() != Op_StrInflatedCopy) ||
2758                  u->adr_type() == NULL) {
2759         assert(u->adr_type() != NULL ||
2760                u->Opcode() == Op_Rethrow ||
2761                u->Opcode() == Op_Return ||
2762                u->Opcode() == Op_SafePoint ||
2763                u->Opcode() == Op_StoreIConditional ||
2764                u->Opcode() == Op_StoreLConditional ||
2765                (u->is_CallStaticJava() && u->as_CallStaticJava()->uncommon_trap_request() != 0) ||
2766                (u->is_CallStaticJava() && u->as_CallStaticJava()->_entry_point == OptoRuntime::rethrow_stub()) ||
2767                u->Opcode() == Op_CallLeaf, "%s", u->Name());
2768         if (ShenandoahBarrierC2Support::is_dominator(rep_ctrl, _phase->ctrl_or_self(u), replacement, u, _phase)) {
2769           if (mm == NULL) {
2770             mm = allocate_merge_mem(mem, rep_proj, rep_ctrl);
2771           }
2772           _phase->igvn().replace_input_of(u, u->find_edge(mem), mm);
2773           --i;
2774         }
2775       } else if (_phase->C->get_alias_index(u->adr_type()) == _alias) {
2776         if (ShenandoahBarrierC2Support::is_dominator(rep_ctrl, _phase->ctrl_or_self(u), replacement, u, _phase)) {
2777           _phase->igvn().replace_input_of(u, u->find_edge(mem), rep_proj);
2778           --i;
2779         }
2780       }
2781     }
2782   }
2783 }
2784 
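     // A new load reference barrier registers itself with the barrier set's C2
     // state, so that the expansion pass can find it later.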
2785 ShenandoahLoadReferenceBarrierNode::ShenandoahLoadReferenceBarrierNode(Node* ctrl, Node* obj, bool native)
2786 : Node(ctrl, obj), _native(native) {
2787   ShenandoahBarrierSetC2::bsc2()->state()->add_load_reference_barrier(this);
2788 }
2789 
2790 bool ShenandoahLoadReferenceBarrierNode::is_native() const {
2791   return _native;
2792 }
2793 
2794 uint ShenandoahLoadReferenceBarrierNode::size_of() const {
2795   return sizeof(*this);
2796 }
2797 
2798 uint ShenandoahLoadReferenceBarrierNode::hash() const {
2799   return Node::hash() + (_native ? 1 : 0);
2800 }
2801 
2802 bool ShenandoahLoadReferenceBarrierNode::cmp(const Node& n) const {
2803   return Node::cmp(n) && n.Opcode() == Op_ShenandoahLoadReferenceBarrier &&
2804          _native == ((const ShenandoahLoadReferenceBarrierNode&)n)._native;
2805 }
2806 
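     // The barrier's type follows its value input: TOP and the null pointer pass
     // through unchanged, anything else is treated as an oop.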
2807 const Type* ShenandoahLoadReferenceBarrierNode::bottom_type() const {
2808   if (in(ValueIn) == NULL || in(ValueIn)->is_top()) {
2809     return Type::TOP;
2810   }
2811   const Type* t = in(ValueIn)->bottom_type();
2812   if (t == TypePtr::NULL_PTR) {
2813     return t;
2814   }
2815   return t->is_oopptr();
2816 }
2817 
2818 const Type* ShenandoahLoadReferenceBarrierNode::Value(PhaseGVN* phase) const {
2819   // If the value input is TOP, the result is TOP.
2820   const Type* t2 = phase->type(in(ValueIn));
2821   if (t2 == Type::TOP) return Type::TOP;
2822 
2823   if (t2 == TypePtr::NULL_PTR) {
2824     return t2;
2825   }
2826 
2827   const Type* type = t2->is_oopptr();
2828   return type;
2829 }
2830 
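     // The barrier folds away when needs_barrier() proves the value can never
     // refer to an object that might have to be forwarded (e.g. a fresh
     // allocation, a constant oop or null).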
2831 Node* ShenandoahLoadReferenceBarrierNode::Identity(PhaseGVN* phase) {
2832   Node* value = in(ValueIn);
2833   if (!needs_barrier(phase, value)) {
2834     return value;
2835   }
2836   return this;
2837 }
2838 
2839 bool ShenandoahLoadReferenceBarrierNode::needs_barrier(PhaseGVN* phase, Node* n) {
2840   Unique_Node_List visited;
2841   return needs_barrier_impl(phase, n, visited);
2842 }
2843 
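     // Recursive helper: walk the value's defining nodes and decide whether a
     // barrier is required. Allocations, calls, constants, nulls, incoming
     // parameters and values already covered by a barrier don't need one; loads,
     // atomic operation results and address computations do.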
2844 bool ShenandoahLoadReferenceBarrierNode::needs_barrier_impl(PhaseGVN* phase, Node* n, Unique_Node_List &visited) {
2845   if (n == NULL) return false;
2846   if (visited.member(n)) {
2847     return false; // Been there.
2848   }
2849   visited.push(n);
2850 
2851   if (n->is_Allocate()) {
2852     // tty->print_cr("optimize barrier on alloc");
2853     return false;
2854   }
2855   if (n->is_Call()) {
2856     // tty->print_cr("optimize barrier on call");
2857     return false;
2858   }
2859 
2860   const Type* type = phase->type(n);
2861   if (type == Type::TOP) {
2862     return false;
2863   }
2864   if (type->make_ptr()->higher_equal(TypePtr::NULL_PTR)) {
2865     // tty->print_cr("optimize barrier on null");
2866     return false;
2867   }
2868   if (type->make_oopptr() && type->make_oopptr()->const_oop() != NULL) {
2869     // tty->print_cr("optimize barrier on constant");
2870     return false;
2871   }
2872 
2873   switch (n->Opcode()) {
2874     case Op_AddP:
2875       return true; // TODO: Can refine?
2876     case Op_LoadP:
2877     case Op_ShenandoahCompareAndExchangeN:
2878     case Op_ShenandoahCompareAndExchangeP:
2879     case Op_CompareAndExchangeN:
2880     case Op_CompareAndExchangeP:
2881     case Op_GetAndSetN:
2882     case Op_GetAndSetP:
2883       return true;
2884     case Op_Phi: {
2885       for (uint i = 1; i < n->req(); i++) {
2886         if (needs_barrier_impl(phase, n->in(i), visited)) return true;
2887       }
2888       return false;
2889     }
2890     case Op_CheckCastPP:
2891     case Op_CastPP:
2892       return needs_barrier_impl(phase, n->in(1), visited);
2893     case Op_Proj:
2894       return needs_barrier_impl(phase, n->in(0), visited);
2895     case Op_ShenandoahLoadReferenceBarrier:
2896       // tty->print_cr("optimize barrier on barrier");
2897       return false;
2898     case Op_Parm:
2899       // tty->print_cr("optimize barrier on input arg");
2900       return false;
2901     case Op_DecodeN:
2902     case Op_EncodeP:
2903       return needs_barrier_impl(phase, n->in(1), visited);
2904     case Op_LoadN:
2905       return true;
2906     case Op_CMoveN:
2907     case Op_CMoveP:
2908       return needs_barrier_impl(phase, n->in(2), visited) ||
2909              needs_barrier_impl(phase, n->in(3), visited);
2910     case Op_ShenandoahEnqueueBarrier:
2911       return needs_barrier_impl(phase, n->in(1), visited);
2912     case Op_CreateEx:
2913       return false;
2914     default:
2915       break;
2916   }
2917 #ifdef ASSERT
2918   tty->print("need barrier on?: ");
2919   tty->print_cr("ins:");
2920   n->dump(2);
2921   tty->print_cr("outs:");
2922   n->dump(-2);
2923   ShouldNotReachHere();
2924 #endif
2925   return true;
2926 }
2927 
2928 bool ShenandoahLoadReferenceBarrierNode::is_redundant() {
2929   Unique_Node_List visited;
2930   Node_Stack stack(0);
2931   stack.push(this, 0);
2932 
2933   // Check if the barrier is actually useful: go over nodes looking for useful uses
2934   // (e.g. memory accesses). Stop once we detect a required use. Otherwise, walk
2935   // until we run out of nodes, and then declare the barrier redundant.
2936   while (stack.size() > 0) {
2937     Node* n = stack.node();
2938     if (visited.member(n)) {
2939       stack.pop();
2940       continue;
2941     }
2942     visited.push(n);
2943     bool visit_users = false;
2944     switch (n->Opcode()) {
2945       case Op_CallStaticJava:
2946         // Uncommon traps don't need barriers: values are handled during deoptimization. This also
2947         // affects the optimization of null-checks into implicit null-checks.
2948         if (n->as_CallStaticJava()->uncommon_trap_request() != 0) {
2949           break;
2950         }
2951       case Op_CallDynamicJava:
2952       case Op_CallLeaf:
2953       case Op_CallLeafNoFP:
2954       case Op_CompareAndSwapL:
2955       case Op_CompareAndSwapI:
2956       case Op_CompareAndSwapB:
2957       case Op_CompareAndSwapS:
2958       case Op_CompareAndSwapN:
2959       case Op_CompareAndSwapP:
2960       case Op_CompareAndExchangeL:
2961       case Op_CompareAndExchangeI:
2962       case Op_CompareAndExchangeB:
2963       case Op_CompareAndExchangeS:
2964       case Op_CompareAndExchangeN:
2965       case Op_CompareAndExchangeP:
2966       case Op_WeakCompareAndSwapL:
2967       case Op_WeakCompareAndSwapI:
2968       case Op_WeakCompareAndSwapB:
2969       case Op_WeakCompareAndSwapS:
2970       case Op_WeakCompareAndSwapN:
2971       case Op_WeakCompareAndSwapP:
2972       case Op_ShenandoahCompareAndSwapN:
2973       case Op_ShenandoahCompareAndSwapP:
2974       case Op_ShenandoahWeakCompareAndSwapN:
2975       case Op_ShenandoahWeakCompareAndSwapP:
2976       case Op_ShenandoahCompareAndExchangeN:
2977       case Op_ShenandoahCompareAndExchangeP:
2978       case Op_GetAndSetL:
2979       case Op_GetAndSetI:
2980       case Op_GetAndSetB:
2981       case Op_GetAndSetS:
2982       case Op_GetAndSetP:
2983       case Op_GetAndSetN:
2984       case Op_GetAndAddL:
2985       case Op_GetAndAddI:
2986       case Op_GetAndAddB:
2987       case Op_GetAndAddS:
2988       case Op_ShenandoahEnqueueBarrier:
2989       case Op_FastLock:
2990       case Op_FastUnlock:
2991       case Op_Rethrow:
2992       case Op_Return:
2993       case Op_StoreB:
2994       case Op_StoreC:
2995       case Op_StoreD:
2996       case Op_StoreF:
2997       case Op_StoreL:
2998       case Op_StoreLConditional:
2999       case Op_StoreI:
3000       case Op_StoreIConditional:
3001       case Op_StoreN:
3002       case Op_StoreP:
3003       case Op_StoreVector:
3004       case Op_StrInflatedCopy:
3005       case Op_StrCompressedCopy:
3006       case Op_EncodeP:
3007       case Op_CastP2X:
3008       case Op_SafePoint:
3009       case Op_EncodeISOArray:
3010       case Op_AryEq:
3011       case Op_StrEquals:
3012       case Op_StrComp:
3013       case Op_StrIndexOf:
3014       case Op_StrIndexOfChar:
3015       case Op_HasNegatives:
3016         // Known to require barriers
3017         return false;
3018       case Op_CmpP: {
3019         if (n->in(1)->bottom_type()->higher_equal(TypePtr::NULL_PTR) ||
3020             n->in(2)->bottom_type()->higher_equal(TypePtr::NULL_PTR)) {
3021           // One of the sides is known null, no need for barrier.
3022         } else {
3023           return false;
3024         }
3025         break;
3026       }
3027       case Op_LoadB:
3028       case Op_LoadUB:
3029       case Op_LoadUS:
3030       case Op_LoadD:
3031       case Op_LoadF:
3032       case Op_LoadL:
3033       case Op_LoadI:
3034       case Op_LoadS:
3035       case Op_LoadN:
3036       case Op_LoadP:
3037       case Op_LoadVector: {
3038         const TypePtr* adr_type = n->adr_type();
3039         int alias_idx = Compile::current()->get_alias_index(adr_type);
3040         Compile::AliasType* alias_type = Compile::current()->alias_type(alias_idx);
3041         ciField* field = alias_type->field();
3042         bool is_static = field != NULL && field->is_static();
3043         bool is_final = field != NULL && field->is_final();
3044 
3045         if (ShenandoahOptimizeStaticFinals && is_static && is_final) {
3046           // Loading the constant does not require barriers: it should be handled
3047           // as part of GC roots already.
3048         } else {
3049           return false;
3050         }
3051         break;
3052       }
3053       case Op_Conv2B:
3054       case Op_LoadRange:
3055       case Op_LoadKlass:
3056       case Op_LoadNKlass:
3057         // Do not require barriers
3058         break;
3059       case Op_AddP:
3060       case Op_CheckCastPP:
3061       case Op_CastPP:
3062       case Op_CMoveP:
3063       case Op_Phi:
3064       case Op_ShenandoahLoadReferenceBarrier:
3065         // Whether or not these need the barriers depends on their users
3066         visit_users = true;
3067         break;
3068       default: {
3069 #ifdef ASSERT
3070         fatal("Unknown node in is_redundant: %s", NodeClassNames[n->Opcode()]);
3071 #else
3072         // Default to have excess barriers, rather than miss some.
3073         return false;
3074 #endif
3075       }
3076     }
3077 
3078     stack.pop();
3079     if (visit_users) {
3080       for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
3081         Node* user = n->fast_out(i);
3082         if (user != NULL) {
3083           stack.push(user, 0);
3084         }
3085       }
3086     }
3087   }
3088 
3089   // No need for barrier found.
3090   return true;
3091 }