--- old/src/share/vm/opto/castnode.cpp	2017-03-24 07:35:15.575924994 +0100
+++ new/src/share/vm/opto/castnode.cpp	2017-03-24 07:35:15.455924999 +0100
@@ -225,7 +225,10 @@
   }
 
   // Similar to ConvI2LNode::Ideal() for the same reasons
-  if (can_reshape && !phase->C->major_progress()) {
+  // Do not narrow the type of range check dependent CastIINodes to
+  // avoid corruption of the graph if a CastII is replaced by TOP but
+  // the corresponding range check is not removed.
+  if (can_reshape && !_range_check_dependency && !phase->C->major_progress()) {
     const TypeInt* this_type = this->type()->is_int();
     const TypeInt* in_type = phase->type(in(1))->isa_int();
     if (in_type != NULL && this_type != NULL &&
--- old/src/share/vm/opto/convertnode.cpp	2017-03-24 07:35:15.923924978 +0100
+++ new/src/share/vm/opto/convertnode.cpp	2017-03-24 07:35:15.807924983 +0100
@@ -294,8 +294,7 @@
   }
 
 #ifdef _LP64
-  // Convert ConvI2L(AddI(x, y)) to AddL(ConvI2L(x), ConvI2L(y)) or
-  // ConvI2L(CastII(AddI(x, y))) to AddL(ConvI2L(CastII(x)), ConvI2L(CastII(y))),
+  // Convert ConvI2L(AddI(x, y)) to AddL(ConvI2L(x), ConvI2L(y))
   // but only if x and y have subranges that cannot cause 32-bit overflow,
   // under the assumption that x+y is in my own subrange this->type().
 
@@ -319,13 +318,6 @@
 
   Node* z = in(1);
   int op = z->Opcode();
-  Node* ctrl = NULL;
-  if (op == Op_CastII && z->as_CastII()->has_range_check()) {
-    // Skip CastII node but save control dependency
-    ctrl = z->in(0);
-    z = z->in(1);
-    op = z->Opcode();
-  }
   if (op == Op_AddI || op == Op_SubI) {
     Node* x = z->in(1);
     Node* y = z->in(2);
@@ -385,8 +377,8 @@
     }
     assert(rxlo == (int)rxlo && rxhi == (int)rxhi, "x should not overflow");
     assert(rylo == (int)rylo && ryhi == (int)ryhi, "y should not overflow");
-    Node* cx = phase->C->constrained_convI2L(phase, x, TypeInt::make(rxlo, rxhi, widen), ctrl);
-    Node* cy = phase->C->constrained_convI2L(phase, y, TypeInt::make(rylo, ryhi, widen), ctrl);
+    Node* cx = phase->C->constrained_convI2L(phase, x, TypeInt::make(rxlo, rxhi, widen), NULL);
+    Node* cy = phase->C->constrained_convI2L(phase, y, TypeInt::make(rylo, ryhi, widen), NULL);
     switch (op) {
       case Op_AddI:  return new AddLNode(cx, cy);
       case Op_SubI:  return new SubLNode(cx, cy);
--- old/test/compiler/loopopts/TestLoopPeeling.java	2017-03-24 07:35:16.279924961 +0100
+++ new/test/compiler/loopopts/TestLoopPeeling.java	2017-03-24 07:35:16.163924966 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,7 +23,7 @@
 
 /*
  * @test
- * @bug 8078262
+ * @bug 8078262 8177095
  * @summary Tests correct dominator information after loop peeling.
  *
  * @run main/othervm -Xcomp
@@ -40,14 +40,16 @@
     public static void main(String args[]) {
         TestLoopPeeling test = new TestLoopPeeling();
         try {
-            test.testArrayAccess(0, 1);
+            test.testArrayAccess1(0, 1);
+            test.testArrayAccess2(0);
+            test.testArrayAccess3(0, false);
             test.testArrayAllocation(0, 1);
         } catch (Exception e) {
             // Ignore exceptions
         }
     }
 
-    public void testArrayAccess(int index, int inc) {
+    public void testArrayAccess1(int index, int inc) {
         int storeIndex = -1;
 
         for (; index < 10; index += inc) {
@@ -63,7 +65,7 @@
 
             if (index == 42) {
                 // This store and the corresponding range check are moved out of the
-                // loop and both used after old loop and the peeled iteration exit.
+                // loop and both used after main loop and the peeled iteration exit.
                 // For the peeled iteration, storeIndex is always -1 and the ConvI2L
                 // is replaced by TOP. However, the range check is not folded because
                 // we don't do the split if optimization in PhaseIdealLoop2.
@@ -77,6 +79,44 @@
         }
     }
 
+    public int testArrayAccess2(int index) {
+        // Load1 and the corresponding range check are moved out of the loop
+        // and both are used after the main loop and the peeled iteration exit.
+        // For the peeled iteration, storeIndex is always Integer.MIN_VALUE and
+        // for the main loop it is 0. Hence, the merging phi has type int:<=0.
+        // Load1 reads the array at index ConvI2L(CastII(AddI(storeIndex, -1)))
+        // where the CastII is range check dependent and has type int:>=0.
+        // The CastII gets pushed through the AddI and its type is changed to int:>=1
+        // which does not overlap with the input type of storeIndex (int:<=0).
+        // The CastII is replaced by TOP causing a cascade of other eliminations.
+        // Since the control path through the range check CmpU(AddI(storeIndex, -1))
+        // is not eliminated, the graph is in a corrupted state. We fail once we merge
+        // with the result of Load2 because we get data from a non-dominating region.
+        int storeIndex = Integer.MIN_VALUE;
+        for (; index < 10; ++index) {
+            if (index == 42) {
+                return array[storeIndex-1]; // Load1
+            }
+            storeIndex = 0;
+        }
+        return array[42]; // Load2
+    }
+
+    public int testArrayAccess3(int index, boolean b) {
+        // Same as testArrayAccess2 but manifests as crash in register allocator.
+        int storeIndex = Integer.MIN_VALUE;
+        for (; index < 10; ++index) {
+            if (b) {
+                return 0;
+            }
+            if (index == 42) {
+                return array[storeIndex-1]; // Load1
+            }
+            storeIndex = 0;
+        }
+        return array[42]; // Load2
+    }
+
     public byte[] testArrayAllocation(int index, int inc) {
         int allocationCount = -1;
         byte[] result;
@@ -88,7 +128,7 @@
 
             if (index == 42) {
                 // This allocation and the corresponding size check are moved out of the
-                // loop and both used after old loop and the peeled iteration exit.
+                // loop and both used after main loop and the peeled iteration exit.
                 // For the peeled iteration, allocationCount is always -1 and the ConvI2L
                 // is replaced by TOP. However, the size check is not folded because
                 // we don't do the split if optimization in PhaseIdealLoop2.