hotspot/src/share/vm/opto/compile.cpp

rev 611 : Merge

@@ -1,10 +1,10 @@
 #ifdef USE_PRAGMA_IDENT_SRC
 #pragma ident "@(#)compile.cpp  1.633 07/09/28 10:23:11 JVM"
 #endif
 /*
- * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
+ * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.

@@ -314,13 +314,10 @@
   Type::Initialize(compile);
   _compile->set_scratch_buffer_blob(NULL);
   _compile->begin_method();
 }
 CompileWrapper::~CompileWrapper() {
-  if (_compile->failing()) {
-    _compile->print_method("Failed");
-  }
   _compile->end_method();
   if (_compile->scratch_buffer_blob() != NULL)
     BufferBlob::free(_compile->scratch_buffer_blob());
   _compile->env()->set_compiler_data(NULL);
 }

@@ -334,10 +331,16 @@
     // Recompiling without allowing machine instructions to subsume loads
     tty->print_cr("*********************************************************");
     tty->print_cr("** Bailout: Recompile without subsuming loads          **");
     tty->print_cr("*********************************************************");
   }
+  if (_do_escape_analysis != DoEscapeAnalysis && PrintOpto) {
+    // Recompiling without escape analysis
+    tty->print_cr("*********************************************************");
+    tty->print_cr("** Bailout: Recompile without escape analysis          **");
+    tty->print_cr("*********************************************************");
+  }
   if (env()->break_at_compile()) {
    // Open the debugger when compiling this method.
     tty->print("### Breaking when compiling: ");
     method()->print_short_name();
     tty->cr();
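
For context, this banner follows the same pattern as the subsume-loads one above it: when a compilation bails out asking for escape analysis to be turned off, the broker re-runs the method with the flag cleared, and the retried Compile prints the banner because its _do_escape_analysis no longer matches DoEscapeAnalysis. A rough sketch of the retry loop that drives both banners (abbreviated; modeled on C2Compiler::compile_method, with retry_no_escape_analysis() assumed to be the new counterpart of retry_no_subsuming_loads()):

    void C2Compiler::compile_method(ciEnv* env, ciMethod* target, int entry_bci) {
      bool subsume_loads = true;
      bool do_escape_analysis = DoEscapeAnalysis;
      while (!env->failing()) {
        // Attempt a compilation with the current flag settings.
        Compile C(env, this, target, entry_bci, subsume_loads, do_escape_analysis);

        // Check the result and retry with a weaker configuration if asked to.
        if (C.failure_reason() != NULL) {
          if (C.failure_reason_is(retry_no_subsuming_loads())) {
            assert(subsume_loads, "must make progress");
            subsume_loads = false;
            continue;               // retried compile prints the first banner
          }
          if (C.failure_reason_is(retry_no_escape_analysis())) {
            assert(do_escape_analysis, "must make progress");
            do_escape_analysis = false;
            continue;               // retried compile prints the new banner
          }
        }
        break;                      // success, or a non-retryable failure
      }
    }
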

@@ -363,11 +366,16 @@
   ResourceMark rm;
   int size = (MAX_inst_size + MAX_stubs_size + MAX_const_size);
   BufferBlob* blob = BufferBlob::create("Compile::scratch_buffer", size);
   // Record the buffer blob for next time.
   set_scratch_buffer_blob(blob);
-  guarantee(scratch_buffer_blob() != NULL, "Need BufferBlob for code generation");
+  // Have we run out of code space?
+  if (scratch_buffer_blob() == NULL) {
+    // Let CompileBroker disable further compilations.
+    record_failure("Not enough space for scratch buffer in CodeCache");
+    return;
+  }
 
   // Initialize the relocation buffers
   relocInfo* locs_buf = (relocInfo*) blob->instructions_end() - MAX_locs_size;
   set_scratch_locs_memory(locs_buf);
 }
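
Since the hard guarantee() is gone, code that needs the scratch buffer has to notice the soft failure itself. A minimal sketch of the expected calling pattern (illustrative only; in the patch the check sits in the Compile constructor right after this initializer runs):

    // Sketch: reacting to the new soft failure instead of the old guarantee().
    init_scratch_buffer_blob();
    if (failing())  return;   // CodeCache is full; abandon this compilation
                              // and let CompileBroker throttle the compiler.
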

@@ -402,25 +410,20 @@
   buf.stubs()->initialize_shared_locs(&locs_buf[lsize], lsize);
   n->emit(buf, this->regalloc());
   return buf.code_size();
 }
 
-void  Compile::record_for_escape_analysis(Node* n) {
-  if (_congraph != NULL)
-    _congraph->record_for_escape_analysis(n);
-}
-
 
 // ============================================================================
 //------------------------------Compile standard-------------------------------
 debug_only( int Compile::_debug_idx = 100000; )
 
 // Compile a method.  entry_bci is -1 for normal compilations and indicates
 // the continuation bci for on stack replacement.
 
 
-Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci, bool subsume_loads )
+Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci, bool subsume_loads, bool do_escape_analysis )
                 : Phase(Compiler),
                   _env(ci_env),
                   _log(ci_env->log()),
                   _compile_id(ci_env->compile_id()),
                   _save_argument_registers(false),

@@ -431,10 +434,11 @@
                   _entry_bci(osr_bci),
                   _initial_gvn(NULL),
                   _for_igvn(NULL),
                   _warm_calls(NULL),
                   _subsume_loads(subsume_loads),
+                  _do_escape_analysis(do_escape_analysis),
                   _failure_reason(NULL),
                   _code_buffer("Compile::Fill_buffer"),
                   _orig_pc_slot(0),
                   _orig_pc_slot_offset_in_bytes(0),
                   _node_bundling_limit(0),

@@ -455,11 +459,20 @@
     target->print_short_name();
     tty->print("  ");
   }
   TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2);
   TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false);
-  set_print_assembly(PrintOptoAssembly || _method->should_print_assembly());
+  bool print_opto_assembly = PrintOptoAssembly || _method->has_option("PrintOptoAssembly");
+  if (!print_opto_assembly) {
+    bool print_assembly = (PrintAssembly || _method->should_print_assembly());
+    if (print_assembly && !Disassembler::can_decode()) {
+      tty->print_cr("PrintAssembly request changed to PrintOptoAssembly");
+      print_opto_assembly = true;
+    }
+  }
+  set_print_assembly(print_opto_assembly);
+  set_parsed_irreducible_loop(false);
 #endif
 
   if (ProfileTraps) {
     // Make sure the method being compiled gets its own MDO,
     // so we can at least track the decompile_count().

@@ -488,13 +501,10 @@
   uint estimated_size = method()->code_size()*4+64;
   estimated_size = (estimated_size < MINIMUM_NODE_HASH ? MINIMUM_NODE_HASH : estimated_size);
   PhaseGVN gvn(node_arena(), estimated_size);
   set_initial_gvn(&gvn);
 
-  if (DoEscapeAnalysis)
-    _congraph = new ConnectionGraph(this);
-
   { // Scope for timing the parser
     TracePhase t3("parse", &_t_parser, true);
 
     // Put top into the hash table ASAP.
     initial_gvn()->transform_no_reclaim(top());

@@ -542,10 +552,12 @@
       // to whatever caller is dynamically above us on the stack.
       // This is done by a special, unique RethrowNode bound to root.
       rethrow_exceptions(kit.transfer_exceptions_into_jvms());
     }
 
+    print_method("Before RemoveUseless", 3);
+
     // Remove clutter produced by parsing.
     if (!failing()) {
       ResourceMark rm;
       PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
     }

@@ -575,18 +587,36 @@
 
   if (failing())  return;
   NOT_PRODUCT( verify_graph_edges(); )
 
   // Perform escape analysis
-  if (_congraph != NULL) {
-    NOT_PRODUCT( TracePhase t2("escapeAnalysis", &_t_escapeAnalysis, TimeCompiler); )
-    _congraph->compute_escape();
+  if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
+    TracePhase t2("escapeAnalysis", &_t_escapeAnalysis, true);
+    // Add ConP#NULL and ConN#NULL nodes before ConnectionGraph construction.
+    PhaseGVN* igvn = initial_gvn();
+    Node* oop_null = igvn->zerocon(T_OBJECT);
+    Node* noop_null = igvn->zerocon(T_NARROWOOP);
+
+    _congraph = new(comp_arena()) ConnectionGraph(this);
+    bool has_non_escaping_obj = _congraph->compute_escape();
+
 #ifndef PRODUCT
     if (PrintEscapeAnalysis) {
       _congraph->dump();
     }
 #endif
+    // Cleanup.
+    if (oop_null->outcnt() == 0)
+      igvn->hash_delete(oop_null);
+    if (noop_null->outcnt() == 0)
+      igvn->hash_delete(noop_null);
+
+    if (!has_non_escaping_obj) {
+      _congraph = NULL;
+    }
+
+    if (failing())  return;
   }
   // Now optimize
   Optimize();
   if (failing())  return;
   NOT_PRODUCT( verify_graph_edges(); )
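
Two things are worth spelling out here. First, escape analysis is now skipped outright when the method has nothing it could help with; a sketch of the kind of cheap pre-check has_candidates() performs is below (assumed shape, the real test lives with ConnectionGraph in escape.cpp). Second, the ConP#NULL/ConN#NULL constants are created up front so the connection graph has null constants to hang edges on, and they are dropped from the GVN hash table afterwards if nothing ended up using them.

    // Sketch of a has_candidates()-style pre-check: escape analysis only pays
    // off when there are allocations or (non-trivial) locks, and those are
    // exactly the ideal macro nodes recorded during parsing.
    bool ConnectionGraph::has_candidates(Compile* C) {
      int cnt = C->macro_count();
      for (int i = 0; i < cnt; i++) {
        Node* n = C->macro_node(i);
        if (n->is_Allocate())
          return true;                  // object or array allocation
        if (n->is_Lock()) {
          Node* obj = n->as_Lock()->obj_node()->uncast();
          if (!(obj->is_Parm() || obj->is_Con()))
            return true;                // lock on something EA might localize
        }
      }
      return false;
    }
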

@@ -676,10 +706,11 @@
     _for_igvn(NULL),
     _warm_calls(NULL),
     _orig_pc_slot(0),
     _orig_pc_slot_offset_in_bytes(0),
     _subsume_loads(true),
+    _do_escape_analysis(false),
     _failure_reason(NULL),
     _code_buffer("Compile::Fill_buffer"),
     _node_bundling_limit(0),
     _node_bundling_base(NULL),
 #ifndef PRODUCT

@@ -691,10 +722,11 @@
 
 #ifndef PRODUCT
   TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
   TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
   set_print_assembly(PrintFrameConverterAssembly);
+  set_parsed_irreducible_loop(false);
 #endif
   CompileWrapper cw(this);
   Init(/*AliasLevel=*/ 0);
   init_tf((*generator)());
 

@@ -791,10 +823,11 @@
   _major_progress = true; // start out assuming good things will happen
   set_has_unsafe_access(false);
   Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
   set_decompile_count(0);
 
+  set_do_freq_based_layout(BlockLayoutByFrequency || method_has_option("BlockLayoutByFrequency"));
   // Compilation level related initialization
   if (env()->comp_level() == CompLevel_fast_compile) {
     set_num_loop_opts(Tier1LoopOptsCount);
     set_do_inlining(Tier1Inline != 0);
     set_max_inline_size(Tier1MaxInlineSize);

@@ -823,11 +856,11 @@
   // // Update cached type information
   // if( _method && _method->constants() ) 
   //   Type::update_loaded_types(_method, _method->constants());
 
   // Init alias_type map.
-  if (!DoEscapeAnalysis && aliaslevel == 3)
+  if (!_do_escape_analysis && aliaslevel == 3)
     aliaslevel = 2;  // No unique types without escape analysis
   _AliasLevel = aliaslevel;
   const int grow_ats = 16;
   _max_alias_types = grow_ats;
   _alias_types   = NEW_ARENA_ARRAY(comp_arena(), AliasType*, grow_ats);

@@ -980,28 +1013,39 @@
 //------------------------------flatten_alias_type-----------------------------
 const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const {
   int offset = tj->offset();
   TypePtr::PTR ptr = tj->ptr();
 
+  // Known instance (scalarizable allocation) alias only with itself.
+  bool is_known_inst = tj->isa_oopptr() != NULL &&
+                       tj->is_oopptr()->is_known_instance();
+
   // Process weird unsafe references.
   if (offset == Type::OffsetBot && (tj->isa_instptr() /*|| tj->isa_klassptr()*/)) {
     assert(InlineUnsafeOps, "indeterminate pointers come only from unsafe ops");
+    assert(!is_known_inst, "scalarizable allocation should not have unsafe references");
     tj = TypeOopPtr::BOTTOM;
     ptr = tj->ptr();
     offset = tj->offset();
   }
 
   // Array pointers need some flattening
   const TypeAryPtr *ta = tj->isa_aryptr();
-  if( ta && _AliasLevel >= 2 ) {
+  if( ta && is_known_inst ) {
+    if ( offset != Type::OffsetBot &&
+         offset > arrayOopDesc::length_offset_in_bytes() ) {
+      offset = Type::OffsetBot; // Flatten constant access into array body only
+      tj = ta = TypeAryPtr::make(ptr, ta->ary(), ta->klass(), true, offset, ta->instance_id());
+    }
+  } else if( ta && _AliasLevel >= 2 ) {
     // For arrays indexed by constant indices, we flatten the alias
     // space to include all of the array body.  Only the header, klass
     // and array length can be accessed un-aliased.
     if( offset != Type::OffsetBot ) {
       if( ta->const_oop() ) { // methodDataOop or methodOop
         offset = Type::OffsetBot;   // Flatten constant access into array body
-        tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),ta->ary(),ta->klass(),false,Type::OffsetBot, ta->instance_id());
+        tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),ta->ary(),ta->klass(),false,offset);
       } else if( offset == arrayOopDesc::length_offset_in_bytes() ) {
         // range is OK as-is.
         tj = ta = TypeAryPtr::RANGE; 
       } else if( offset == oopDesc::klass_offset_in_bytes() ) {
         tj = TypeInstPtr::KLASS; // all klass loads look alike

@@ -1011,29 +1055,33 @@
         tj = TypeInstPtr::MARK;
         ta = TypeAryPtr::RANGE; // generic ignored junk
         ptr = TypePtr::BotPTR;
       } else {                  // Random constant offset into array body 
         offset = Type::OffsetBot;   // Flatten constant access into array body
-        tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,Type::OffsetBot, ta->instance_id());
+        tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,offset);
       }
     }
     // Arrays of fixed size alias with arrays of unknown size.
     if (ta->size() != TypeInt::POS) {
       const TypeAry *tary = TypeAry::make(ta->elem(), TypeInt::POS);
-      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,ta->klass(),false,offset, ta->instance_id());
+      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,ta->klass(),false,offset);
     }
     // Arrays of known objects become arrays of unknown objects.
+    if (ta->elem()->isa_narrowoop() && ta->elem() != TypeNarrowOop::BOTTOM) {
+      const TypeAry *tary = TypeAry::make(TypeNarrowOop::BOTTOM, ta->size());
+      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
+    }
     if (ta->elem()->isa_oopptr() && ta->elem() != TypeInstPtr::BOTTOM) {
       const TypeAry *tary = TypeAry::make(TypeInstPtr::BOTTOM, ta->size());
-      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset, ta->instance_id());
+      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
     }
     // Arrays of bytes and of booleans both use 'bastore' and 'baload' so
     // cannot be distinguished by bytecode alone.
     if (ta->elem() == TypeInt::BOOL) {
       const TypeAry *tary = TypeAry::make(TypeInt::BYTE, ta->size());
       ciKlass* aklass = ciTypeArrayKlass::make(T_BYTE);
-      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,aklass,false,offset, ta->instance_id());
+      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,aklass,false,offset);
     }
     // During the 2nd round of IterGVN, NotNull castings are removed.
     // Make sure the Bottom and NotNull variants alias the same.
     // Also, make sure exact and non-exact variants alias the same.
     if( ptr == TypePtr::NotNull || ta->klass_is_exact() ) {

@@ -1049,31 +1097,40 @@
   const TypeInstPtr *to = tj->isa_instptr();
   if( to && _AliasLevel >= 2 && to != TypeOopPtr::BOTTOM ) {
     if( ptr == TypePtr::Constant ) {
       // No constant oop pointers (such as Strings); they alias with
       // unknown strings.
+      assert(!is_known_inst, "not scalarizable allocation");
       tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
+    } else if( is_known_inst ) {
+      tj = to; // Keep NotNull and klass_is_exact for instance type
     } else if( ptr == TypePtr::NotNull || to->klass_is_exact() ) {
       // During the 2nd round of IterGVN, NotNull castings are removed.
       // Make sure the Bottom and NotNull variants alias the same.
       // Also, make sure exact and non-exact variants alias the same.
-      tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset, to->instance_id());
+      tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
     }
     // Canonicalize the holder of this field
     ciInstanceKlass *k = to->klass()->as_instance_klass();
-    if (offset >= 0 && offset < oopDesc::header_size() * wordSize) {
+    if (offset >= 0 && offset < instanceOopDesc::base_offset_in_bytes()) {
       // First handle header references such as a LoadKlassNode, even if the
       // object's klass is unloaded at compile time (4965979).
-      tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset, to->instance_id());
+      if (!is_known_inst) { // Do it only for non-instance types
+        tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset);
+      }
     } else if (offset < 0 || offset >= k->size_helper() * wordSize) {
       to = NULL;
       tj = TypeOopPtr::BOTTOM;
       offset = tj->offset();
     } else {
       ciInstanceKlass *canonical_holder = k->get_canonical_holder(offset);
       if (!k->equals(canonical_holder) || tj->offset() != offset) {
-        tj = to = TypeInstPtr::make(TypePtr::BotPTR, canonical_holder, false, NULL, offset, to->instance_id());
+        if( is_known_inst ) {
+          tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, true, NULL, offset, to->instance_id());
+        } else {
+          tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, false, NULL, offset);
+        }
       }
     }
   }
 
   // Klass pointers to object array klasses need some flattening

@@ -1169,12 +1226,12 @@
   _index = i;
   _adr_type = at;
   _field = NULL;
   _is_rewritable = true; // default
   const TypeOopPtr *atoop = (at != NULL) ? at->isa_oopptr() : NULL;
-  if (atoop != NULL && atoop->is_instance()) {
-    const TypeOopPtr *gt = atoop->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE);
+  if (atoop != NULL && atoop->is_known_instance()) {
+    const TypeOopPtr *gt = atoop->cast_to_instance_id(TypeOopPtr::InstanceBot);
     _general_index = Compile::current()->get_alias_index(gt);
   } else {
     _general_index = 0;
   }
 }
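
The rename from is_instance()/cast_to_instance() to is_known_instance()/cast_to_instance_id() tracks the new terminology: a "known instance" is a single, scalarizable allocation identified by an instance id, and TypeOopPtr::InstanceBot is the "any instance" sentinel. A small hedged sketch of what the cached _general_index is for (the helper and its local names are illustrative, not part of the patch):

    // Sketch: a known instance gets a private alias slice; dropping the
    // instance id yields the shared slice that _general_index caches above.
    static void show_alias_split(const TypePtr* addr_t) {   // hypothetical helper
      const TypeOopPtr* inst_t = addr_t->isa_oopptr();
      if (inst_t != NULL && inst_t->is_known_instance()) {
        const TypeOopPtr* gen_t = inst_t->cast_to_instance_id(TypeOopPtr::InstanceBot);
        int idx  = Compile::current()->get_alias_index(inst_t); // this allocation only
        int gidx = Compile::current()->get_alias_index(gen_t);  // any instance of the class
        // Under full alias analysis the two indexes differ: stores into the
        // private slice cannot alias other objects' fields, which is what lets
        // scalar replacement split the memory graph per allocation.
      }
    }
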

@@ -1255,11 +1312,13 @@
 #ifdef ASSERT
   assert(flat == flatten_alias_type(flat), "idempotent");
   assert(flat != TypePtr::BOTTOM,     "cannot alias-analyze an untyped ptr");
   if (flat->isa_oopptr() && !flat->isa_klassptr()) {
     const TypeOopPtr* foop = flat->is_oopptr();
-    const TypePtr* xoop = foop->cast_to_exactness(!foop->klass_is_exact())->is_ptr();
+    // Scalarizable allocations have exact klass always.
+    bool exact = !foop->klass_is_exact() || foop->is_known_instance();
+    const TypePtr* xoop = foop->cast_to_exactness(exact)->is_ptr();
     assert(foop == flatten_alias_type(xoop), "exactness must not affect alias type");
   }
   assert(flat == flatten_alias_type(flat), "exact bit doesn't matter");
 #endif
 

@@ -1299,11 +1358,11 @@
     // but the base pointer type is not distinctive enough to identify
     // references into JavaThread.)
 
     // Check for final instance fields.
     const TypeInstPtr* tinst = flat->isa_instptr();
-    if (tinst && tinst->offset() >= oopDesc::header_size() * wordSize) {
+    if (tinst && tinst->offset() >= instanceOopDesc::base_offset_in_bytes()) {
       ciInstanceKlass *k = tinst->klass()->as_instance_klass();
       ciField* field = k->get_field_by_offset(tinst->offset(), false);
       // Set field() and is_rewritable() attributes.
       if (field != NULL)  alias_type(idx)->set_field(field);
     }

@@ -1459,11 +1518,11 @@
   ResourceMark rm;
   int          loop_opts_cnt;
 
   NOT_PRODUCT( verify_graph_edges(); )
 
-  print_method("Start");
+  print_method("After Parsing");
 
  {
   // Iterative Global Value Numbering, including ideal transforms
   // Initialize IterGVN with types and values from parse-time GVN
   PhaseIterGVN igvn(initial_gvn());

@@ -1474,15 +1533,10 @@
 
   print_method("Iter GVN 1", 2);
   
   if (failing())  return;
 
-  // get rid of the connection graph since it's information is not
-  // updated by optimizations
-  _congraph = NULL;
-
-
   // Loop transforms on the ideal graph.  Range Check Elimination,
   // peeling, unrolling, etc.
 
   // Set loop opts counter
   loop_opts_cnt = num_loop_opts();

@@ -1644,12 +1698,18 @@
   // Prior to register allocation we kept empty basic blocks in case the
   // the allocator needed a place to spill.  After register allocation we 
   // are not adding any new instructions.  If any basic block is empty, we 
   // can now safely remove it.
   {
-    NOT_PRODUCT( TracePhase t2("removeEmpty", &_t_removeEmptyBlocks, TimeCompiler); )
-    cfg.RemoveEmpty();
+    NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
+    cfg.remove_empty();
+    if (do_freq_based_layout()) {
+      PhaseBlockLayout layout(cfg);
+    } else {
+      cfg.set_loop_alignment();
+    }
+    cfg.fixup_flow();
   }
 
   // Perform any platform dependent postallocation verifications.
   debug_only( _regalloc->pd_postallocate_verify_hook(); )
 

@@ -1666,11 +1726,11 @@
     TracePhase t2a("output", &_t_output, true);
     NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); )
     Output();
   }
 
-  print_method("End");
+  print_method("Final Code");
 
   // He's dead, Jim.
   _cfg     = (PhaseCFG*)0xdeadbeef;
   _regalloc = (PhaseChaitin*)0xdeadbeef;
 }

@@ -1720,10 +1780,12 @@
         }
         if (bundle->starts_bundle())
           starts_bundle = '+';
       }
 
+      if (WizardMode) n->dump();
+
      if( !n->is_Region() &&    // Don't print in the Assembly
          !n->is_Phi() &&       // a few noisy, useless nodes
           !n->is_Proj() &&
           !n->is_MachTemp() &&
           !n->is_Catch() &&     // Would be nice to print exception table targets

@@ -1744,10 +1806,12 @@
 
       // If we have an instruction with a delay slot, and have seen a delay,
       // then back up and print it
       if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
         assert(delay != NULL, "no unconditional delay instruction");
+        if (WizardMode) delay->dump();
+
         if (node_bundling(delay)->starts_bundle())
           starts_bundle = '+';
         if (pcs && n->_idx < pc_limit)
           tty->print("%3.3x", pcs[n->_idx]);
         else

@@ -1808,18 +1872,19 @@
 };
 
 static bool oop_offset_is_sane(const TypeInstPtr* tp) {
   ciInstanceKlass *k = tp->klass()->as_instance_klass();
   // Make sure the offset goes inside the instance layout.
-  return (uint)tp->offset() < (uint)(oopDesc::header_size() + k->nonstatic_field_size())*wordSize;
+  return k->contains_field_offset(tp->offset());
   // Note that OffsetBot and OffsetTop are very negative.
 }
 
 //------------------------------final_graph_reshaping_impl----------------------
 // Implement items 1-5 from final_graph_reshaping below.
 static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
 
+  if ( n->outcnt() == 0 ) return; // dead node
   uint nop = n->Opcode();
 
   // Check for 2-input instruction with "last use" on right input.
   // Swap to left input.  Implements item (2).
   if( n->req() == 3 &&          // two-input instruction

@@ -1882,11 +1947,11 @@
   case Op_CmpD3:
     fpu.inc_double_count();
     break;
   case Op_Opaque1:              // Remove Opaque Nodes before matching
   case Op_Opaque2:              // Remove Opaque Nodes before matching
-    n->replace_by(n->in(1));
+    n->subsume_by(n->in(1));
     break;
   case Op_CallStaticJava:
   case Op_CallJava:
   case Op_CallDynamicJava:
     fpu.inc_java_call_count(); // Count java call site;

@@ -1907,10 +1972,11 @@
       // Clone shared simple arguments to uncommon calls, item (1).
       if( n->outcnt() > 1 && 
           !n->is_Proj() && 
           nop != Op_CreateEx && 
           nop != Op_CheckCastPP && 
+          nop != Op_DecodeN &&
           !n->is_Mem() ) {
         Node *x = n->clone();
         call->set_req( TypeFunc::Parms, x );
       }
     }

@@ -1931,24 +1997,29 @@
   case Op_StoreC:
   case Op_StoreCM:
   case Op_StorePConditional:
   case Op_StoreI:
   case Op_StoreL:
+  case Op_StoreIConditional:
   case Op_StoreLConditional:
   case Op_CompareAndSwapI:
   case Op_CompareAndSwapL:
   case Op_CompareAndSwapP:
+  case Op_CompareAndSwapN:
   case Op_StoreP:
+  case Op_StoreN:
   case Op_LoadB:
   case Op_LoadC:
   case Op_LoadI:
   case Op_LoadKlass:
+  case Op_LoadNKlass:
   case Op_LoadL:
   case Op_LoadL_unaligned:
   case Op_LoadPLocked:
   case Op_LoadLLocked:
   case Op_LoadP:
+  case Op_LoadN:
   case Op_LoadRange:
   case Op_LoadS: {
   handle_mem:
 #ifdef ASSERT
     if( VerifyOptoOopOffsets ) {

@@ -1959,40 +2030,234 @@
       assert( !tp || oop_offset_is_sane(tp), "" );
     }
 #endif
     break;
   }
-  case Op_If:
-  case Op_CountedLoopEnd:
-    fpu._tests.push(n);         // Collect CFG split points
-    break;
 
   case Op_AddP: {               // Assert sane base pointers
-    const Node *addp = n->in(AddPNode::Address);
+    Node *addp = n->in(AddPNode::Address);
     assert( !addp->is_AddP() || 
             addp->in(AddPNode::Base)->is_top() || // Top OK for allocation
             addp->in(AddPNode::Base) == n->in(AddPNode::Base), 
             "Base pointers must match" );
+#ifdef _LP64
+    if (UseCompressedOops &&
+        addp->Opcode() == Op_ConP &&
+        addp == n->in(AddPNode::Base) &&
+        n->in(AddPNode::Offset)->is_Con()) {
+      // Use addressing with narrow klass to load with offset on x86.
+      // On sparc, loading a 32-bit constant and decoding it takes fewer
+      // instructions (4) than loading a 64-bit constant (7).
+      // Do this transformation here since IGVN will convert ConN back to ConP.
+      const Type* t = addp->bottom_type();
+      if (t->isa_oopptr()) {
+        Node* nn = NULL;
+
+        // Look for existing ConN node of the same exact type.
+        Compile* C = Compile::current();
+        Node* r  = C->root();
+        uint cnt = r->outcnt();
+        for (uint i = 0; i < cnt; i++) {
+          Node* m = r->raw_out(i);
+          if (m!= NULL && m->Opcode() == Op_ConN &&
+              m->bottom_type()->make_ptr() == t) {
+            nn = m;
+            break;
+          }
+        }
+        if (nn != NULL) {
+          // Decode a narrow oop to match address
+          // [R12 + narrow_oop_reg<<3 + offset]
+          nn = new (C,  2) DecodeNNode(nn, t);
+          n->set_req(AddPNode::Base, nn);
+          n->set_req(AddPNode::Address, nn);
+          if (addp->outcnt() == 0) {
+            addp->disconnect_inputs(NULL);
+          }
+        }
+      }
+    }
+#endif
     break;
   }
 
+#ifdef _LP64
+  case Op_CastPP:
+    if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) {
+      Compile* C = Compile::current();
+      Node* in1 = n->in(1);
+      const Type* t = n->bottom_type();
+      Node* new_in1 = in1->clone();
+      new_in1->as_DecodeN()->set_type(t);
+
+      if (!Matcher::clone_shift_expressions) {
+        //
+        // x86, ARM and friends can handle 2 adds in addressing mode
+        // and Matcher can fold a DecodeN node into address by using
+        // a narrow oop directly and do implicit NULL check in address:
+        //
+        // [R12 + narrow_oop_reg<<3 + offset]
+        // NullCheck narrow_oop_reg
+        //
+        // On other platforms (Sparc) we have to keep new DecodeN node and
+        // use it to do implicit NULL check in address:
+        //
+        // decode_not_null narrow_oop_reg, base_reg
+        // [base_reg + offset]
+        // NullCheck base_reg
+        //
+        // Pin the new DecodeN node to the non-null path on these platforms
+        // (Sparc) to record which NULL check the new DecodeN node corresponds
+        // to, so it can be used as a value in implicit_null_check().
+        //
+        new_in1->set_req(0, n->in(0));
+      }
+
+      n->subsume_by(new_in1);
+      if (in1->outcnt() == 0) {
+        in1->disconnect_inputs(NULL);
+      }
+    }
+    break;
+
+  case Op_CmpP:
+    // Do this transformation here to preserve CmpPNode::sub() and
+    // other TypePtr related Ideal optimizations (for example, ptr nullness).
+    if (n->in(1)->is_DecodeN() || n->in(2)->is_DecodeN()) {
+      Node* in1 = n->in(1);
+      Node* in2 = n->in(2);
+      if (!in1->is_DecodeN()) {
+        in2 = in1;
+        in1 = n->in(2);
+      }
+      assert(in1->is_DecodeN(), "sanity");
+
+      Compile* C = Compile::current();
+      Node* new_in2 = NULL;
+      if (in2->is_DecodeN()) {
+        new_in2 = in2->in(1);
+      } else if (in2->Opcode() == Op_ConP) {
+        const Type* t = in2->bottom_type();
+        if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) {
+          new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
+          //
+          // This transformation together with CastPP transformation above
+          // will generate code for implicit NULL checks for compressed oops.
+          //
+          // The original code after Optimize()
+          //
+          //    LoadN memory, narrow_oop_reg
+          //    decode narrow_oop_reg, base_reg
+          //    CmpP base_reg, NULL
+          //    CastPP base_reg // NotNull
+          //    Load [base_reg + offset], val_reg
+          //
+          // after these transformations will be
+          //
+          //    LoadN memory, narrow_oop_reg
+          //    CmpN narrow_oop_reg, NULL
+          //    decode_not_null narrow_oop_reg, base_reg
+          //    Load [base_reg + offset], val_reg
+          //
+          // and the uncommon path (== NULL) will use narrow_oop_reg directly
+          // since narrow oops can be used in debug info now (see the code in
+          // final_graph_reshaping_walk()).
+          //
+          // At the end the code will be matched to
+          // on x86:
+          //
+          //    Load_narrow_oop memory, narrow_oop_reg
+          //    Load [R12 + narrow_oop_reg<<3 + offset], val_reg
+          //    NullCheck narrow_oop_reg
+          //
+          // and on sparc:
+          //
+          //    Load_narrow_oop memory, narrow_oop_reg
+          //    decode_not_null narrow_oop_reg, base_reg
+          //    Load [base_reg + offset], val_reg
+          //    NullCheck base_reg
+          //
+        } else if (t->isa_oopptr()) {
+          new_in2 = ConNode::make(C, t->make_narrowoop());
+        }
+      }
+      if (new_in2 != NULL) {
+        Node* cmpN = new (C, 3) CmpNNode(in1->in(1), new_in2);
+        n->subsume_by( cmpN );
+        if (in1->outcnt() == 0) {
+          in1->disconnect_inputs(NULL);
+        }
+        if (in2->outcnt() == 0) {
+          in2->disconnect_inputs(NULL);
+        }
+      }
+    }
+    break;
+
+  case Op_DecodeN:
+    assert(!n->in(1)->is_EncodeP(), "should be optimized out");
+    // DecodeN could be pinned on Sparc, where it can't be folded into
+    // an address expression; see the code for Op_CastPP above.
+    assert(n->in(0) == NULL || !Matcher::clone_shift_expressions, "no control except on sparc");
+    break;
+
+  case Op_EncodeP: {
+    Node* in1 = n->in(1);
+    if (in1->is_DecodeN()) {
+      n->subsume_by(in1->in(1));
+    } else if (in1->Opcode() == Op_ConP) {
+      Compile* C = Compile::current();
+      const Type* t = in1->bottom_type();
+      if (t == TypePtr::NULL_PTR) {
+        n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR));
+      } else if (t->isa_oopptr()) {
+        n->subsume_by(ConNode::make(C, t->make_narrowoop()));
+      }
+    }
+    if (in1->outcnt() == 0) {
+      in1->disconnect_inputs(NULL);
+    }
+    break;
+  }
+
+  case Op_Phi:
+    if (n->as_Phi()->bottom_type()->isa_narrowoop()) {
+      // The EncodeP optimization may create a Phi with the same edges
+      // for all paths, which the register allocator does not handle well.
+      Node* unique_in = n->in(1);
+      assert(unique_in != NULL, "");
+      uint cnt = n->req();
+      for (uint i = 2; i < cnt; i++) {
+        Node* m = n->in(i);
+        assert(m != NULL, "");
+        if (unique_in != m)
+          unique_in = NULL;
+      }
+      if (unique_in != NULL) {
+        n->subsume_by(unique_in);
+      }
+    }
+    break;
+
+#endif
+
   case Op_ModI:
     if (UseDivMod) {
       // Check if a%b and a/b both exist
       Node* d = n->find_similar(Op_DivI);
       if (d) {
         // Replace them with a fused divmod if supported
         Compile* C = Compile::current();
         if (Matcher::has_match_rule(Op_DivModI)) {
           DivModINode* divmod = DivModINode::make(C, n);
-          d->replace_by(divmod->div_proj());
-          n->replace_by(divmod->mod_proj());
+          d->subsume_by(divmod->div_proj());
+          n->subsume_by(divmod->mod_proj());
         } else {
           // replace a%b with a-((a/b)*b)
           Node* mult = new (C, 3) MulINode(d, d->in(2));
           Node* sub  = new (C, 3) SubINode(d->in(1), mult);
-          n->replace_by( sub );
+          n->subsume_by( sub );
         }
       }
     }
     break;
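
Most of the new cases in this switch exist because compressed oops make the oop/narrow-oop conversions explicit as EncodeP/DecodeN nodes, and final reshaping is the last safe place to cancel or re-route them (IGVN would undo some of these shapes, as the Op_AddP comment notes). A hedged, self-contained illustration of the arithmetic the two node types stand for, with a heap base register (R12 on x86_64) and a shift of 3; the Op_EncodeP case relies on encode and decode being exact inverses:

    #include <stdint.h>

    // Illustration only: the heap-based (non zero-based) compressed-oops scheme.
    static inline uint32_t encode_oop(char* heap_base, char* p) {
      return (uint32_t)((p - heap_base) >> 3);          // EncodeP: oop -> narrow oop
    }
    static inline char* decode_oop(char* heap_base, uint32_t n) {
      return heap_base + ((uintptr_t)n << 3);           // DecodeN: [R12 + narrow<<3]
    }
    // Since encode_oop(base, decode_oop(base, n)) == n, EncodeP(DecodeN(x))
    // collapses to x in the Op_EncodeP case, and comparing two decoded oops
    // becomes a CmpN of their narrow inputs in the Op_CmpP case.
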
 

@@ -2003,17 +2268,17 @@
       if (d) {
         // Replace them with a fused divmod if supported
         Compile* C = Compile::current();
         if (Matcher::has_match_rule(Op_DivModL)) {
           DivModLNode* divmod = DivModLNode::make(C, n);
-          d->replace_by(divmod->div_proj());
-          n->replace_by(divmod->mod_proj());
+          d->subsume_by(divmod->div_proj());
+          n->subsume_by(divmod->mod_proj());
         } else {
           // replace a%b with a-((a/b)*b)
           Node* mult = new (C, 3) MulLNode(d, d->in(2));
           Node* sub  = new (C, 3) SubLNode(d->in(1), mult);
-          n->replace_by( sub );
+          n->subsume_by( sub );
         }
       }
     }
     break;
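
When no fused DivMod match rule exists, both the int and long cases above rewrite a % b as a - (a / b) * b, which is exactly Java's truncating remainder. A tiny self-contained check of the identity (plain C++, not HotSpot code):

    #include <cassert>

    static int mod_via_div(int a, int b) {
      int q = a / b;          // DivI (truncates toward zero)
      return a - q * b;       // SubI(a, MulI(q, b)) == ModI(a, b)
    }

    int main() {
      assert(mod_via_div( 17, 5) ==  2);   //  17 - 3*5
      assert(mod_via_div(-17, 5) == -2);   // -17 - (-3)*5, matches Java's -17 % 5
      return 0;
    }
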
 

@@ -2055,36 +2320,43 @@
   case Op_PackD:
     if (n->req()-1 > 2) {
       // Replace many operand PackNodes with a binary tree for matching
       PackNode* p = (PackNode*) n;
       Node* btp = p->binaryTreePack(Compile::current(), 1, n->req());
-      n->replace_by(btp);
+      n->subsume_by(btp);
     }
     break;
   default: 
     assert( !n->is_Call(), "" );
     assert( !n->is_Mem(), "" );
-    if( n->is_If() || n->is_PCTable() ) 
-      fpu._tests.push(n);       // Collect CFG split points
     break;
   }
+
+  // Collect CFG split points
+  if (n->is_MultiBranch())
+    fpu._tests.push(n);
 }
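
Pushing every MultiBranch node, instead of special-casing If and PCTable, works because the expected successor count is now a virtual query. A sketch of the relationship the infinite-loop check further down depends on (abbreviated; the real declarations live in multnode.hpp and cfgnode.hpp):

    // Sketch: the query used by final_graph_reshaping() below.
    class MultiBranchNode : public MultiNode {
     public:
      virtual int required_outcnt() const = 0;  // how many CFG successors must exist
    };
    // IfNode and CountedLoopEndNode answer 2 (true/false projections);
    // PCTableNode and its subclasses (jump tables, Catch) answer their table
    // size, so fpu._tests can hold all of them uniformly.
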
 
 //------------------------------final_graph_reshaping_walk---------------------
 // Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
 // requires that the walk visits a node's inputs before visiting the node.
 static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &fpu ) {
+  ResourceArea *area = Thread::current()->resource_area();
+  Unique_Node_List sfpt(area);
+
   fpu._visited.set(root->_idx); // first, mark node as visited
   uint cnt = root->req();
   Node *n = root;
   uint  i = 0;
   while (true) {
     if (i < cnt) {
       // Place all non-visited non-null inputs onto stack
       Node* m = n->in(i);
       ++i;
       if (m != NULL && !fpu._visited.test_set(m->_idx)) {
+        if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL)
+          sfpt.push(m);
         cnt = m->req();
         nstack.push(n, i); // put on stack parent and next input's index 
         n = m;
         i = 0;
       }

@@ -2097,10 +2369,45 @@
       cnt = n->req();
       i = nstack.index();
       nstack.pop();        // Shift to the next node on stack
     }
   }
+
+  // Go over safepoint nodes to skip DecodeN nodes for debug edges.
+  // This can be done for uncommon traps or any safepoints/calls
+  // if the DecodeN node is referenced only in debug info.
+  while (sfpt.size() > 0) {
+    n = sfpt.pop();
+    JVMState *jvms = n->as_SafePoint()->jvms();
+    assert(jvms != NULL, "sanity");
+    int start = jvms->debug_start();
+    int end   = n->req();
+    bool is_uncommon = (n->is_CallStaticJava() &&
+                        n->as_CallStaticJava()->uncommon_trap_request() != 0);
+    for (int j = start; j < end; j++) {
+      Node* in = n->in(j);
+      if (in->is_DecodeN()) {
+        bool safe_to_skip = true;
+        if (!is_uncommon ) {
+          // Is it safe to skip?
+          for (uint i = 0; i < in->outcnt(); i++) {
+            Node* u = in->raw_out(i);
+            if (!u->is_SafePoint() ||
+                 u->is_Call() && u->as_Call()->has_non_debug_use(n)) {
+              safe_to_skip = false;
+            }
+          }
+        }
+        if (safe_to_skip) {
+          n->set_req(j, in->in(1));
+        }
+        if (in->outcnt() == 0) {
+          in->disconnect_inputs(NULL);
+        }
+      }
+    }
+  }
 }
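
The post-walk loop above bypasses DecodeN nodes that only feed debug info, since deoptimization can materialize the object from the narrow oop itself. The criterion, pulled out as a standalone predicate for clarity (hypothetical helper, not part of the patch):

    // Sketch: the "safe to skip" test above as a predicate.  A DecodeN may be
    // bypassed on a debug edge when it has no real (non-debug) uses left.
    static bool decode_is_debug_only(Node* decode) {
      for (uint i = 0; i < decode->outcnt(); i++) {
        Node* u = decode->raw_out(i);
        if (!u->is_SafePoint())
          return false;                             // a real use exists
        if (u->is_Call() && u->as_Call()->has_non_debug_use(decode))
          return false;                             // passed as an actual call argument
      }
      return true;
    }
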
 
 //------------------------------final_graph_reshaping--------------------------
 // Final Graph Reshaping.  
 //

@@ -2143,50 +2450,49 @@
   Node_Stack nstack(unique() >> 1);
   final_graph_reshaping_walk(nstack, root(), fpu);
 
   // Check for unreachable (from below) code (i.e., infinite loops).
   for( uint i = 0; i < fpu._tests.size(); i++ ) {
-    Node *n = fpu._tests[i];
-    assert( n->is_PCTable() || n->is_If(), "either PCTables or IfNodes" );
-    // Get number of CFG targets; 2 for IfNodes or _size for PCTables.
+    MultiBranchNode *n = fpu._tests[i]->as_MultiBranch();
+    // Get number of CFG targets.
     // Note that PCTables include exception targets after calls.
-    uint expected_kids = n->is_PCTable() ? n->as_PCTable()->_size : 2;
-    if (n->outcnt() != expected_kids) {
+    uint required_outcnt = n->required_outcnt();
+    if (n->outcnt() != required_outcnt) {
       // Check for a few special cases.  Rethrow Nodes never take the
       // 'fall-thru' path, so expected kids is 1 less.
       if (n->is_PCTable() && n->in(0) && n->in(0)->in(0)) {
         if (n->in(0)->in(0)->is_Call()) {
           CallNode *call = n->in(0)->in(0)->as_Call();
           if (call->entry_point() == OptoRuntime::rethrow_stub()) {
-            expected_kids--;      // Rethrow always has 1 less kid
+            required_outcnt--;      // Rethrow always has 1 less kid
           } else if (call->req() > TypeFunc::Parms &&
                      call->is_CallDynamicJava()) {
             // Check for null receiver. In such case, the optimizer has 
             // detected that the virtual call will always result in a null 
             // pointer exception. The fall-through projection of this CatchNode 
             // will not be populated.
             Node *arg0 = call->in(TypeFunc::Parms);
             if (arg0->is_Type() &&
                 arg0->as_Type()->type()->higher_equal(TypePtr::NULL_PTR)) { 
-              expected_kids--;
+              required_outcnt--;
             }
           } else if (call->entry_point() == OptoRuntime::new_array_Java() &&
                      call->req() > TypeFunc::Parms+1 &&
                      call->is_CallStaticJava()) {
             // Check for negative array length. In such case, the optimizer has 
             // detected that the allocation attempt will always result in an
            // exception. There is no fall-through projection of this CatchNode.
             Node *arg1 = call->in(TypeFunc::Parms+1);
             if (arg1->is_Type() &&
                 arg1->as_Type()->type()->join(TypeInt::POS)->empty()) {
-              expected_kids--;
+              required_outcnt--;
             }
           }
         }
       }
-      // Recheck with a better notion of 'expected_kids'
-      if (n->outcnt() != expected_kids) {
+      // Recheck with a better notion of 'required_outcnt'
+      if (n->outcnt() != required_outcnt) {
         record_method_not_compilable("malformed control flow");
         return true;            // Not all targets reachable!
       }
     }
     // Check that I actually visited all kids.  Unreached kids

@@ -2358,10 +2664,13 @@
   }
   if (_failure_reason == NULL) {
     // Record the first failure reason.
     _failure_reason = reason;
   }
+  if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
+    C->print_method(_failure_reason);
+  }
   _root = NULL;  // flush the graph, too
 }
 
 Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog)
   : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false)