--- old/src/cpu/sparc/vm/sparc.ad Thu May 20 16:56:53 2010 +++ new/src/cpu/sparc/vm/sparc.ad Thu May 20 16:56:53 2010 @@ -1750,6 +1750,12 @@ // registers? True for Intel but false for most RISCs const bool Matcher::clone_shift_expressions = false; +bool Matcher::narrow_oop_use_complex_address() { + NOT_LP64(ShouldNotCallThis()); + assert(UseCompressedOops, "only for compressed oops code"); + return false; +} + + // Is it better to copy float constants, or load them directly from memory? // Intel can load a float constant from a direct address, requiring no // extra registers. Most RISCs will have to materialize an address into a --- old/src/cpu/sparc/vm/vm_version_sparc.cpp Thu May 20 16:56:54 2010 +++ new/src/cpu/sparc/vm/vm_version_sparc.cpp Thu May 20 16:56:54 2010 @@ -65,13 +65,6 @@ FLAG_SET_DEFAULT(UseInlineCaches, false); } #ifdef _LP64 - // Single issue niagara1 is slower for CompressedOops - // but niagaras after that it's fine. - if (!is_niagara1_plus()) { - if (FLAG_IS_DEFAULT(UseCompressedOops)) { - FLAG_SET_ERGO(bool, UseCompressedOops, false); - } - } // 32-bit oops don't make sense for the 64-bit VM on sparc // since the 32-bit VM has the same registers and smaller objects. Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes); --- old/src/cpu/x86/vm/x86_32.ad Thu May 20 16:56:55 2010 +++ new/src/cpu/x86/vm/x86_32.ad Thu May 20 16:56:55 2010 @@ -1377,6 +1377,12 @@ // registers? True for Intel but false for most RISCs const bool Matcher::clone_shift_expressions = true; +bool Matcher::narrow_oop_use_complex_address() { + ShouldNotCallThis(); + return true; +} + + // Is it better to copy float constants, or load them directly from memory? // Intel can load a float constant from a direct address, requiring no // extra registers. Most RISCs will have to materialize an address into a --- old/src/cpu/x86/vm/x86_64.ad Thu May 20 16:56:55 2010 +++ new/src/cpu/x86/vm/x86_64.ad Thu May 20 16:56:55 2010 @@ -2054,6 +2054,11 @@ // into registers? 
True for Intel but false for most RISCs const bool Matcher::clone_shift_expressions = true; +bool Matcher::narrow_oop_use_complex_address() { + assert(UseCompressedOops, "only for compressed oops code"); + return (LogMinObjAlignmentInBytes <= 3); +} + // Is it better to copy float constants, or load them directly from // memory? Intel can load a float constant from a direct address, // requiring no extra registers. Most RISCs will have to materialize --- old/src/share/vm/opto/compile.cpp Thu May 20 16:56:56 2010 +++ new/src/share/vm/opto/compile.cpp Thu May 20 16:56:56 2010 @@ -2183,7 +2183,7 @@ Node* new_in1 = in1->clone(); new_in1->as_DecodeN()->set_type(t); - if (!Matcher::clone_shift_expressions) { + if (!Matcher::narrow_oop_use_complex_address()) { // // x86, ARM and friends can handle 2 adds in addressing mode // and Matcher can fold a DecodeN node into address by using @@ -2291,7 +2291,7 @@ assert(!n->in(1)->is_EncodeP(), "should be optimized out"); // DecodeN could be pinned on Sparc where it can't be fold into // an address expression, see the code for Op_CastPP above. - assert(n->in(0) == NULL || !Matcher::clone_shift_expressions, "no control except on sparc"); + assert(n->in(0) == NULL || !Matcher::narrow_oop_use_complex_address(), "no control except on sparc"); break; case Op_EncodeP: { --- old/src/share/vm/opto/lcm.cpp Thu May 20 16:56:57 2010 +++ new/src/share/vm/opto/lcm.cpp Thu May 20 16:56:57 2010 @@ -32,7 +32,8 @@ // with suitable memory ops nearby. Use the memory op to do the NULL check. // I can generate a memory op if there is not one nearby. // The proj is the control projection for the not-null case. -// The val is the pointer being checked for nullness. +// The val is the pointer being checked for nullness or +// decodeHeapOop_not_null node if it did not fold into address. 
void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) { // Assume if null check need for 0 offset then always needed // Intel solaris doesn't support any null checks yet and no @@ -96,6 +97,10 @@ } } + // Check for decodeHeapOop_not_null node which did not fold into address. + bool is_decoden = val->is_Mach() && + (val->as_Mach()->ideal_Opcode() == Op_DecodeN); + // Search the successor block for a load or store who's base value is also // the tested value. There may be several. Node_List *out = new Node_List(Thread::current()->resource_area()); @@ -148,7 +153,8 @@ if( !mach->needs_anti_dependence_check() ) continue; // Not an memory op; skip it { - // Check that value is used in memory address. + // Check that value is used in memory address in + // instructions with embedded load (CmpP val1,(val2+off)). Node* base; Node* index; const MachOper* oper = mach->memory_inputs(base, index); @@ -213,7 +219,11 @@ uint vidx = 0; // Capture index of value into memop uint j; for( j = mach->req()-1; j > 0; j-- ) { - if( mach->in(j) == val ) vidx = j; + if( mach->in(j) == val ) { + vidx = j; + // Ignore DecodeN val since it could be hoisted to where needed. + if( is_decoden ) continue; + } // Block of memory-op input Block *inb = cfg->_bbs[mach->in(j)->_idx]; Block *b = this; // Start from nul check @@ -271,6 +281,15 @@ implicit_null_checks++; // Hoist the memory candidate up to the end of the test block. + if( is_decoden ) { + // Check if we need to hoist DecodeN val first. 
+ Block *valb = cfg->_bbs[val->_idx]; + if( this != valb && this->_dom_depth < valb->_dom_depth ) { + valb->find_remove(val); + this->add_inst(val); + cfg->_bbs.map(val->_idx,this); + } + } Block *old_block = cfg->_bbs[best->_idx]; old_block->find_remove(best); add_inst(best); --- old/src/share/vm/opto/matcher.cpp Thu May 20 16:56:57 2010 +++ new/src/share/vm/opto/matcher.cpp Thu May 20 16:56:57 2010 @@ -1334,7 +1334,7 @@ if( j == max_scan ) // No post-domination before scan end? return true; // Then break the match tree up } - if (m->is_DecodeN() && Matcher::clone_shift_expressions) { + if (m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) { // These are commonly used in address expressions and can // efficiently fold into them on X64 in some cases. return false; @@ -2110,8 +2110,8 @@ _null_check_tests.push(proj); Node* val = cmp->in(1); #ifdef _LP64 - if (UseCompressedOops && !Matcher::clone_shift_expressions && - val->bottom_type()->isa_narrowoop()) { + if (val->bottom_type()->isa_narrowoop() && + !Matcher::narrow_oop_use_complex_address()) { // // Look for DecodeN node which should be pinned to orig_proj. // On platforms (Sparc) which can not handle 2 adds --- old/src/share/vm/opto/matcher.hpp Thu May 20 16:56:58 2010 +++ new/src/share/vm/opto/matcher.hpp Thu May 20 16:56:58 2010 @@ -352,6 +352,8 @@ // registers? True for Intel but false for most RISCs static const bool clone_shift_expressions; + static bool narrow_oop_use_complex_address(); + // Is it better to copy float constants, or load them directly from memory? // Intel can load a float constant from a direct address, requiring no // extra registers. Most RISCs will have to materialize an address into a