hotspot/src/cpu/sparc/vm/assembler_sparc.cpp

rev 611 : Merge

*** 1,10 ****
- #ifdef USE_PRAGMA_IDENT_SRC
- #pragma ident "@(#)assembler_sparc.cpp 1.208 07/08/29 13:42:15 JVM"
- #endif
  /*
!  * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.
--- 1,7 ----
  /*
!  * Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.
*** 131,140 ****
--- 128,151 ----
  int AbstractAssembler::code_fill_byte() {
    return 0x00;                  // illegal instruction 0x00000000
  }
  
+ Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) {
+   switch (in) {
+   case rc_z:   return equal;
+   case rc_lez: return lessEqual;
+   case rc_lz:  return less;
+   case rc_nz:  return notEqual;
+   case rc_gz:  return greater;
+   case rc_gez: return greaterEqual;
+   default:
+     ShouldNotReachHere();
+   }
+   return equal;
+ }
+ 
  // Generate a bunch 'o stuff (including v9's
  #ifndef PRODUCT
  void Assembler::test_v9() {
    add(    G0, G1, G2 );
    add(    G3,  0, G4 );
*** 1214,1248 ****
    st_ptr(oop_result, vm_result_addr);
  }
  
! void MacroAssembler::store_check(Register tmp, Register obj) {
!   // Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
! 
!   /* $$$ This stuff needs to go into one of the BarrierSet generator
!      functions.  (The particular barrier sets will have to be friends of
!      MacroAssembler, I guess.) */
!   BarrierSet* bs = Universe::heap()->barrier_set();
!   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
!   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
!   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
  #ifdef _LP64
    srlx(obj, CardTableModRefBS::card_shift, obj);
  #else
    srl(obj, CardTableModRefBS::card_shift, obj);
  #endif
    assert( tmp != obj, "need separate temp reg");
!   Address rs(tmp, (address)ct->byte_map_base);
    load_address(rs);
    stb(G0, rs.base(), obj);
  }
  
- void MacroAssembler::store_check(Register tmp, Register obj, Register offset) {
-   store_check(tmp, obj);
- }
- 
  // %%% Note:  The following six instructions have been moved,
  // unchanged, from assembler_sparc.inline.hpp.
  // They will be refactored at a later date.
  
  void MacroAssembler::sethi(intptr_t imm22a,
--- 1225,1247 ----
    st_ptr(oop_result, vm_result_addr);
  }
  
! void MacroAssembler::card_table_write(jbyte* byte_map_base,
!                                       Register tmp, Register obj) {
  #ifdef _LP64
    srlx(obj, CardTableModRefBS::card_shift, obj);
  #else
    srl(obj, CardTableModRefBS::card_shift, obj);
  #endif
    assert( tmp != obj, "need separate temp reg");
!   Address rs(tmp, (address)byte_map_base);
    load_address(rs);
    stb(G0, rs.base(), obj);
  }
  
  // %%% Note:  The following six instructions have been moved,
  // unchanged, from assembler_sparc.inline.hpp.
  // They will be refactored at a later date.
  
  void MacroAssembler::sethi(intptr_t imm22a,
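[Editor's note, not part of the webrev: the two-instruction sequence kept in card_table_write (shift, then a byte store of %g0) boils down to the following C++ sketch. Illustrative only; it assumes the usual CardTableModRefBS parameters of 512-byte cards (card_shift == 9) and 0 as the "dirty" value.]

  #include <stdint.h>
  typedef signed char jbyte;

  // Mark the card covering 'obj' as dirty: index the card table by the
  // object address divided by the card size, then store a zero byte.
  static void card_table_write_sketch(jbyte* byte_map_base, const void* obj) {
    const int card_shift = 9;                              // 2^9 = 512-byte cards (assumed)
    jbyte* card = byte_map_base + ((uintptr_t)obj >> card_shift);
    *card = 0;                                             // 0 == dirty; stb %g0 stores zero
  }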
*** 1524,1533 ****
--- 1523,1547 ----
    assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
    int oop_index = oop_recorder()->find_index(obj);
    return Address(d, address(obj), oop_Relocation::spec(oop_index));
  }
  
+ void MacroAssembler::set_narrow_oop(jobject obj, Register d) {
+   assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+   int oop_index = oop_recorder()->find_index(obj);
+   RelocationHolder rspec = oop_Relocation::spec(oop_index);
+ 
+   assert_not_delayed();
+   // Relocation with special format (see relocInfo_sparc.hpp).
+   relocate(rspec, 1);
+   // Assembler::sethi(0x3fffff, d);
+   emit_long( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(0x3fffff) );
+   // Don't add relocation for 'add'. Do patching during 'sethi' processing.
+   add(d, 0x3ff, d);
+ 
+ }
+ 
  void MacroAssembler::align(int modulus) {
    while (offset() % modulus != 0) nop();
  }
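[Editor's note, not part of the webrev: the sethi/add pair emitted by set_narrow_oop only reserves room for a 32-bit narrow-oop constant; 0x3fffff and 0x3ff are placeholder bit patterns that the relocation code patches later. As a reminder of how SPARC splits a 32-bit value across such a pair (a sketch, not HotSpot code):]

  #include <stdint.h>

  // sethi writes bits 31..10 of a value and clears the low 10 bits;
  // the following add supplies bits 9..0.
  static uint32_t hi22(uint32_t v) { return v >> 10; }    // field patched into the sethi
  static uint32_t lo10(uint32_t v) { return v & 0x3ff; }  // immediate patched into the add

  static uint32_t reassemble(uint32_t v) {
    return (hi22(v) << 10) | lo10(v);                     // == v for any 32-bit value
  }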
*** 1649,1663 ****
    // plausibility check for oops
    if (!VerifyOops) return;
  
    if (reg == G0)  return;       // always NULL, which is always an oop
  
!   char buffer[16];
    sprintf(buffer, "%d", line);
!   int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer);
    char * real_msg = new char[len];
!   sprintf(real_msg, "%s (%s:%d)", msg, file, line);
  
    // Call indirectly to solve generation ordering problem
    Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address());
  
    // Make some space on stack above the current register window.
--- 1663,1687 ----
    // plausibility check for oops
    if (!VerifyOops) return;
  
    if (reg == G0)  return;       // always NULL, which is always an oop
  
!   char buffer[64];
! #ifdef COMPILER1
!   if (CommentedAssembly) {
!     snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
!     block_comment(buffer);
!   }
! #endif
! 
!   int len = strlen(file) + strlen(msg) + 1 + 4;
    sprintf(buffer, "%d", line);
!   len += strlen(buffer);
!   sprintf(buffer, " at offset %d ", offset());
!   len += strlen(buffer);
    char * real_msg = new char[len];
!   sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line);
  
    // Call indirectly to solve generation ordering problem
    Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address());
  
    // Make some space on stack above the current register window.
*** 1780,1800 ****
      delayed()->nop();
    }
  
    // Check the klassOop of this object for being in the right area of memory.
    // Cannot do the load in the delay above slot in case O0 is null
!   ld_ptr(Address(O0_obj, 0, oopDesc::klass_offset_in_bytes()), O0_obj);
    // assert((klass & klass_mask) == klass_bits);
    if( Universe::verify_klass_mask() != Universe::verify_oop_mask() )
      set(Universe::verify_klass_mask(), O2_mask);
    if( Universe::verify_klass_bits() != Universe::verify_oop_bits() )
      set(Universe::verify_klass_bits(), O3_bits);
    and3(O0_obj, O2_mask, O4_temp);
    cmp(O4_temp, O3_bits);
    brx(notEqual, false, pn, fail);
    // Check the klass's klass
!   delayed()->ld_ptr(Address(O0_obj, 0, oopDesc::klass_offset_in_bytes()), O0_obj);
    and3(O0_obj, O2_mask, O4_temp);
    cmp(O4_temp, O3_bits);
    brx(notEqual, false, pn, fail);
    delayed()->wrccr( O5_save_flags ); // Restore CCR's
--- 1804,1825 ----
      delayed()->nop();
    }
  
    // Check the klassOop of this object for being in the right area of memory.
    // Cannot do the load in the delay above slot in case O0 is null
!   load_klass(O0_obj, O0_obj);
    // assert((klass & klass_mask) == klass_bits);
    if( Universe::verify_klass_mask() != Universe::verify_oop_mask() )
      set(Universe::verify_klass_mask(), O2_mask);
    if( Universe::verify_klass_bits() != Universe::verify_oop_bits() )
      set(Universe::verify_klass_bits(), O3_bits);
    and3(O0_obj, O2_mask, O4_temp);
    cmp(O4_temp, O3_bits);
    brx(notEqual, false, pn, fail);
+   delayed()->nop();
    // Check the klass's klass
!   load_klass(O0_obj, O0_obj);
    and3(O0_obj, O2_mask, O4_temp);
    cmp(O4_temp, O3_bits);
    brx(notEqual, false, pn, fail);
    delayed()->wrccr( O5_save_flags ); // Restore CCR's
*** 2044,2053 ****
--- 2069,2099 ----
      tst(s1);
      br ( notZero, a, p, L );
  #endif
  }
  
+ void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
+                                      Register s1, address d,
+                                      relocInfo::relocType rt ) {
+   if (VM_Version::v9_instructions_work()) {
+     bpr(rc, a, p, s1, d, rt);
+   } else {
+     tst(s1);
+     br(reg_cond_to_cc_cond(rc), a, p, d, rt);
+   }
+ }
+ 
+ void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
+                                      Register s1, Label& L ) {
+   if (VM_Version::v9_instructions_work()) {
+     bpr(rc, a, p, s1, L);
+   } else {
+     tst(s1);
+     br(reg_cond_to_cc_cond(rc), a, p, L);
+   }
+ }
+ 
  // instruction sequences factored across compiler & interpreter
  
  void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low,
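[Editor's note, not part of the webrev: a hypothetical call site for the new helper, written as it would appear inside a MacroAssembler method; the label and register are made up. On a V9 CPU this emits a single bpr, while on older hardware it degrades to tst plus a conditional branch via reg_cond_to_cc_cond. The G1 barrier code added later in this change uses exactly this pattern.]

  // Branch to is_null when O0 holds zero; annulment off, predicted not taken.
  Label is_null;
  br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, O0, is_null);
  delayed()->nop();   // delay slot left empty in this sketch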
*** 2567,2577 ****
      restore();
    }
  }
  
! void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg,
                                            Label& done, Label* slow_case,
                                            BiasedLockingCounters* counters) {
    assert(UseBiasedLocking, "why call this otherwise?");
  
    if (PrintBiasedLockingStatistics) {
--- 2613,2624 ----
      restore();
    }
  }
  
! void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
!                                           Register temp_reg,
                                            Label& done, Label* slow_case,
                                            BiasedLockingCounters* counters) {
    assert(UseBiasedLocking, "why call this otherwise?");
  
    if (PrintBiasedLockingStatistics) {
*** 2589,2600 ****
    // pointers to allow age to be placed into low bits
    assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits,
           "biased locking makes assumptions about bit layout");
    and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
    cmp(temp_reg, markOopDesc::biased_lock_pattern);
    brx(Assembler::notEqual, false, Assembler::pn, cas_label);
  
!   delayed()->ld_ptr(Address(obj_reg, 0, oopDesc::klass_offset_in_bytes()), temp_reg);
    ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
    or3(G2_thread, temp_reg, temp_reg);
    xor3(mark_reg, temp_reg, temp_reg);
    andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
    if (counters != NULL) {
--- 2636,2648 ----
    // pointers to allow age to be placed into low bits
    assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits,
           "biased locking makes assumptions about bit layout");
    and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg);
    cmp(temp_reg, markOopDesc::biased_lock_pattern);
    brx(Assembler::notEqual, false, Assembler::pn, cas_label);
+   delayed()->nop();
  
!   load_klass(obj_reg, temp_reg);
    ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
    or3(G2_thread, temp_reg, temp_reg);
    xor3(mark_reg, temp_reg, temp_reg);
    andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg);
    if (counters != NULL) {
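[Editor's note, not part of the webrev: the and3/cmp pair above is the usual mark-word triage. In C++ terms it is a mask-and-compare on the low bits of the header; the constants below are the values markOop.hpp defines (two lock bits plus one bias bit), shown here only for illustration.]

  #include <stdint.h>

  static const uintptr_t biased_lock_mask_in_place = 0x7;  // low 3 bits of the mark word
  static const uintptr_t biased_lock_pattern       = 0x5;  // 0b101 => "biased" state

  // True when the object header says the lock is (or may be) biased.
  static bool has_bias_pattern(uintptr_t mark) {
    return (mark & biased_lock_mask_in_place) == biased_lock_pattern;
  }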
*** 2642,2653 ****
    // don't accidentally blow away another thread's valid bias.
    delayed()->and3(mark_reg,
                    markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place,
                    mark_reg);
    or3(G2_thread, mark_reg, temp_reg);
!   casx_under_lock(mark_addr.base(), mark_reg, temp_reg,
!                   (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
    // If the biasing toward our thread failed, this means that
    // another thread succeeded in biasing it toward itself and we
    // need to revoke that bias. The revocation will occur in the
    // interpreter runtime in the slow case.
    cmp(mark_reg, temp_reg);
--- 2690,2700 ----
    // don't accidentally blow away another thread's valid bias.
    delayed()->and3(mark_reg,
                    markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place,
                    mark_reg);
    or3(G2_thread, mark_reg, temp_reg);
!   casn(mark_addr.base(), mark_reg, temp_reg);
    // If the biasing toward our thread failed, this means that
    // another thread succeeded in biasing it toward itself and we
    // need to revoke that bias. The revocation will occur in the
    // interpreter runtime in the slow case.
    cmp(mark_reg, temp_reg);
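[Editor's note, not part of the webrev: casn is the pointer-sized compare-and-swap wrapper (casx on 64-bit, cas on 32-bit) replacing the lock-assisted casx_under_lock here and below. Its contract, ignoring atomicity, is the classic CAS sketched next; the following cmp(mark_reg, temp_reg) then tests whether the old memory value matched the expected one.]

  #include <stdint.h>

  // Sketch of the CAS contract (the real instruction does this atomically):
  // compare memory at 'addr' with 'expected'; if equal, store 'new_val'.
  // The previous memory value is always returned (it ends up in temp_reg above).
  static intptr_t cas_ptr_sketch(volatile intptr_t* addr, intptr_t expected, intptr_t new_val) {
    intptr_t old = *addr;
    if (old == expected) *addr = new_val;
    return old;
  }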
*** 2669,2683 ****
    // bias in the current epoch. In other words, we allow transfer of
    // the bias from one thread to another directly in this situation.
    //
    // FIXME: due to a lack of registers we currently blow away the age
    // bits in this situation. Should attempt to preserve them.
!   ld_ptr(Address(obj_reg, 0, oopDesc::klass_offset_in_bytes()), temp_reg);
    ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
    or3(G2_thread, temp_reg, temp_reg);
!   casx_under_lock(mark_addr.base(), mark_reg, temp_reg,
!                   (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
    // If the biasing toward our thread failed, this means that
    // another thread succeeded in biasing it toward itself and we
    // need to revoke that bias. The revocation will occur in the
    // interpreter runtime in the slow case.
    cmp(mark_reg, temp_reg);
--- 2716,2729 ----
    // bias in the current epoch. In other words, we allow transfer of
    // the bias from one thread to another directly in this situation.
    //
    // FIXME: due to a lack of registers we currently blow away the age
    // bits in this situation. Should attempt to preserve them.
!   load_klass(obj_reg, temp_reg);
    ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
    or3(G2_thread, temp_reg, temp_reg);
!   casn(mark_addr.base(), mark_reg, temp_reg);
    // If the biasing toward our thread failed, this means that
    // another thread succeeded in biasing it toward itself and we
    // need to revoke that bias. The revocation will occur in the
    // interpreter runtime in the slow case.
    cmp(mark_reg, temp_reg);
*** 2701,2714 ****
    // bias of this particular object, so it's okay to continue in the
    // normal locking code.
    //
    // FIXME: due to a lack of registers we currently blow away the age
    // bits in this situation. Should attempt to preserve them.
!   ld_ptr(Address(obj_reg, 0, oopDesc::klass_offset_in_bytes()), temp_reg);
    ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
!   casx_under_lock(mark_addr.base(), mark_reg, temp_reg,
!                   (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
    // Fall through to the normal CAS-based lock, because no matter what
    // the result of the above CAS, some thread must have succeeded in
    // removing the bias bit from the object's header.
    if (counters != NULL) {
      cmp(mark_reg, temp_reg);
--- 2747,2759 ----
    // bias of this particular object, so it's okay to continue in the
    // normal locking code.
    //
    // FIXME: due to a lack of registers we currently blow away the age
    // bits in this situation. Should attempt to preserve them.
!   load_klass(obj_reg, temp_reg);
    ld_ptr(Address(temp_reg, 0, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()), temp_reg);
!   casn(mark_addr.base(), mark_reg, temp_reg);
    // Fall through to the normal CAS-based lock, because no matter what
    // the result of the above CAS, some thread must have succeeded in
    // removing the bias bit from the object's header.
    if (counters != NULL) {
      cmp(mark_reg, temp_reg);
*** 2766,2777 ****
  // and compiler_unlock_object.  Critically, the key factor is code size, not path
  // length.  (Simply experiments to pad CLO with unexecuted NOPs demonstrte the
  // effect).
  
! void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark, Register Rbox, Register Rscratch,
!                                           BiasedLockingCounters* counters) {
    Address mark_addr(Roop, 0, oopDesc::mark_offset_in_bytes());
  
    verify_oop(Roop);
    Label done ;
--- 2811,2824 ----
  // and compiler_unlock_object.  Critically, the key factor is code size, not path
  // length.  (Simply experiments to pad CLO with unexecuted NOPs demonstrte the
  // effect).
  
! void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark,
!                                           Register Rbox, Register Rscratch,
!                                           BiasedLockingCounters* counters,
!                                           bool try_bias) {
    Address mark_addr(Roop, 0, oopDesc::mark_offset_in_bytes());
  
    verify_oop(Roop);
    Label done ;
*** 2789,2799 ****
    if (EmitSync & 2) {
  
      // Fetch object's markword
      ld_ptr(mark_addr, Rmark);
  
!     if (UseBiasedLocking) {
        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
      }
  
      // Save Rbox in Rscratch to be used for the cas operation
      mov(Rbox, Rscratch);
--- 2836,2846 ----
    if (EmitSync & 2) {
  
      // Fetch object's markword
      ld_ptr(mark_addr, Rmark);
  
!     if (try_bias) {
        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
      }
  
      // Save Rbox in Rscratch to be used for the cas operation
      mov(Rbox, Rscratch);
*** 2832,2842 ****
    if (EmitSync & 256) {
      Label IsInflated ;
  
      ld_ptr (mark_addr, Rmark);           // fetch obj->mark
      // Triage: biased, stack-locked, neutral, inflated
!     if (UseBiasedLocking) {
        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
        // Invariant: if control reaches this point in the emitted stream
        // then Rmark has not been modified.
      }
  
--- 2879,2889 ----
    if (EmitSync & 256) {
      Label IsInflated ;
  
      ld_ptr (mark_addr, Rmark);           // fetch obj->mark
      // Triage: biased, stack-locked, neutral, inflated
!     if (try_bias) {
        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
        // Invariant: if control reaches this point in the emitted stream
        // then Rmark has not been modified.
      }
  
*** 2896,2906 ****
      // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ;
  
      ld_ptr (mark_addr, Rmark);           // fetch obj->mark
      // Triage: biased, stack-locked, neutral, inflated
!     if (UseBiasedLocking) {
        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
        // Invariant: if control reaches this point in the emitted stream
        // then Rmark has not been modified.
      }
  
      andcc  (Rmark, 2, G0) ;
--- 2943,2953 ----
      // prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads) ;
  
      ld_ptr (mark_addr, Rmark);           // fetch obj->mark
      // Triage: biased, stack-locked, neutral, inflated
!     if (try_bias) {
        biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters);
        // Invariant: if control reaches this point in the emitted stream
        // then Rmark has not been modified.
      }
  
      andcc  (Rmark, 2, G0) ;
*** 2990,3011 ****
      }
      bind (done) ;
  }
  
! void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, Register Rbox, Register Rscratch) {
    Address mark_addr(Roop, 0, oopDesc::mark_offset_in_bytes());
  
    Label done ;
  
    if (EmitSync & 4) {
      cmp  (SP, G0) ;
      return ;
    }
  
    if (EmitSync & 8) {
!     if (UseBiasedLocking) {
        biased_locking_exit(mark_addr, Rscratch, done);
      }
  
      // Test first if it is a fast recursive unlock
      ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
--- 3037,3060 ----
      }
      bind (done) ;
  }
  
! void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark,
!                                             Register Rbox, Register Rscratch,
!                                             bool try_bias) {
    Address mark_addr(Roop, 0, oopDesc::mark_offset_in_bytes());
  
    Label done ;
  
    if (EmitSync & 4) {
      cmp  (SP, G0) ;
      return ;
    }
  
    if (EmitSync & 8) {
!     if (try_bias) {
        biased_locking_exit(mark_addr, Rscratch, done);
      }
  
      // Test first if it is a fast recursive unlock
      ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark);
*** 3028,3038 ****
    // is too large performance rolls abruptly off a cliff.
    // This could be related to inlining policies, code cache management, or
    // I$ effects.
    Label LStacked ;
  
!   if (UseBiasedLocking) {
      // TODO: eliminate redundant LDs of obj->mark
      biased_locking_exit(mark_addr, Rscratch, done);
    }
  
    ld_ptr (Roop, oopDesc::mark_offset_in_bytes(), Rmark) ;
--- 3077,3087 ----
    // is too large performance rolls abruptly off a cliff.
    // This could be related to inlining policies, code cache management, or
    // I$ effects.
    Label LStacked ;
  
!   if (try_bias) {
      // TODO: eliminate redundant LDs of obj->mark
      biased_locking_exit(mark_addr, Rscratch, done);
    }
  
    ld_ptr (Roop, oopDesc::mark_offset_in_bytes(), Rmark) ;
*** 3225,3234 ****
--- 3274,3288 ----
    // make sure arguments make sense
    assert_different_registers(obj, var_size_in_bytes, t1, t2);
    assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
    assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
  
+   if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
+     // No allocation in the shared eden.
+     br(Assembler::always, false, Assembler::pt, slow_case);
+     delayed()->nop();
+   } else {
    // get eden boundaries
    // note: we need both top & top_addr!
    const Register top_addr = t1;
    const Register end      = t2;
*** 3287,3296 ****
--- 3341,3351 ----
      delayed()->nop();
      stop("eden top is not properly aligned");
      bind(L);
    }
  #endif // ASSERT
+   }
  }
  
  
  void MacroAssembler::tlab_allocate(
    Register obj,                        // result: pointer to object after successful allocation
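[Editor's note, not part of the webrev: the guard added in the previous hunk branches straight to slow_case when inline allocation in a shared eden is not possible (CMS incremental mode, or a collector such as G1 whose eden is not one contiguous chunk). The code it now wraps is the familiar bump-the-pointer-with-CAS loop, roughly the following C++ sketch (not the HotSpot code itself; the GCC/Clang builtin CAS is used purely for illustration):]

  #include <stddef.h>
  #include <stdint.h>

  static void* eden_allocate_sketch(uintptr_t* top_addr, uintptr_t end, size_t size_in_bytes) {
    for (;;) {
      uintptr_t top     = *top_addr;
      uintptr_t new_top = top + size_in_bytes;
      if (new_top > end) return NULL;                 // no room left: take the slow case
      if (__sync_bool_compare_and_swap(top_addr, top, new_top))
        return (void*)top;                            // success: old top is the new object
      // lost the race to another thread: reload top and retry
    }
  }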
*** 3405,3421 ****
    delayed()->nop();
  
    set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
    st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
    // set klass to intArrayKlass
-   set((intptr_t)Universe::intArrayKlassObj_addr(), t2);
-   ld_ptr(t2, 0, t2);
-   st_ptr(t2, top, oopDesc::klass_offset_in_bytes());
    sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
    add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1);
    sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1);
    st(t1, top, arrayOopDesc::length_offset_in_bytes());
    verify_oop(top);
  
    // refill the tlab with an eden allocation
    bind(do_refill);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t1);
--- 3460,3478 ----
    delayed()->nop();
  
    set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2);
    st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word
    // set klass to intArrayKlass
    sub(t1, typeArrayOopDesc::header_size(T_INT), t1);
    add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1);
    sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1);
    st(t1, top, arrayOopDesc::length_offset_in_bytes());
+   set((intptr_t)Universe::intArrayKlassObj_addr(), t2);
+   ld_ptr(t2, 0, t2);
+   // store klass last.  concurrent gcs assumes klass length is valid if
+   // klass field is not null.
+   store_klass(t2, top);
    verify_oop(top);
  
    // refill the tlab with an eden allocation
    bind(do_refill);
    ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t1);
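[Editor's note, not part of the webrev: the reordering in the previous hunk matters for concurrent collectors scanning the TLAB being retired. The klass pointer is published last, so a GC thread that observes a non-NULL klass is guaranteed to also see the already-initialized length field and can size the filler object safely. A minimal C++ sketch of that publication order, with a hypothetical field layout that is not the HotSpot object layout:]

  #include <stdint.h>

  struct FillerArraySketch {
    intptr_t mark;
    void*    klass;    // stays NULL until the object is fully initialized
    int      length;
  };

  static void init_filler_sketch(FillerArraySketch* obj, void* int_array_klass, int len) {
    obj->mark   = 0;                // prototype mark word (value elided in this sketch)
    obj->length = len;              // length first ...
    obj->klass  = int_array_klass;  // ... klass last: non-NULL klass implies valid length
  }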
*** 3535,3539 **** --- 3592,4211 ---- for (int i = 0; i< StackShadowPages-1; i++) { set((-i*offset)+STACK_BIAS, Rscratch); st(G0, Rtsp, Rscratch); } } + + /////////////////////////////////////////////////////////////////////////////////// + #ifndef SERIALGC + + static uint num_stores = 0; + static uint num_null_pre_stores = 0; + + static void count_null_pre_vals(void* pre_val) { + num_stores++; + if (pre_val == NULL) num_null_pre_stores++; + if ((num_stores % 1000000) == 0) { + tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.", + num_stores, num_null_pre_stores, + 100.0*(float)num_null_pre_stores/(float)num_stores); + } + } + + static address satb_log_enqueue_with_frame = 0; + static u_char* satb_log_enqueue_with_frame_end = 0; + + static address satb_log_enqueue_frameless = 0; + static u_char* satb_log_enqueue_frameless_end = 0; + + static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions? + + // The calls to this don't work. We'd need to do a fair amount of work to + // make it work. + static void check_index(int ind) { + assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0), + "Invariants.") + } + + static void generate_satb_log_enqueue(bool with_frame) { + BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize); + CodeBuffer buf(bb->instructions_begin(), bb->instructions_size()); + MacroAssembler masm(&buf); + address start = masm.pc(); + Register pre_val; + + Label refill, restart; + if (with_frame) { + masm.save_frame(0); + pre_val = I0; // Was O0 before the save. + } else { + pre_val = O0; + } + int satb_q_index_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index()); + int satb_q_buf_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf()); + assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && + in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), + "check sizes in assembly below"); + + masm.bind(restart); + masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0); + + masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill); + // If the branch is taken, no harm in executing this in the delay slot. + masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); + masm.sub(L0, oopSize, L0); + + masm.st_ptr(pre_val, L1, L0); // [_buf + index] := I0 + if (!with_frame) { + // Use return-from-leaf + masm.retl(); + masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } else { + // Not delayed. + masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } + if (with_frame) { + masm.ret(); + masm.delayed()->restore(); + } + masm.bind(refill); + + address handle_zero = + CAST_FROM_FN_PTR(address, + &SATBMarkQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. + masm.mov(G1_scratch, L0); + masm.mov(G3_scratch, L1); + masm.mov(G4, L2); + // We need the value of O0 above (for the write into the buffer), so we + // save and restore it. + masm.mov(O0, L3); + // Since the call will overwrite O7, we save and restore that, as well. 
+ masm.mov(O7, L4); + masm.call_VM_leaf(L5, handle_zero, G2_thread); + masm.mov(L0, G1_scratch); + masm.mov(L1, G3_scratch); + masm.mov(L2, G4); + masm.mov(L3, O0); + masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); + masm.delayed()->mov(L4, O7); + + if (with_frame) { + satb_log_enqueue_with_frame = start; + satb_log_enqueue_with_frame_end = masm.pc(); + } else { + satb_log_enqueue_frameless = start; + satb_log_enqueue_frameless_end = masm.pc(); + } + } + + static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) { + if (with_frame) { + if (satb_log_enqueue_with_frame == 0) { + generate_satb_log_enqueue(with_frame); + assert(satb_log_enqueue_with_frame != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated with-frame satb enqueue:"); + Disassembler::decode((u_char*)satb_log_enqueue_with_frame, + satb_log_enqueue_with_frame_end, + tty); + } + } + } else { + if (satb_log_enqueue_frameless == 0) { + generate_satb_log_enqueue(with_frame); + assert(satb_log_enqueue_frameless != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated frameless satb enqueue:"); + Disassembler::decode((u_char*)satb_log_enqueue_frameless, + satb_log_enqueue_frameless_end, + tty); + } + } + } + } + + void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) { + assert(offset == 0 || index == noreg, "choose one"); + + if (G1DisablePreBarrier) return; + // satb_log_barrier(tmp, obj, offset, preserve_o_regs); + Label filtered; + // satb_log_barrier_work0(tmp, filtered); + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + ld(G2, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + tmp); + } else { + guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, + "Assumption"); + ldsb(G2, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + tmp); + } + // Check on whether to annul. + br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); + delayed() -> nop(); + + // satb_log_barrier_work1(tmp, offset); + if (index == noreg) { + if (Assembler::is_simm13(offset)) { + ld_ptr(obj, offset, tmp); + } else { + set(offset, tmp); + ld_ptr(obj, tmp, tmp); + } + } else { + ld_ptr(obj, index, tmp); + } + + // satb_log_barrier_work2(obj, tmp, offset); + + // satb_log_barrier_work3(tmp, filtered, preserve_o_regs); + + const Register pre_val = tmp; + + if (G1SATBBarrierPrintNullPreVals) { + save_frame(0); + mov(pre_val, O0); + // Save G-regs that target may use. + mov(G1, L1); + mov(G2, L2); + mov(G3, L3); + mov(G4, L4); + mov(G5, L5); + call(CAST_FROM_FN_PTR(address, &count_null_pre_vals)); + delayed()->nop(); + // Restore G-regs that target may have used. + mov(L1, G1); + mov(L2, G2); + mov(L3, G3); + mov(L4, G4); + mov(L5, G5); + restore(G0, G0, G0); + } + + // Check on whether to annul. + br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered); + delayed() -> nop(); + + // OK, it's not filtered, so we'll need to call enqueue. In the normal + // case, pre_val will be a scratch G-reg, but there's some cases in which + // it's an O-reg. In the first case, do a normal call. In the latter, + // do a save here and call the frameless version. + + guarantee(pre_val->is_global() || pre_val->is_out(), + "Or we need to think harder."); + if (pre_val->is_global() && !preserve_o_regs) { + generate_satb_log_enqueue_if_necessary(true); // with frame. 
+ call(satb_log_enqueue_with_frame); + delayed()->mov(pre_val, O0); + } else { + generate_satb_log_enqueue_if_necessary(false); // with frameless. + save_frame(0); + call(satb_log_enqueue_frameless); + delayed()->mov(pre_val->after_save(), O0); + restore(); + } + + bind(filtered); + } + + static jint num_ct_writes = 0; + static jint num_ct_writes_filtered_in_hr = 0; + static jint num_ct_writes_filtered_null = 0; + static G1CollectedHeap* g1 = NULL; + + static Thread* count_ct_writes(void* filter_val, void* new_val) { + Atomic::inc(&num_ct_writes); + if (filter_val == NULL) { + Atomic::inc(&num_ct_writes_filtered_in_hr); + } else if (new_val == NULL) { + Atomic::inc(&num_ct_writes_filtered_null); + } else { + if (g1 == NULL) { + g1 = G1CollectedHeap::heap(); + } + } + if ((num_ct_writes % 1000000) == 0) { + jint num_ct_writes_filtered = + num_ct_writes_filtered_in_hr + + num_ct_writes_filtered_null; + + tty->print_cr("%d potential CT writes: %5.2f%% filtered\n" + " (%5.2f%% intra-HR, %5.2f%% null).", + num_ct_writes, + 100.0*(float)num_ct_writes_filtered/(float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_in_hr/ + (float)num_ct_writes, + 100.0*(float)num_ct_writes_filtered_null/ + (float)num_ct_writes); + } + return Thread::current(); + } + + static address dirty_card_log_enqueue = 0; + static u_char* dirty_card_log_enqueue_end = 0; + + // This gets to assume that o0 contains the object address. + static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { + BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2); + CodeBuffer buf(bb->instructions_begin(), bb->instructions_size()); + MacroAssembler masm(&buf); + address start = masm.pc(); + + Label not_already_dirty, restart, refill; + + #ifdef _LP64 + masm.srlx(O0, CardTableModRefBS::card_shift, O0); + #else + masm.srl(O0, CardTableModRefBS::card_shift, O0); + #endif + Address rs(O1, (address)byte_map_base); + masm.load_address(rs); // O1 := <card table base> + masm.ldub(O0, O1, O2); // O2 := [O0 + O1] + + masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt, + O2, not_already_dirty); + // Get O1 + O2 into a reg by itself -- useful in the take-the-branch + // case, harmless if not. + masm.delayed()->add(O0, O1, O3); + + // We didn't take the branch, so we're already dirty: return. + // Use return-from-leaf + masm.retl(); + masm.delayed()->nop(); + + // Not dirty. + masm.bind(not_already_dirty); + // First, dirty it. + masm.stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). + int dirty_card_q_index_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index()); + int dirty_card_q_buf_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf()); + masm.bind(restart); + masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); + + masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, + L0, refill); + // If the branch is taken, no harm in executing this in the delay slot. + masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); + masm.sub(L0, oopSize, L0); + + masm.st_ptr(O3, L1, L0); // [_buf + index] := I0 + // Use return-from-leaf + masm.retl(); + masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); + + masm.bind(refill); + address handle_zero = + CAST_FROM_FN_PTR(address, + &DirtyCardQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. 
+ masm.mov(G1_scratch, L3); + masm.mov(G3_scratch, L5); + // We need the value of O3 above (for the write into the buffer), so we + // save and restore it. + masm.mov(O3, L6); + // Since the call will overwrite O7, we save and restore that, as well. + masm.mov(O7, L4); + + masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); + masm.mov(L3, G1_scratch); + masm.mov(L5, G3_scratch); + masm.mov(L6, O3); + masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart); + masm.delayed()->mov(L4, O7); + + dirty_card_log_enqueue = start; + dirty_card_log_enqueue_end = masm.pc(); + // XXX Should have a guarantee here about not going off the end! + // Does it already do so? Do an experiment... + } + + static inline void + generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) { + if (dirty_card_log_enqueue == 0) { + generate_dirty_card_log_enqueue(byte_map_base); + assert(dirty_card_log_enqueue != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated dirty_card enqueue:"); + Disassembler::decode((u_char*)dirty_card_log_enqueue, + dirty_card_log_enqueue_end, + tty); + } + } + } + + + void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) { + + Label filtered; + MacroAssembler* post_filter_masm = this; + + if (new_val == G0) return; + if (G1DisablePostBarrier) return; + + G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::G1SATBCT || + bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); + if (G1RSBarrierRegionFilter) { + xor3(store_addr, new_val, tmp); + #ifdef _LP64 + srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp); + #else + srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp); + #endif + if (G1PrintCTFilterStats) { + guarantee(tmp->is_global(), "Or stats won't work..."); + // This is a sleazy hack: I'm temporarily hijacking G2, which I + // promise to restore. + mov(new_val, G2); + save_frame(0); + mov(tmp, O0); + mov(G2, O1); + // Save G-regs that target may use. + mov(G1, L1); + mov(G2, L2); + mov(G3, L3); + mov(G4, L4); + mov(G5, L5); + call(CAST_FROM_FN_PTR(address, &count_ct_writes)); + delayed()->nop(); + mov(O0, G2); + // Restore G-regs that target may have used. + mov(L1, G1); + mov(L3, G3); + mov(L4, G4); + mov(L5, G5); + restore(G0, G0, G0); + } + // XXX Should I predict this taken or not? Does it mattern? + br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered); + delayed()->nop(); + } + + // Now we decide how to generate the card table write. If we're + // enqueueing, we call out to a generated function. Otherwise, we do it + // inline here. + + if (G1RSBarrierUseQueue) { + // If the "store_addr" register is an "in" or "local" register, move it to + // a scratch reg so we can pass it as an argument. + bool use_scr = !(store_addr->is_global() || store_addr->is_out()); + // Pick a scratch register different from "tmp". + Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch); + // Make sure we use up the delay slot! 
+ if (use_scr) { + post_filter_masm->mov(store_addr, scr); + } else { + post_filter_masm->nop(); + } + generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base); + save_frame(0); + call(dirty_card_log_enqueue); + if (use_scr) { + delayed()->mov(scr, O0); + } else { + delayed()->mov(store_addr->after_save(), O0); + } + restore(); + + } else { + + #ifdef _LP64 + post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr); + #else + post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr); + #endif + assert( tmp != store_addr, "need separate temp reg"); + Address rs(tmp, (address)bs->byte_map_base); + load_address(rs); + stb(G0, rs.base(), store_addr); + } + + bind(filtered); + + } + + #endif // SERIALGC + /////////////////////////////////////////////////////////////////////////////////// + + void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) { + // If we're writing constant NULL, we can skip the write barrier. + if (new_val == G0) return; + CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef || + bs->kind() == BarrierSet::CardTableExtension, "wrong barrier"); + card_table_write(bs->byte_map_base, tmp, store_addr); + } + + void MacroAssembler::load_klass(Register src_oop, Register klass) { + // The number of bytes in this code is used by + // MachCallDynamicJavaNode::ret_addr_offset() + // if this changes, change that. + if (UseCompressedOops) { + lduw(src_oop, oopDesc::klass_offset_in_bytes(), klass); + decode_heap_oop_not_null(klass); + } else { + ld_ptr(src_oop, oopDesc::klass_offset_in_bytes(), klass); + } + } + + void MacroAssembler::store_klass(Register klass, Register dst_oop) { + if (UseCompressedOops) { + assert(dst_oop != klass, "not enough registers"); + encode_heap_oop_not_null(klass); + st(klass, dst_oop, oopDesc::klass_offset_in_bytes()); + } else { + st_ptr(klass, dst_oop, oopDesc::klass_offset_in_bytes()); + } + } + + void MacroAssembler::store_klass_gap(Register s, Register d) { + if (UseCompressedOops) { + assert(s != d, "not enough registers"); + st(s, d, oopDesc::klass_gap_offset_in_bytes()); + } + } + + void MacroAssembler::load_heap_oop(const Address& s, Register d, int offset) { + if (UseCompressedOops) { + lduw(s, d, offset); + decode_heap_oop(d); + } else { + ld_ptr(s, d, offset); + } + } + + void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d) { + if (UseCompressedOops) { + lduw(s1, s2, d); + decode_heap_oop(d, d); + } else { + ld_ptr(s1, s2, d); + } + } + + void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d) { + if (UseCompressedOops) { + lduw(s1, simm13a, d); + decode_heap_oop(d, d); + } else { + ld_ptr(s1, simm13a, d); + } + } + + void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2) { + if (UseCompressedOops) { + assert(s1 != d && s2 != d, "not enough registers"); + encode_heap_oop(d); + st(d, s1, s2); + } else { + st_ptr(d, s1, s2); + } + } + + void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a) { + if (UseCompressedOops) { + assert(s1 != d, "not enough registers"); + encode_heap_oop(d); + st(d, s1, simm13a); + } else { + st_ptr(d, s1, simm13a); + } + } + + void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset) { + if (UseCompressedOops) { + assert(a.base() != d, "not enough registers"); + encode_heap_oop(d); + st(d, a, offset); + } else { + st_ptr(d, a, offset); + } + } + + + void 
MacroAssembler::encode_heap_oop(Register src, Register dst) { + assert (UseCompressedOops, "must be compressed"); + verify_oop(src); + Label done; + if (src == dst) { + // optimize for frequent case src == dst + bpr(rc_nz, true, Assembler::pt, src, done); + delayed() -> sub(src, G6_heapbase, dst); // annuled if not taken + bind(done); + srlx(src, LogMinObjAlignmentInBytes, dst); + } else { + bpr(rc_z, false, Assembler::pn, src, done); + delayed() -> mov(G0, dst); + // could be moved before branch, and annulate delay, + // but may add some unneeded work decoding null + sub(src, G6_heapbase, dst); + srlx(dst, LogMinObjAlignmentInBytes, dst); + bind(done); + } + } + + + void MacroAssembler::encode_heap_oop_not_null(Register r) { + assert (UseCompressedOops, "must be compressed"); + verify_oop(r); + sub(r, G6_heapbase, r); + srlx(r, LogMinObjAlignmentInBytes, r); + } + + void MacroAssembler::encode_heap_oop_not_null(Register src, Register dst) { + assert (UseCompressedOops, "must be compressed"); + verify_oop(src); + sub(src, G6_heapbase, dst); + srlx(dst, LogMinObjAlignmentInBytes, dst); + } + + // Same algorithm as oops.inline.hpp decode_heap_oop. + void MacroAssembler::decode_heap_oop(Register src, Register dst) { + assert (UseCompressedOops, "must be compressed"); + Label done; + sllx(src, LogMinObjAlignmentInBytes, dst); + bpr(rc_nz, true, Assembler::pt, dst, done); + delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken + bind(done); + verify_oop(dst); + } + + void MacroAssembler::decode_heap_oop_not_null(Register r) { + // Do not add assert code to this unless you change vtableStubs_sparc.cpp + // pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + assert (UseCompressedOops, "must be compressed"); + sllx(r, LogMinObjAlignmentInBytes, r); + add(r, G6_heapbase, r); + } + + void MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) { + // Do not add assert code to this unless you change vtableStubs_sparc.cpp + // pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + assert (UseCompressedOops, "must be compressed"); + sllx(src, LogMinObjAlignmentInBytes, dst); + add(dst, G6_heapbase, dst); + } + + void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops) { + // call indirectly to solve generation ordering problem + Address base(G6_heapbase, (address)Universe::heap_base_addr()); + load_ptr_contents(base, G6_heapbase); + } + }
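[Editor's note, not part of the webrev: the encode/decode sequences above implement the following arithmetic. This is a sketch, assuming the scheme used in this change: narrow oop 0 represents NULL, objects are 8-byte aligned (LogMinObjAlignmentInBytes == 3), and G6_heapbase holds the heap base.]

  #include <stdint.h>

  static const int LogMinObjAlignmentInBytes = 3;   // 8-byte object alignment (assumed)

  static uint32_t encode_heap_oop_sketch(uintptr_t heap_base, uintptr_t oop) {
    if (oop == 0) return 0;                                       // NULL encodes to 0
    return (uint32_t)((oop - heap_base) >> LogMinObjAlignmentInBytes);
  }

  static uintptr_t decode_heap_oop_sketch(uintptr_t heap_base, uint32_t narrow_oop) {
    if (narrow_oop == 0) return 0;                                // 0 decodes to NULL
    return heap_base + ((uintptr_t)narrow_oop << LogMinObjAlignmentInBytes);
  }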