< prev index next >

src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp

Print this page
rev 12409 : 8169177: aarch64: SIGSEGV when "-XX:+ZeroTLAB" is specified along with GC options
Summary: Add zero-initialization to C1 for fast TLAB refills
Reviewed-by: aph, drwhite
Contributed-by: kavitha.natarajan@linaro.org

@@ -193,119 +193,47 @@
   } else if (UseCompressedClassPointers) {
     store_klass_gap(obj, zr);
   }
 }
 
-// Zero words; len is in bytes
-// Destroys all registers except addr
-// len must be a nonzero multiple of wordSize
-void C1_MacroAssembler::zero_memory(Register addr, Register len, Register t1) {
-  assert_different_registers(addr, len, t1, rscratch1, rscratch2);
-
-#ifdef ASSERT
-  { Label L;
-    tst(len, BytesPerWord - 1);
-    br(Assembler::EQ, L);
-    stop("len is not a multiple of BytesPerWord");
-    bind(L);
-  }
-#endif
-
-#ifndef PRODUCT
-  block_comment("zero memory");
-#endif
-
-  Label loop;
-  Label entry;
-
-//  Algorithm:
-//
-//    scratch1 = cnt & 7;
-//    cnt -= scratch1;
-//    p += scratch1;
-//    switch (scratch1) {
-//      do {
-//        cnt -= 8;
-//          p[-8] = 0;
-//        case 7:
-//          p[-7] = 0;
-//        case 6:
-//          p[-6] = 0;
-//          // ...
-//        case 1:
-//          p[-1] = 0;
-//        case 0:
-//          p += 8;
-//      } while (cnt);
-//    }
-
-  const int unroll = 8; // Number of str(zr) instructions we'll unroll
-
-  lsr(len, len, LogBytesPerWord);
-  andr(rscratch1, len, unroll - 1);  // tmp1 = cnt % unroll
-  sub(len, len, rscratch1);      // cnt -= unroll
-  // t1 always points to the end of the region we're about to zero
-  add(t1, addr, rscratch1, Assembler::LSL, LogBytesPerWord);
-  adr(rscratch2, entry);
-  sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
-  br(rscratch2);
-  bind(loop);
-  sub(len, len, unroll);
-  for (int i = -unroll; i < 0; i++)
-    str(zr, Address(t1, i * wordSize));
-  bind(entry);
-  add(t1, t1, unroll * wordSize);
-  cbnz(len, loop);
-}
-
 // preserves obj, destroys len_in_bytes
 void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
+  assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
   Label done;
-  assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different");
-  assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord");
-  Register index = len_in_bytes;
-  // index is positive and ptr sized
-  subs(index, index, hdr_size_in_bytes);
+
+  // len_in_bytes is positive and ptr sized
+  subs(len_in_bytes, len_in_bytes, hdr_size_in_bytes);
   br(Assembler::EQ, done);
-  // note: for the remaining code to work, index must be a multiple of BytesPerWord
-#ifdef ASSERT
-  { Label L;
-    tst(index, BytesPerWord - 1);
-    br(Assembler::EQ, L);
-    stop("index is not a multiple of BytesPerWord");
-    bind(L);
-  }
-#endif
 
   // Preserve obj
   if (hdr_size_in_bytes)
     add(obj, obj, hdr_size_in_bytes);
-  zero_memory(obj, index, t1);
+  zero_memory(obj, len_in_bytes, t1);
   if (hdr_size_in_bytes)
     sub(obj, obj, hdr_size_in_bytes);
 
-  // done
   bind(done);
 }
 
 
 void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case) {
   assert_different_registers(obj, t1, t2); // XXX really?
   assert(header_size >= 0 && object_size >= header_size, "illegal sizes");
 
   try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
 
-  initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2);
+  initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB);
 }
 
-void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) {
+void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) {
   assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
          "con_size_in_bytes is not multiple of alignment");
   const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
 
   initialize_header(obj, klass, noreg, t1, t2);
 
+  if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
   // clear rest of allocated space
   const Register index = t2;
   const int threshold = 16 * BytesPerWord;   // approximate break even point for code size (see comments below)
   if (var_size_in_bytes != noreg) {
     mov(index, var_size_in_bytes);

@@ -344,10 +272,11 @@
       bind(entry_point);
     add(rscratch1, rscratch1, unroll * wordSize);
     cbnz(index, loop);
 
   }
+  }
 
   membar(StoreStore);
 
   if (CURRENT_ENV->dtrace_alloc_probes()) {
     assert(obj == r0, "must be");
< prev index next >