< prev index next >
src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp
Print this page
rev 12409 : 8169177: aarch64: SIGSEGV when "-XX:+ZeroTLAB" is specified along with GC options
Summary: Add zero-initialization to C1 for fast TLAB refills
Reviewed-by: aph, drwhite
Contributed-by: kavitha.natarajan@linaro.org
@@ -193,119 +193,47 @@
} else if (UseCompressedClassPointers) {
store_klass_gap(obj, zr);
}
}
-// Zero words; len is in bytes
-// Preserves addr; destroys len, t1, rscratch1 and rscratch2
-// len must be a nonzero multiple of wordSize
-void C1_MacroAssembler::zero_memory(Register addr, Register len, Register t1) {
- assert_different_registers(addr, len, t1, rscratch1, rscratch2);
-
-#ifdef ASSERT
- { Label L;
- tst(len, BytesPerWord - 1);
- br(Assembler::EQ, L);
- stop("len is not a multiple of BytesPerWord");
- bind(L);
- }
-#endif
-
-#ifndef PRODUCT
- block_comment("zero memory");
-#endif
-
- Label loop;
- Label entry;
-
-// Algorithm:
-//
-// scratch1 = cnt & 7;
-// cnt -= scratch1;
-// p += scratch1;
-// switch (scratch1) {
-// do {
-// cnt -= 8;
-// p[-8] = 0;
-// case 7:
-// p[-7] = 0;
-// case 6:
-// p[-6] = 0;
-// // ...
-// case 1:
-// p[-1] = 0;
-// case 0:
-// p += 8;
-// } while (cnt);
-// }
-
- const int unroll = 8; // Number of str(zr) instructions we'll unroll
-
- lsr(len, len, LogBytesPerWord);
- andr(rscratch1, len, unroll - 1); // rscratch1 = cnt % unroll
- sub(len, len, rscratch1); // cnt -= rscratch1
- // t1 always points to the end of the region we're about to zero
- add(t1, addr, rscratch1, Assembler::LSL, LogBytesPerWord);
- adr(rscratch2, entry);
- sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
- br(rscratch2);
- bind(loop);
- sub(len, len, unroll);
- for (int i = -unroll; i < 0; i++)
- str(zr, Address(t1, i * wordSize));
- bind(entry);
- add(t1, t1, unroll * wordSize);
- cbnz(len, loop);
-}
-
// preserves obj, destroys len_in_bytes
void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
+ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
Label done;
- assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different");
- assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord");
- Register index = len_in_bytes;
- // index is positive and ptr sized
- subs(index, index, hdr_size_in_bytes);
+
+ // len_in_bytes is positive and ptr sized
+ subs(len_in_bytes, len_in_bytes, hdr_size_in_bytes);
br(Assembler::EQ, done);
- // note: for the remaining code to work, index must be a multiple of BytesPerWord
-#ifdef ASSERT
- { Label L;
- tst(index, BytesPerWord - 1);
- br(Assembler::EQ, L);
- stop("index is not a multiple of BytesPerWord");
- bind(L);
- }
-#endif
// Preserve obj
if (hdr_size_in_bytes)
add(obj, obj, hdr_size_in_bytes);
- zero_memory(obj, index, t1);
+ zero_memory(obj, len_in_bytes, t1);
if (hdr_size_in_bytes)
sub(obj, obj, hdr_size_in_bytes);
- // done
bind(done);
}
void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case) {
assert_different_registers(obj, t1, t2); // XXX really?
assert(header_size >= 0 && object_size >= header_size, "illegal sizes");
try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
- initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2);
+ initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB);
}
-void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) {
+void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) {
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
"con_size_in_bytes is not multiple of alignment");
const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
initialize_header(obj, klass, noreg, t1, t2);
+ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
// clear rest of allocated space
const Register index = t2;
const int threshold = 16 * BytesPerWord; // approximate break even point for code size (see comments below)
if (var_size_in_bytes != noreg) {
mov(index, var_size_in_bytes);
@@ -344,10 +272,11 @@
bind(entry_point);
add(rscratch1, rscratch1, unroll * wordSize);
cbnz(index, loop);
}
+ }
membar(StoreStore);
if (CURRENT_ENV->dtrace_alloc_probes()) {
assert(obj == r0, "must be");
< prev index next >