< prev index next >

src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp

Print this page
rev 12409 : 8169177: aarch64: SIGSEGV when "-XX:+ZeroTLAB" is specified along with GC options
Summary: Add zero-initialization to C1 for fast TLAB refills
Reviewed-by: aph, drwhite
Contributed-by: kavitha.natarajan@linaro.org


 178   } else {
 179     // This assumes that all prototype bits fit in an int32_t
 180     mov(t1, (int32_t)(intptr_t)markOopDesc::prototype());
 181   }
 182   str(t1, Address(obj, oopDesc::mark_offset_in_bytes()));
 183 
 184   if (UseCompressedClassPointers) { // Take care not to kill klass
 185     encode_klass_not_null(t1, klass);
 186     strw(t1, Address(obj, oopDesc::klass_offset_in_bytes()));
 187   } else {
 188     str(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
 189   }
 190 
 191   if (len->is_valid()) {
 192     strw(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));
 193   } else if (UseCompressedClassPointers) {
 194     store_klass_gap(obj, zr);
 195   }
 196 }
 197 
 198 // Zero words; len is in bytes
 199 // Clobbers len, t1, rscratch1 and rscratch2; addr is only read
 200 // len must be a nonzero multiple of wordSize
 201 void C1_MacroAssembler::zero_memory(Register addr, Register len, Register t1) {
 202   assert_different_registers(addr, len, t1, rscratch1, rscratch2);
 203 
 204 #ifdef ASSERT
       // Debug builds only: emit a check that stops if len has any of the low
       // (BytesPerWord - 1) bits set.
 205   { Label L;
 206     tst(len, BytesPerWord - 1);
 207     br(Assembler::EQ, L);
 208     stop("len is not a multiple of BytesPerWord");
 209     bind(L);
 210   }
 211 #endif
 212 
 213 #ifndef PRODUCT
 214   block_comment("zero memory");
 215 #endif
 216 
 217   Label loop;
 218   Label entry;
 219 
 220 //  Algorithm:
 221 //
 222 //    scratch1 = cnt & 7;
 223 //    cnt -= scratch1;
 224 //    p += scratch1;
 225 //    switch (scratch1) {
 226 //      do {
 227 //        cnt -= 8;
 228 //          p[-8] = 0;
 229 //        case 7:
 230 //          p[-7] = 0;
 231 //        case 6:
 232 //          p[-6] = 0;
 233 //          // ...
 234 //        case 1:
 235 //          p[-1] = 0;
 236 //        case 0:
 237 //          p += 8;
 238 //      } while (cnt);
 239 //    }
 240 
 241   const int unroll = 8; // Number of str(zr) instructions we'll unroll
 242 
 243   lsr(len, len, LogBytesPerWord);    // cnt = len in words (byte count >> LogBytesPerWord)
 244   andr(rscratch1, len, unroll - 1);  // tmp1 = cnt % unroll
 245   sub(len, len, rscratch1);      // cnt -= tmp1, leaving a multiple of unroll
 246   // t1 always points to the end of the region we're about to zero
 247   add(t1, addr, rscratch1, Assembler::LSL, LogBytesPerWord);
       // Computed branch into the unrolled stores: start at entry and step back
       // (tmp1 << 2) bytes, so that tmp1 of the stores run before entry is
       // reached. This relies on every str below being one 4-byte instruction.
 248   adr(rscratch2, entry);
 249   sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
 250   br(rscratch2);
 251   bind(loop);
 252   sub(len, len, unroll);
       // unroll stores at negative offsets from t1, i.e. the unroll words that
       // end at t1.
 253   for (int i = -unroll; i < 0; i++)
 254     str(zr, Address(t1, i * wordSize));
 255   bind(entry);
       // Advance the end pointer by one full group and repeat while words remain.
 256   add(t1, t1, unroll * wordSize);
 257   cbnz(len, loop);
 258 }
 259 
 260 // preserves obj, destroys len_in_bytes
     // Emits code that zeroes the part of the object following its header.
     //   obj               - object start; temporarily advanced past the header and restored
     //   len_in_bytes      - total size in bytes; reduced by hdr_size_in_bytes and then
     //                       handed to zero_memory as the byte count
     //   hdr_size_in_bytes - compile-time header size, asserted to be word aligned
     //   t1                - temporary passed on to zero_memory
 261 void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {

 262   Label done;
 263   assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different");
 264   assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord");
 265   Register index = len_in_bytes;
 266   // index is positive and ptr sized
       // Subtract the header; if nothing remains there is no body to clear.
 267   subs(index, index, hdr_size_in_bytes);
 268   br(Assembler::EQ, done);
 269   // note: for the remaining code to work, index must be a multiple of BytesPerWord
 270 #ifdef ASSERT
 271   { Label L;
 272     tst(index, BytesPerWord - 1);
 273     br(Assembler::EQ, L);
 274     stop("index is not a multiple of BytesPerWord");
 275     bind(L);
 276   }
 277 #endif
 278 
 279   // Preserve obj
       // obj is bumped to the first body word for zero_memory and moved back
       // afterwards; both adjustments are omitted when the header size is 0.
 280   if (hdr_size_in_bytes)
 281     add(obj, obj, hdr_size_in_bytes);
 282   zero_memory(obj, index, t1);
 283   if (hdr_size_in_bytes)
 284     sub(obj, obj, hdr_size_in_bytes);
 285 
 286   // done
 287   bind(done);
 288 }
 289 
 290 
 291 void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case) {
       // Allocates a fixed-size instance and then initializes it.
       // object_size and header_size are scaled by BytesPerWord / HeapWordSize
       // below, so they are taken to be counts of words.
       // slow_case is handed to try_allocate - presumably the target taken
       // when the inline allocation fails; confirm against try_allocate.
 292   assert_different_registers(obj, t1, t2); // XXX really?
 293   assert(header_size >= 0 && object_size >= header_size, "illegal sizes");
 294 
       // noreg for the variable size: only the constant byte size is supplied.
 295   try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
 296 
 297   initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2);
 298 }
 299 
 300 void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) {
       // Writes the header of a newly allocated instance and zeroes its body.
       //   obj               - register holding the object address
       //   klass             - forwarded to initialize_header
       //   var_size_in_bytes - size held in a register, or noreg to use con_size_in_bytes
       //   con_size_in_bytes - compile-time size; asserted against MinObjAlignmentInBytesMask
       //   t1, t2            - temporaries; t2 serves as byte count / loop counter (index)
       // rscratch1 is additionally written on the unrolled-loop path.
 301   assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
 302          "con_size_in_bytes is not multiple of alignment");
 303   const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
 304 
 305   initialize_header(obj, klass, noreg, t1, t2);
 306 

 307   // clear rest of allocated space
 308   const Register index = t2;
 309   const int threshold = 16 * BytesPerWord;   // approximate break even point for code size (see comments below)
 310   if (var_size_in_bytes != noreg) {
         // Size only known at run time: copy it and let initialize_body clear
         // everything past the header.
 311     mov(index, var_size_in_bytes);
 312     initialize_body(obj, index, hdr_size_in_bytes, t1);
 313   } else if (con_size_in_bytes <= threshold) {
 314     // use explicit null stores
 315     int i = hdr_size_in_bytes;
         // When the size is not a multiple of two words, emit one single-word
         // store first; the rest is covered by paired stores.
 316     if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) {
 317       str(zr, Address(obj, i));
 318       i += BytesPerWord;
 319     }
 320     for (; i < con_size_in_bytes; i += 2 * BytesPerWord)
 321       stp(zr, zr, Address(obj, i));
 322   } else if (con_size_in_bytes > hdr_size_in_bytes) {
 323     block_comment("zero memory");
 324     // use loop to null out the fields
 325 
 326     int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord;
         // Loop counter: number of full 8-store passes (8 == unroll, declared
         // below). The loop decrements and tests index, so it must be loaded
         // before entry_point is reached.
 327     mov(index, words / 8);
 328 
 329     const int unroll = 8; // Number of str(zr) instructions we'll unroll
 330     int remainder = words % unroll;
         // rscratch1 = end of the first (partial) group of `remainder` words.
 331     lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord));
 332 
 333     Label entry_point, loop;
 334     b(entry_point);
 335 
 336     bind(loop);
 337     sub(index, index, 1);
         // entry_point is bound in front of the last `remainder` stores, so the
         // first pass writes only the partial group; later passes write all 8.
 338     for (int i = -unroll; i < 0; i++) {
 339       if (-i == remainder)
 340         bind(entry_point);
 341       str(zr, Address(rscratch1, i * wordSize));
 342     }
         // No partial group: enter after the stores and go straight to the test.
 343     if (remainder == 0)
 344       bind(entry_point);
 345     add(rscratch1, rscratch1, unroll * wordSize);
 346     cbnz(index, loop);
 347 

 348   }
 349 
       // NOTE(review): presumably orders the initializing stores ahead of any
       // later store that publishes the object - confirm.
 350   membar(StoreStore);
 351 
 352   if (CURRENT_ENV->dtrace_alloc_probes()) {
 353     assert(obj == r0, "must be");
 354     far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
 355   }
 356 
 357   verify_oop(obj);
 358 }
 359 void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int header_size, int f, Register klass, Label& slow_case) {
 360   assert_different_registers(obj, len, t1, t2, klass);
 361 
 362   // determine alignment mask
 363   assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work");
 364 
 365   // check for negative or excessive length
 366   mov(rscratch1, (int32_t)max_array_allocation_length);
 367   cmp(len, rscratch1);




 178   } else {
 179     // This assumes that all prototype bits fit in an int32_t
 180     mov(t1, (int32_t)(intptr_t)markOopDesc::prototype());
 181   }
 182   str(t1, Address(obj, oopDesc::mark_offset_in_bytes()));
 183 
 184   if (UseCompressedClassPointers) { // Take care not to kill klass
 185     encode_klass_not_null(t1, klass);
 186     strw(t1, Address(obj, oopDesc::klass_offset_in_bytes()));
 187   } else {
 188     str(klass, Address(obj, oopDesc::klass_offset_in_bytes()));
 189   }
 190 
 191   if (len->is_valid()) {
 192     strw(len, Address(obj, arrayOopDesc::length_offset_in_bytes()));
 193   } else if (UseCompressedClassPointers) {
 194     store_klass_gap(obj, zr);
 195   }
 196 }
 197 






























































 198 // preserves obj, destroys len_in_bytes
     // Emits code that zeroes the part of the object following its header.
     //   obj               - object start; temporarily advanced past the header and restored
     //   len_in_bytes      - total size in bytes; reduced in place by hdr_size_in_bytes
     //                       and then handed to zero_memory
     //   hdr_size_in_bytes - compile-time header size, asserted non-negative
     //   t1                - temporary passed on to zero_memory
 199 void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) {
 200   assert(hdr_size_in_bytes >= 0, "header size must be positive or 0");
 201   Label done;
 202 
 203   // len_in_bytes is positive and ptr sized
       // Subtract the header; if nothing remains there is no body to clear.
 204   subs(len_in_bytes, len_in_bytes, hdr_size_in_bytes);


 205   br(Assembler::EQ, done);









 206 
 207   // Preserve obj
       // obj is bumped to the first body word for zero_memory and moved back
       // afterwards; both adjustments are omitted when the header size is 0.
 208   if (hdr_size_in_bytes)
 209     add(obj, obj, hdr_size_in_bytes);
 210   zero_memory(obj, len_in_bytes, t1);
 211   if (hdr_size_in_bytes)
 212     sub(obj, obj, hdr_size_in_bytes);
 213 

 214   bind(done);
 215 }
 216 
 217 
 218 void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case) {
       // Allocates a fixed-size instance and then initializes it.
       // object_size and header_size are scaled by BytesPerWord / HeapWordSize
       // below, so they are taken to be counts of words.
       // slow_case is handed to try_allocate - presumably the target taken
       // when the inline allocation fails; confirm against try_allocate.
 219   assert_different_registers(obj, t1, t2); // XXX really?
 220   assert(header_size >= 0 && object_size >= header_size, "illegal sizes");
 221 
       // noreg for the variable size: only the constant byte size is supplied.
 222   try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case);
 223 
       // UseTLAB is passed as is_tlab_allocated.
 224   initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB);
 225 }
 226 
 227 void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) {
       // Writes the header of a newly allocated instance and, unless the body
       // clearing is skipped (see below), zeroes its body.
       //   obj               - register holding the object address
       //   klass             - forwarded to initialize_header
       //   var_size_in_bytes - size held in a register, or noreg to use con_size_in_bytes
       //   con_size_in_bytes - compile-time size; asserted against MinObjAlignmentInBytesMask
       //   t1, t2            - temporaries; t2 serves as byte count / loop counter (index)
       //   is_tlab_allocated - caller's statement that the object came from a TLAB
       // rscratch1 is additionally written on the unrolled-loop path.
 228   assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0,
 229          "con_size_in_bytes is not multiple of alignment");
 230   const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
 231 
 232   initialize_header(obj, klass, noreg, t1, t2);
 233 
       // The body is left untouched only when UseTLAB, ZeroTLAB and
       // is_tlab_allocated all hold - presumably the TLAB memory is already
       // zeroed in that configuration; confirm against the TLAB refill code.
 234   if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
 235      // clear rest of allocated space
 236      const Register index = t2;
 237      const int threshold = 16 * BytesPerWord;   // approximate break even point for code size (see comments below)
 238      if (var_size_in_bytes != noreg) {
            // Size only known at run time: copy it and let initialize_body
            // clear everything past the header.
 239        mov(index, var_size_in_bytes);
 240        initialize_body(obj, index, hdr_size_in_bytes, t1);
 241      } else if (con_size_in_bytes <= threshold) {
 242        // use explicit null stores
 243        int i = hdr_size_in_bytes;
            // When the size is not a multiple of two words, emit one
            // single-word store first; the rest is covered by paired stores.
 244        if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) {
 245          str(zr, Address(obj, i));
 246          i += BytesPerWord;
 247        }
 248        for (; i < con_size_in_bytes; i += 2 * BytesPerWord)
 249          stp(zr, zr, Address(obj, i));
 250      } else if (con_size_in_bytes > hdr_size_in_bytes) {
 251        block_comment("zero memory");
 252       // use loop to null out the fields
 253 
 254        int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord;
            // Loop counter: number of full 8-store passes (8 == unroll,
            // declared below). The loop decrements and tests index, so it
            // must be loaded before entry_point is reached.
 255        mov(index, words / 8);
 256 
 257        const int unroll = 8; // Number of str(zr) instructions we'll unroll
 258        int remainder = words % unroll;
            // rscratch1 = end of the first (partial) group of `remainder` words.
 259        lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord));
 260 
 261        Label entry_point, loop;
 262        b(entry_point);
 263 
 264        bind(loop);
 265        sub(index, index, 1);
            // entry_point is bound in front of the last `remainder` stores, so
            // the first pass writes only the partial group; later passes
            // write all 8.
 266        for (int i = -unroll; i < 0; i++) {
 267          if (-i == remainder)
 268            bind(entry_point);
 269          str(zr, Address(rscratch1, i * wordSize));
 270        }
            // No partial group: enter after the stores and go straight to the test.
 271        if (remainder == 0)
 272          bind(entry_point);
 273        add(rscratch1, rscratch1, unroll * wordSize);
 274        cbnz(index, loop);
 275 
 276      }
 277   }
 278 
       // NOTE(review): presumably orders the initializing stores ahead of any
       // later store that publishes the object - confirm.
 279   membar(StoreStore);
 280 
 281   if (CURRENT_ENV->dtrace_alloc_probes()) {
 282     assert(obj == r0, "must be");
 283     far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)));
 284   }
 285 
 286   verify_oop(obj);
 287 }
 288 void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int header_size, int f, Register klass, Label& slow_case) {
 289   assert_different_registers(obj, len, t1, t2, klass);
 290 
 291   // determine alignment mask
 292   assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work");
 293 
 294   // check for negative or excessive length
 295   mov(rscratch1, (int32_t)max_array_allocation_length);
 296   cmp(len, rscratch1);


< prev index next >