--- old/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp 2016-01-08 09:42:42.813566923 +0100 +++ new/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp 2016-01-08 09:42:42.541566935 +0100 @@ -182,54 +182,13 @@ // preserves obj, destroys len_in_bytes void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) { + assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); Label done; - assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different"); - assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord"); - Register index = len_in_bytes; - // index is positive and ptr sized - subptr(index, hdr_size_in_bytes); - jcc(Assembler::zero, done); - // initialize topmost word, divide index by 2, check if odd and test if zero - // note: for the remaining code to work, index must be a multiple of BytesPerWord -#ifdef ASSERT - { Label L; - testptr(index, BytesPerWord - 1); - jcc(Assembler::zero, L); - stop("index is not a multiple of BytesPerWord"); - bind(L); - } -#endif - xorptr(t1, t1); // use _zero reg to clear memory (shorter code) - if (UseIncDec) { - shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set - } else { - shrptr(index, 2); // use 2 instructions to avoid partial flag stall - shrptr(index, 1); - } -#ifndef _LP64 - // index could have been not a multiple of 8 (i.e., bit 2 was set) - { Label even; - // note: if index was a multiple of 8, than it cannot - // be 0 now otherwise it must have been 0 before - // => if it is even, we don't need to check for 0 again - jcc(Assembler::carryClear, even); - // clear topmost word (no jump needed if conditional assignment would work here) - movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 0*BytesPerWord), t1); - // index could be 0 now, need to check again - jcc(Assembler::zero, done); - bind(even); - } -#endif // !_LP64 - // initialize remaining object fields: rdx is a 
multiple of 2 now - { Label loop; - bind(loop); - movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 1*BytesPerWord), t1); - NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 2*BytesPerWord), t1);) - decrement(index); - jcc(Assembler::notZero, loop); - } - // done + // len_in_bytes is positive and ptr sized + subptr(len_in_bytes, hdr_size_in_bytes); + jcc(Assembler::zero, done); + zero_memory(obj, len_in_bytes, hdr_size_in_bytes, t1); bind(done); } @@ -241,47 +200,49 @@ try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); - initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2); + initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); } -void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) { +void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) { assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "con_size_in_bytes is not multiple of alignment"); const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; initialize_header(obj, klass, noreg, t1, t2); - // clear rest of allocated space - const Register t1_zero = t1; - const Register index = t2; - const int threshold = 6 * BytesPerWord; // approximate break even point for code size (see comments below) - if (var_size_in_bytes != noreg) { - mov(index, var_size_in_bytes); - initialize_body(obj, index, hdr_size_in_bytes, t1_zero); - } else if (con_size_in_bytes <= threshold) { - // use explicit null stores - // code size = 2 + 3*n bytes (n = number of fields to clear) - xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code) - for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord) - movptr(Address(obj, i), t1_zero); - } else if 
(con_size_in_bytes > hdr_size_in_bytes) { - // use loop to null out the fields - // code size = 16 bytes for even n (n = number of fields to clear) - // initialize last object field first if odd number of fields - xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code) - movptr(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3); - // initialize last object field if constant size is odd - if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0) - movptr(Address(obj, con_size_in_bytes - (1*BytesPerWord)), t1_zero); - // initialize remaining object fields: rdx is a multiple of 2 - { Label loop; - bind(loop); - movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)), - t1_zero); - NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)), - t1_zero);) - decrement(index); - jcc(Assembler::notZero, loop); + if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { + // clear rest of allocated space + const Register t1_zero = t1; + const Register index = t2; + const int threshold = 6 * BytesPerWord; // approximate break even point for code size (see comments below) + if (var_size_in_bytes != noreg) { + mov(index, var_size_in_bytes); + initialize_body(obj, index, hdr_size_in_bytes, t1_zero); + } else if (con_size_in_bytes <= threshold) { + // use explicit null stores + // code size = 2 + 3*n bytes (n = number of fields to clear) + xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code) + for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord) + movptr(Address(obj, i), t1_zero); + } else if (con_size_in_bytes > hdr_size_in_bytes) { + // use loop to null out the fields + // code size = 16 bytes for even n (n = number of fields to clear) + // initialize last object field first if odd number of fields + xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code) + movptr(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3); + // initialize last 
object field if constant size is odd + if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0) + movptr(Address(obj, con_size_in_bytes - (1*BytesPerWord)), t1_zero); + // initialize remaining object fields: index holds the number of 8-byte chunks left to clear + { Label loop; + bind(loop); + movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)), + t1_zero); + NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)), + t1_zero);) + decrement(index); + jcc(Assembler::notZero, loop); + } + } }