--- old/src/share/vm/gc/shared/threadLocalAllocBuffer.hpp 2016-01-11 14:39:02.265968833 +0100 +++ new/src/share/vm/gc/shared/threadLocalAllocBuffer.hpp 2016-01-11 14:39:02.077968842 +0100 @@ -145,8 +145,8 @@ // Initialization at startup static void startup_initialization(); - // Make an in-use tlab parsable, optionally also retiring it. - void make_parsable(bool retire); + // Make an in-use tlab parsable, optionally retiring and/or zapping it. + void make_parsable(bool retire, bool zap = true); // Retire in-use tlab before allocation of a new tlab void clear_before_allocation(); --- old/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp 2016-01-11 14:39:02.273968833 +0100 +++ new/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp 2016-01-11 14:39:02.021968845 +0100 @@ -182,54 +182,13 @@ // preserves obj, destroys len_in_bytes void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) { + assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); Label done; - assert(obj != len_in_bytes && obj != t1 && t1 != len_in_bytes, "registers must be different"); - assert((hdr_size_in_bytes & (BytesPerWord - 1)) == 0, "header size is not a multiple of BytesPerWord"); - Register index = len_in_bytes; - // index is positive and ptr sized - subptr(index, hdr_size_in_bytes); - jcc(Assembler::zero, done); - // initialize topmost word, divide index by 2, check if odd and test if zero - // note: for the remaining code to work, index must be a multiple of BytesPerWord -#ifdef ASSERT - { Label L; - testptr(index, BytesPerWord - 1); - jcc(Assembler::zero, L); - stop("index is not a multiple of BytesPerWord"); - bind(L); - } -#endif - xorptr(t1, t1); // use _zero reg to clear memory (shorter code) - if (UseIncDec) { - shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set - } else { - shrptr(index, 2); // use 2 instructions to avoid partial flag stall - shrptr(index, 1); - } -#ifndef _LP64 - // index could have been not a multiple of 8 (i.e., bit 2 was set) - { Label even; - // note: if index was a multiple of 8, than it cannot - // be 0 now otherwise it must have been 0 before - // => if it is even, we don't need to check for 0 again - jcc(Assembler::carryClear, even); - // clear topmost word (no jump needed if conditional assignment would work here) - movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 0*BytesPerWord), t1); - // index could be 0 now, need to check again - jcc(Assembler::zero, done); - bind(even); - } -#endif // !_LP64 - // initialize remaining object fields: rdx is a multiple of 2 now - { Label loop; - bind(loop); - movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 1*BytesPerWord), t1); - NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - 2*BytesPerWord), t1);) - decrement(index); - jcc(Assembler::notZero, loop); - } - // done + // len_in_bytes is positive and ptr sized + subptr(len_in_bytes, hdr_size_in_bytes); + jcc(Assembler::zero, done); + zero_memory(obj, len_in_bytes, hdr_size_in_bytes, t1); bind(done); } @@ -241,47 +200,49 @@ try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); - initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2); + initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); } -void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2) { +void C1_MacroAssembler::initialize_object(Register obj, Register 
klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) { assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "con_size_in_bytes is not multiple of alignment"); const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; initialize_header(obj, klass, noreg, t1, t2); - // clear rest of allocated space - const Register t1_zero = t1; - const Register index = t2; - const int threshold = 6 * BytesPerWord; // approximate break even point for code size (see comments below) - if (var_size_in_bytes != noreg) { - mov(index, var_size_in_bytes); - initialize_body(obj, index, hdr_size_in_bytes, t1_zero); - } else if (con_size_in_bytes <= threshold) { - // use explicit null stores - // code size = 2 + 3*n bytes (n = number of fields to clear) - xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code) - for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord) - movptr(Address(obj, i), t1_zero); - } else if (con_size_in_bytes > hdr_size_in_bytes) { - // use loop to null out the fields - // code size = 16 bytes for even n (n = number of fields to clear) - // initialize last object field first if odd number of fields - xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code) - movptr(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3); - // initialize last object field if constant size is odd - if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0) - movptr(Address(obj, con_size_in_bytes - (1*BytesPerWord)), t1_zero); - // initialize remaining object fields: rdx is a multiple of 2 - { Label loop; - bind(loop); - movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)), - t1_zero); - NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)), - t1_zero);) - decrement(index); - jcc(Assembler::notZero, loop); + if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { + // clear rest of allocated space + const Register t1_zero = t1; + const Register index = t2; + const int threshold = 6 * BytesPerWord; // approximate break even point for code size (see comments below) + if (var_size_in_bytes != noreg) { + mov(index, var_size_in_bytes); + initialize_body(obj, index, hdr_size_in_bytes, t1_zero); + } else if (con_size_in_bytes <= threshold) { + // use explicit null stores + // code size = 2 + 3*n bytes (n = number of fields to clear) + xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code) + for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += BytesPerWord) + movptr(Address(obj, i), t1_zero); + } else if (con_size_in_bytes > hdr_size_in_bytes) { + // use loop to null out the fields + // code size = 16 bytes for even n (n = number of fields to clear) + // initialize last object field first if odd number of fields + xorptr(t1_zero, t1_zero); // use t1_zero reg to clear memory (shorter code) + movptr(index, (con_size_in_bytes - hdr_size_in_bytes) >> 3); + // initialize last object field if constant size is odd + if (((con_size_in_bytes - hdr_size_in_bytes) & 4) != 0) + movptr(Address(obj, con_size_in_bytes - (1*BytesPerWord)), t1_zero); + // initialize remaining object fields: rdx is a multiple of 2 + { Label loop; + bind(loop); + movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)), + t1_zero); + NOT_LP64(movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)), + t1_zero);) + decrement(index); + jcc(Assembler::notZero, loop); + } } } --- 
old/src/cpu/sparc/vm/c1_MacroAssembler_sparc.hpp 2016-01-11 14:39:02.321968831 +0100 +++ new/src/cpu/sparc/vm/c1_MacroAssembler_sparc.hpp 2016-01-11 14:39:02.025968844 +0100 @@ -50,7 +50,8 @@ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise int con_size_in_bytes, // object size in bytes if known at compile time Register t1, // temp register - Register t2 // temp register + Register t2, // temp register + bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB ); // allocation of fixed-size objects --- old/src/cpu/x86/vm/c1_Runtime1_x86.cpp 2016-01-11 14:39:02.381968828 +0100 +++ new/src/cpu/x86/vm/c1_Runtime1_x86.cpp 2016-01-11 14:39:02.213968836 +0100 @@ -1040,7 +1040,7 @@ __ tlab_allocate(obj, obj_size, 0, t1, t2, slow_path); - __ initialize_object(obj, klass, obj_size, 0, t1, t2); + __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ true); __ verify_oop(obj); __ pop(rbx); __ pop(rdi); @@ -1053,7 +1053,7 @@ __ eden_allocate(obj, obj_size, 0, t1, slow_path); __ incr_allocated_bytes(thread, obj_size, 0); - __ initialize_object(obj, klass, obj_size, 0, t1, t2); + __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); __ verify_oop(obj); __ pop(rbx); __ pop(rdi); @@ -1169,7 +1169,9 @@ __ andptr(t1, Klass::_lh_header_size_mask); __ subptr(arr_size, t1); // body length __ addptr(t1, obj); // body start - __ initialize_body(t1, arr_size, 0, t2); + if (!ZeroTLAB) { + __ initialize_body(t1, arr_size, 0, t2); + } __ verify_oop(obj); __ ret(0); --- old/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp 2016-01-11 14:39:02.369968828 +0100 +++ new/src/cpu/sparc/vm/c1_MacroAssembler_sparc.cpp 2016-01-11 14:39:02.033968844 +0100 @@ -205,12 +205,7 @@ void C1_MacroAssembler::initialize_body(Register base, Register index) { - assert_different_registers(base, index); - Label loop; - bind(loop); - subcc(index, HeapWordSize, index); - brx(Assembler::greaterEqual, true, Assembler::pt, loop); - delayed()->st_ptr(G0, base, index); + zero_memory(base, index); } @@ -237,7 +232,7 @@ } try_allocate(obj, noreg, obj_size * wordSize, t2, t3, slow_case); - initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2); + initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2, /* is_tlab_allocated */ UseTLAB); } void C1_MacroAssembler::initialize_object( @@ -246,7 +241,8 @@ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise int con_size_in_bytes, // object size in bytes if known at compile time Register t1, // temp register - Register t2 // temp register + Register t2, // temp register + bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB ) { const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; @@ -269,31 +265,33 @@ #endif - // initialize body - const int threshold = 5 * HeapWordSize; // approximate break even point for code size - if (var_size_in_bytes != noreg) { - // use a loop - add(obj, hdr_size_in_bytes, t1); // compute address of first element - sub(var_size_in_bytes, hdr_size_in_bytes, t2); // compute size of body - initialize_body(t1, t2); + if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { + // initialize body + const int threshold = 5 * HeapWordSize; // approximate break even point for code size + if (var_size_in_bytes != noreg) { + // use a loop + add(obj, hdr_size_in_bytes, t1); // compute address of first element 
+ sub(var_size_in_bytes, hdr_size_in_bytes, t2); // compute size of body + initialize_body(t1, t2); #ifndef _LP64 - } else if (con_size_in_bytes < threshold * 2) { - // on v9 we can do double word stores to fill twice as much space. - assert(hdr_size_in_bytes % 8 == 0, "double word aligned"); - assert(con_size_in_bytes % 8 == 0, "double word aligned"); - for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += 2 * HeapWordSize) stx(G0, obj, i); + } else if (con_size_in_bytes < threshold * 2) { + // on v9 we can do double word stores to fill twice as much space. + assert(hdr_size_in_bytes % 8 == 0, "double word aligned"); + assert(con_size_in_bytes % 8 == 0, "double word aligned"); + for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += 2 * HeapWordSize) stx(G0, obj, i); #endif - } else if (con_size_in_bytes <= threshold) { - // use explicit NULL stores - for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += HeapWordSize) st_ptr(G0, obj, i); - } else if (con_size_in_bytes > hdr_size_in_bytes) { - // use a loop - const Register base = t1; - const Register index = t2; - add(obj, hdr_size_in_bytes, base); // compute address of first element - // compute index = number of words to clear - set(con_size_in_bytes - hdr_size_in_bytes, index); - initialize_body(base, index); + } else if (con_size_in_bytes <= threshold) { + // use explicit NULL stores + for (int i = hdr_size_in_bytes; i < con_size_in_bytes; i += HeapWordSize) st_ptr(G0, obj, i); + } else if (con_size_in_bytes > hdr_size_in_bytes) { + // use a loop + const Register base = t1; + const Register index = t2; + add(obj, hdr_size_in_bytes, base); // compute address of first element + // compute index = number of words to clear + set(con_size_in_bytes - hdr_size_in_bytes, index); + initialize_body(base, index); + } } if (CURRENT_ENV->dtrace_alloc_probes()) { --- old/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp 2016-01-11 14:39:02.369968828 +0100 +++ new/src/cpu/sparc/vm/c1_Runtime1_sparc.cpp 2016-01-11 14:39:02.025968844 +0100 @@ -435,7 +435,7 @@ __ tlab_allocate(O0_obj, G1_obj_size, 0, G3_t1, slow_path); - __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2); + __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2, /* is_tlab_allocated */ true); __ verify_oop(O0_obj); __ mov(O0, I0); __ ret(); @@ -447,7 +447,7 @@ __ eden_allocate(O0_obj, G1_obj_size, 0, G3_t1, G4_t2, slow_path); __ incr_allocated_bytes(G1_obj_size, G3_t1, G4_t2); - __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2); + __ initialize_object(O0_obj, G5_klass, G1_obj_size, 0, G3_t1, G4_t2, /* is_tlab_allocated */ false); __ verify_oop(O0_obj); __ mov(O0, I0); __ ret(); @@ -542,7 +542,9 @@ __ ldub(klass_lh, G3_t1, klass_lh_header_size_offset); __ sub(G1_arr_size, G3_t1, O1_t2); // body length __ add(O0_obj, G3_t1, G3_t1); // body start - __ initialize_body(G3_t1, O1_t2); + if (!ZeroTLAB) { + __ initialize_body(G3_t1, O1_t2); + } __ verify_oop(O0_obj); __ retl(); __ delayed()->nop(); --- old/src/share/vm/gc/shared/threadLocalAllocBuffer.cpp 2016-01-11 14:39:02.425968826 +0100 +++ new/src/share/vm/gc/shared/threadLocalAllocBuffer.cpp 2016-01-11 14:39:02.077968842 +0100 @@ -105,7 +105,7 @@ // an illusion of a contiguous Eden and optionally retires the tlab. // Waste accounting should be done in caller as appropriate; see, // for example, clear_before_allocation(). 
-void ThreadLocalAllocBuffer::make_parsable(bool retire) { +void ThreadLocalAllocBuffer::make_parsable(bool retire, bool zap) { if (end() != NULL) { invariants(); @@ -113,7 +113,7 @@ myThread()->incr_allocated_bytes(used_bytes()); } - CollectedHeap::fill_with_object(top(), hard_end(), retire); + CollectedHeap::fill_with_object(top(), hard_end(), retire && zap); if (retire || ZeroTLAB) { // "Reset" the TLAB set_start(NULL); --- old/src/cpu/sparc/vm/macroAssembler_sparc.hpp 2016-01-11 14:39:02.365968828 +0100 +++ new/src/cpu/sparc/vm/macroAssembler_sparc.hpp 2016-01-11 14:39:02.025968844 +0100 @@ -1318,6 +1318,7 @@ Label& slow_case // continuation point if fast allocation fails ); void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); + void zero_memory(Register base, Register index); void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2); --- old/src/cpu/sparc/vm/macroAssembler_sparc.cpp 2016-01-11 14:39:02.401968827 +0100 +++ new/src/cpu/sparc/vm/macroAssembler_sparc.cpp 2016-01-11 14:39:02.057968843 +0100 @@ -3471,11 +3471,27 @@ add(top, t1, top); // t1 is tlab_size sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top); st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset())); + + if (ZeroTLAB) { + // This is a fast TLAB refill, therefore the GC is not notified of it. + // So compiled code must fill the new TLAB with zeroes. + ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2); + zero_memory(t2, t1); + } verify_tlab(); ba(retry); delayed()->nop(); } +void MacroAssembler::zero_memory(Register base, Register index) { + assert_different_registers(base, index); + Label loop; + bind(loop); + subcc(index, HeapWordSize, index); + brx(Assembler::greaterEqual, true, Assembler::pt, loop); + delayed()->st_ptr(G0, base, index); +} + void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2) { // Bump total bytes allocated by this thread --- old/src/cpu/x86/vm/macroAssembler_x86.cpp 2016-01-11 14:39:02.429968825 +0100 +++ new/src/cpu/x86/vm/macroAssembler_x86.cpp 2016-01-11 14:39:02.049968843 +0100 @@ -5428,7 +5428,7 @@ Label& try_eden, Label& slow_case) { Register top = rax; - Register t1 = rcx; + Register t1 = rcx; // object size Register t2 = rsi; Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); @@ -5524,12 +5524,76 @@ addptr(top, t1); subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); + + if (ZeroTLAB) { + // This is a fast TLAB refill, therefore the GC is not notified of it. + // So compiled code must fill the new TLAB with zeroes. + movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); + zero_memory(top, t1, 0, t2); + } + verify_tlab(); jmp(retry); return thread_reg; // for use by caller } +// Preserves the contents of address, destroys the contents length_in_bytes and temp. 
+void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp) { + assert(address != length_in_bytes && address != temp && temp != length_in_bytes, "registers must be different"); + assert((offset_in_bytes & (BytesPerWord - 1)) == 0, "offset must be a multiple of BytesPerWord"); + Label done; + + testptr(length_in_bytes, length_in_bytes); + jcc(Assembler::zero, done); + + // initialize topmost word, divide index by 2, check if odd and test if zero + // note: for the remaining code to work, index must be a multiple of BytesPerWord +#ifdef ASSERT + { + Label L; + testptr(length_in_bytes, BytesPerWord - 1); + jcc(Assembler::zero, L); + stop("length must be a multiple of BytesPerWord"); + bind(L); + } +#endif + Register index = length_in_bytes; + xorptr(temp, temp); // use _zero reg to clear memory (shorter code) + if (UseIncDec) { + shrptr(index, 3); // divide by 8/16 and set carry flag if bit 2 was set + } else { + shrptr(index, 2); // use 2 instructions to avoid partial flag stall + shrptr(index, 1); + } +#ifndef _LP64 + // index could have not been a multiple of 8 (i.e., bit 2 was set) + { + Label even; + // note: if index was a multiple of 8, then it cannot + // be 0 now otherwise it must have been 0 before + // => if it is even, we don't need to check for 0 again + jcc(Assembler::carryClear, even); + // clear topmost word (no jump would be needed if conditional assignment worked here) + movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp); + // index could be 0 now, must check again + jcc(Assembler::zero, done); + bind(even); + } +#endif // !_LP64 + // initialize remaining object fields: index is a multiple of 2 now + { + Label loop; + bind(loop); + movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp); + NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);) + decrement(index); + jcc(Assembler::notZero, loop); + } + + bind(done); +} + void MacroAssembler::incr_allocated_bytes(Register thread, Register var_size_in_bytes, int con_size_in_bytes, --- old/src/cpu/x86/vm/macroAssembler_x86.hpp 2016-01-11 14:39:02.473968823 +0100 +++ new/src/cpu/x86/vm/macroAssembler_x86.hpp 2016-01-11 14:39:02.161968838 +0100 @@ -529,6 +529,8 @@ Label& slow_case // continuation point if fast allocation fails ); Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address + void zero_memory(Register address, Register length_in_bytes, int offset_in_bytes, Register temp); + void incr_allocated_bytes(Register thread, Register var_size_in_bytes, int con_size_in_bytes, Register t1 = noreg); --- old/src/cpu/x86/vm/c1_MacroAssembler_x86.hpp 2016-01-11 14:39:02.501968822 +0100 +++ new/src/cpu/x86/vm/c1_MacroAssembler_x86.hpp 2016-01-11 14:39:02.225968835 +0100 @@ -65,7 +65,8 @@ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise int con_size_in_bytes, // object size in bytes if known at compile time Register t1, // temp register - Register t2 // temp register + Register t2, // temp register + bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB ); // allocation of fixed-size objects --- old/src/share/vm/opto/macro.cpp 2016-01-11 14:39:02.781968809 +0100 +++ new/src/share/vm/opto/macro.cpp 2016-01-11 14:39:02.701968813 +0100 @@ -1813,10 +1813,11 @@ // there can be two Allocates to one Initialize. 
The answer in all these // edge cases is safety first. It is always safe to clear immediately // within an Allocate, and then (maybe or maybe not) clear some more later. - if (!ZeroTLAB) + if (!(UseTLAB && ZeroTLAB)) { rawmem = ClearArrayNode::clear_memory(control, rawmem, object, header_size, size_in_bytes, &_igvn); + } } else { if (!init->is_complete()) { // Try to win by zeroing only what the init does not store. --- old/test/TEST.groups 2016-01-11 14:39:04.101968748 +0100 +++ new/test/TEST.groups 2016-01-11 14:39:03.977968753 +0100 @@ -288,6 +288,7 @@ compiler/jsr292/ \ compiler/loopopts/ \ compiler/macronodes/ \ + compiler/memoryinitialization/ \ compiler/osr/ \ compiler/regalloc/ \ compiler/runtime/ \ --- /dev/null 2016-01-04 08:27:25.159751836 +0100 +++ new/test/compiler/memoryinitialization/ZeroTLABTest.java 2016-01-11 14:39:03.977968753 +0100 @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/* + * @test + * @bug 8086053 + * @run main/othervm -Xcomp -XX:+UseG1GC -XX:+UseTLAB -XX:+ZeroTLAB ZeroTLABTest + * @run main/othervm -Xcomp -XX:+UseG1GC -XX:+UseTLAB -XX:-ZeroTLAB ZeroTLABTest + * @run main/othervm -Xcomp -XX:+UseG1GC -XX:-UseTLAB -XX:+ZeroTLAB ZeroTLABTest + * @run main/othervm -Xcomp -XX:+UseG1GC -XX:-UseTLAB -XX:-ZeroTLAB ZeroTLABTest + * @run main/othervm -Xcomp -XX:+UseParallelGC -XX:+UseTLAB -XX:+ZeroTLAB ZeroTLABTest + * @run main/othervm -Xcomp -XX:+UseParallelGC -XX:+UseTLAB -XX:-ZeroTLAB ZeroTLABTest + * @run main/othervm -Xcomp -XX:+UseParallelGC -XX:-UseTLAB -XX:+ZeroTLAB ZeroTLABTest + * @run main/othervm -Xcomp -XX:+UseParallelGC -XX:-UseTLAB -XX:-ZeroTLAB ZeroTLABTest + * @run main/othervm -Xcomp -XX:+UseSerialGC -XX:+UseTLAB -XX:+ZeroTLAB ZeroTLABTest + * @run main/othervm -Xcomp -XX:+UseSerialGC -XX:+UseTLAB -XX:-ZeroTLAB ZeroTLABTest + * @run main/othervm -Xcomp -XX:+UseSerialGC -XX:-UseTLAB -XX:+ZeroTLAB ZeroTLABTest + * @run main/othervm -Xcomp -XX:+UseSerialGC -XX:-UseTLAB -XX:-ZeroTLAB ZeroTLABTest + */ +public class ZeroTLABTest { + public static void main(String args[]) { + System.out.println("Test PASSED"); + } +} --- old/src/share/vm/opto/macroArrayCopy.cpp 2016-01-11 14:39:04.129968746 +0100 +++ new/src/share/vm/opto/macroArrayCopy.cpp 2016-01-11 14:39:04.017968751 +0100 @@ -295,7 +295,7 @@ // out-edges of the dest, we need to avoid making derived pointers // from it until we have checked its uses.) 
if (ReduceBulkZeroing - && !ZeroTLAB // pointless if already zeroed + && !(UseTLAB && ZeroTLAB) // pointless if already zeroed && basic_elem_type != T_CONFLICT // avoid corner case && !src->eqv_uncast(dest) && alloc != NULL --- old/src/share/vm/opto/memnode.cpp 2016-01-11 14:39:04.137968746 +0100 +++ new/src/share/vm/opto/memnode.cpp 2016-01-11 14:39:04.005968752 +0100 @@ -3850,7 +3850,7 @@ bool do_zeroing = true; // we might give up if inits are very sparse int big_init_gaps = 0; // how many large gaps have we seen? - if (ZeroTLAB) do_zeroing = false; + if (UseTLAB && ZeroTLAB) do_zeroing = false; if (!ReduceFieldZeroing && !ReduceBulkZeroing) do_zeroing = false; for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) { @@ -3951,7 +3951,7 @@ remove_extra_zeroes(); // clear out all the zmems left over add_req(inits); - if (!ZeroTLAB) { + if (!(UseTLAB && ZeroTLAB)) { // If anything remains to be zeroed, zero it all now. zeroes_done = align_size_down(zeroes_done, BytesPerInt); // if it is the last unused 4 bytes of an instance, forget about it
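
Editorial note on the change as a whole: the hunks above all serve one invariant — a TLAB handed out by the fast refill path is pre-zeroed whenever ZeroTLAB is on, so compiled allocation paths may skip per-object clearing. The new MacroAssembler::zero_memory helpers do that zeroing at refill time, C1's initialize_object only clears the object body when the guarantee does not hold, and C2 (macro.cpp, macroArrayCopy.cpp, memnode.cpp) now tests UseTLAB && ZeroTLAB instead of ZeroTLAB alone, since ZeroTLAB without TLABs zeroes nothing. The sketch below is a plain C++ model of that decision, not the emitted assembly; clear_words and initialize_object_model are illustrative names, and the two flags are stand-ins for the real VM globals.

    #include <cstddef>
    #include <cstring>

    // Illustrative stand-ins for the VM flags involved; in HotSpot these are
    // the real -XX:+/-UseTLAB and -XX:+/-ZeroTLAB globals.
    static bool UseTLAB  = true;
    static bool ZeroTLAB = true;

    // Stand-in for the stores MacroAssembler::zero_memory() emits:
    // clear len_in_bytes bytes starting at base.
    static void clear_words(char* base, size_t len_in_bytes) {
      memset(base, 0, len_in_bytes);
    }

    // Plain C++ model of C1_MacroAssembler::initialize_object() after this patch:
    // the object body is cleared only when the memory is not already known to be zero.
    static void initialize_object_model(char* obj, size_t size_in_bytes,
                                        size_t hdr_size_in_bytes, bool is_tlab_allocated) {
      // initialize_header(...) would run here unconditionally.
      if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) {
        // Eden allocation, or ZeroTLAB off: the memory may hold stale data,
        // so clear everything past the header.
        clear_words(obj + hdr_size_in_bytes, size_in_bytes - hdr_size_in_bytes);
      }
      // Otherwise the fast TLAB refill already zeroed the whole buffer
      // (see the new ZeroTLAB block in tlab_refill), so nothing is cleared here.
    }

Guarding on UseTLAB && ZeroTLAB rather than on ZeroTLAB alone matters because the flags can be set independently: with -XX:-UseTLAB -XX:+ZeroTLAB no buffer is ever pre-zeroed, so the clearing must not be elided — which is precisely one of the flag combinations ZeroTLABTest.java runs under -Xcomp.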
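
For the x86 zero_memory() itself, the emitted loop clears the region from its highest word downwards: the byte length is converted to a word count (one shrptr by 3 on 64-bit; two shifts plus a separate odd-word store on 32-bit) and a decrement-and-store loop walks the scaled index back to zero. The following is a behavioural model of the 64-bit path only, written as portable C++ under the assumption of an 8-byte word; it shows which bytes end up zeroed, not the scaled-index addressing or the flag tricks.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Behavioural model of the 64-bit path of MacroAssembler::zero_memory():
    // zero length_in_bytes bytes starting at address + offset_in_bytes,
    // walking from the topmost word down, like the decrement/jcc loop.
    static void zero_memory_model(void* address, size_t length_in_bytes, size_t offset_in_bytes) {
      assert(length_in_bytes % sizeof(intptr_t) == 0 && "length must be a multiple of the word size");
      assert(offset_in_bytes % sizeof(intptr_t) == 0 && "offset must be a multiple of the word size");
      if (length_in_bytes == 0) return;                      // matches the early jcc(zero, done)

      intptr_t* base  = reinterpret_cast<intptr_t*>(static_cast<char*>(address) + offset_in_bytes);
      size_t    index = length_in_bytes / sizeof(intptr_t);  // shrptr(index, 3) on 64-bit
      do {
        base[index - 1] = 0;                                 // movptr(Address(address, index, times_8, -BytesPerWord), temp)
        index--;                                             // decrement(index)
      } while (index != 0);                                  // jcc(notZero, loop)
    }

The SPARC zero_memory() keeps the simpler delay-slot store loop that previously lived in C1_MacroAssembler::initialize_body(), so both ports now share a single zeroing primitive between the C1 allocation fast paths and the new ZeroTLAB refill path.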