--- old/src/cpu/sparc/vm/vm_version_sparc.cpp 2015-08-19 16:27:31.575419075 -0400 +++ new/src/cpu/sparc/vm/vm_version_sparc.cpp 2015-08-19 16:27:31.487418638 -0400 @@ -85,27 +85,6 @@ _supports_cx8 = has_v9(); _supports_atomic_getset4 = true; // swap instruction - // There are Fujitsu Sparc64 CPUs which support blk_init as well so - // we have to take this check out of the 'is_niagara()' block below. - if (has_blk_init()) { - // When using CMS or G1, we cannot use memset() in BOT updates - // because the sun4v/CMT version in libc_psr uses BIS which - // exposes "phantom zeros" to concurrent readers. See 6948537. - if (FLAG_IS_DEFAULT(UseMemSetInBOT) && (UseConcMarkSweepGC || UseG1GC)) { - FLAG_SET_DEFAULT(UseMemSetInBOT, false); - } - // Issue a stern warning if the user has explicitly set - // UseMemSetInBOT (it is known to cause issues), but allow - // use for experimentation and debugging. - if (UseConcMarkSweepGC || UseG1GC) { - if (UseMemSetInBOT) { - assert(!FLAG_IS_DEFAULT(UseMemSetInBOT), "Error"); - warning("Experimental flag -XX:+UseMemSetInBOT is known to cause instability" - " on sun4v; please understand that you are using at your own risk!"); - } - } - } - if (is_niagara()) { // Indirect branch is the same cost as direct if (FLAG_IS_DEFAULT(UseInlineCaches)) { --- old/src/share/vm/gc/g1/g1BlockOffsetTable.inline.hpp 2015-08-19 16:27:32.071421534 -0400 +++ new/src/share/vm/gc/g1/g1BlockOffsetTable.inline.hpp 2015-08-19 16:27:31.983421098 -0400 @@ -27,6 +27,7 @@ #include "gc/g1/g1BlockOffsetTable.hpp" #include "gc/g1/heapRegion.inline.hpp" +#include "gc/shared/memset_with_concurrent_readers.hpp" #include "gc/shared/space.hpp" inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) { @@ -68,15 +69,7 @@ check_index(right, "right index out of range"); assert(left <= right, "indexes out of order"); size_t num_cards = right - left + 1; - if (UseMemSetInBOT) { - memset(&_offset_array[left], offset, num_cards); - } else { - size_t i = left; - const size_t end = i + num_cards; - for (; i < end; i++) { - _offset_array[i] = offset; - } - } + memset_with_concurrent_readers(&_offset_array[left], offset, num_cards); } // Variant of index_for that does not check the index for validity. --- old/src/share/vm/gc/g1/g1SATBCardTableModRefBS.cpp 2015-08-19 16:27:32.571424014 -0400 +++ new/src/share/vm/gc/g1/g1SATBCardTableModRefBS.cpp 2015-08-19 16:27:32.483423577 -0400 @@ -27,6 +27,7 @@ #include "gc/g1/g1SATBCardTableModRefBS.hpp" #include "gc/g1/heapRegion.hpp" #include "gc/g1/satbQueue.hpp" +#include "gc/shared/memset_with_concurrent_readers.hpp" #include "oops/oop.inline.hpp" #include "runtime/atomic.inline.hpp" #include "runtime/mutexLocker.hpp" @@ -108,15 +109,7 @@ jbyte *const first = byte_for(mr.start()); jbyte *const last = byte_after(mr.last()); - // Below we may use an explicit loop instead of memset() because on - // certain platforms memset() can give concurrent readers phantom zeros. - if (UseMemSetInBOT) { - memset(first, g1_young_gen, last - first); - } else { - for (jbyte* i = first; i < last; i++) { - *i = g1_young_gen; - } - } + memset_with_concurrent_readers(first, g1_young_gen, last - first); } #ifndef PRODUCT --- old/src/share/vm/gc/shared/blockOffsetTable.hpp 2015-08-19 16:27:33.071426493 -0400 +++ new/src/share/vm/gc/shared/blockOffsetTable.hpp 2015-08-19 16:27:32.983426057 -0400 @@ -25,9 +25,12 @@ #ifndef SHARE_VM_GC_SHARED_BLOCKOFFSETTABLE_HPP #define SHARE_VM_GC_SHARED_BLOCKOFFSETTABLE_HPP +#include "gc/shared/memset_with_concurrent_readers.hpp" #include "memory/memRegion.hpp" #include "memory/virtualspace.hpp" +#include "runtime/globals.hpp" #include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" // The CollectedHeap type requires subtypes to implement a method // "block_start". For some subtypes, notably generational @@ -126,6 +129,19 @@ VirtualSpace _vs; u_char* _offset_array; // byte array keeping backwards offsets + void fill_range(size_t start, size_t num_cards, u_char offset) { + void* start_ptr = &_offset_array[start]; +#if INCLUDE_ALL_GCS + // If collector is concurrent, special handling may be needed. + assert(!UseG1GC, "Shouldn't be here when using G1"); + if (UseConcMarkSweepGC) { + memset_with_concurrent_readers(start_ptr, offset, num_cards); + return; + } +#endif // INCLUDE_ALL_GCS + memset(start_ptr, offset, num_cards); + } + protected: // Bounds checking accessors: // For performance these have to devolve to array accesses in product builds. @@ -160,20 +176,7 @@ assert(left < right, "Heap addresses out of order"); size_t num_cards = pointer_delta(right, left) >> LogN_words; - // Below, we may use an explicit loop instead of memset() - // because on certain platforms memset() can give concurrent - // readers "out-of-thin-air," phantom zeros; see 6948537. - if (UseMemSetInBOT) { - memset(&_offset_array[index_for(left)], offset, num_cards); - } else { - size_t i = index_for(left); - const size_t end = i + num_cards; - for (; i < end; i++) { - // Elided until CR 6977974 is fixed properly. - // assert(!reducing || _offset_array[i] >= offset, "Not reducing"); - _offset_array[i] = offset; - } - } + fill_range(index_for(left), num_cards, offset); } void set_offset_array(size_t left, size_t right, u_char offset, bool reducing = false) { @@ -182,20 +185,7 @@ assert(left <= right, "indexes out of order"); size_t num_cards = right - left + 1; - // Below, we may use an explicit loop instead of memset - // because on certain platforms memset() can give concurrent - // readers "out-of-thin-air," phantom zeros; see 6948537. - if (UseMemSetInBOT) { - memset(&_offset_array[left], offset, num_cards); - } else { - size_t i = left; - const size_t end = i + num_cards; - for (; i < end; i++) { - // Elided until CR 6977974 is fixed properly. - // assert(!reducing || _offset_array[i] >= offset, "Not reducing"); - _offset_array[i] = offset; - } - } + fill_range(left, num_cards, offset); } void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const { --- old/src/share/vm/prims/jni.cpp 2015-08-19 16:27:33.587429052 -0400 +++ new/src/share/vm/prims/jni.cpp 2015-08-19 16:27:33.495428596 -0400 @@ -3869,6 +3869,7 @@ void TestBufferingOopClosure_test(); void TestCodeCacheRemSet_test(); void FreeRegionList_test(); +void test_memset_with_concurrent_readers(); #endif void execute_internal_vm_tests() { @@ -3910,6 +3911,7 @@ if (UseG1GC) { run_unit_test(FreeRegionList_test()); } + run_unit_test(test_memset_with_concurrent_readers()); #endif tty->print_cr("All internal VM tests passed"); } --- old/src/share/vm/runtime/globals.hpp 2015-08-19 16:27:34.139431789 -0400 +++ new/src/share/vm/runtime/globals.hpp 2015-08-19 16:27:34.043431313 -0400 @@ -650,10 +650,6 @@ experimental(bool, AlwaysSafeConstructors, false, \ "Force safe construction, as if all fields are final.") \ \ - /* Temporary: See 6948537 */ \ - experimental(bool, UseMemSetInBOT, true, \ - "(Unstable) uses memset in BOT updates in GC code") \ - \ diagnostic(bool, UnlockDiagnosticVMOptions, trueInDebug, \ "Enable normal processing of flags relating to field diagnostics")\ \ --- /dev/null 2015-08-19 12:23:38.462857388 -0400 +++ new/src/cpu/sparc/vm/memset_with_concurrent_readers_sparc.cpp 2015-08-19 16:27:34.595434050 -0400 @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" + +#include "gc/shared/memset_with_concurrent_readers.hpp" +#include "runtime/prefetch.inline.hpp" +#include "utilities/debug.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +#if INCLUDE_ALL_GCS + +// An implementation of memset, for use when there may be concurrent +// readers of the region being stored into. +// +// We can't use the standard library memset if it is implemented using +// block initializing stores. Doing so can result in concurrent readers +// seeing spurious zeros. +// +// We can't use the obvious C/C++ for-loop, because the compiler may +// recognize the idiomatic loop and optimize it into a call to the +// standard library memset; we've seen exactly this happen with, for +// example, Solaris Studio 12.3. Hence the use of inline assembly +// code, hiding loops from the compiler's optimizer. +// +// We don't attempt to use the standard library memset when it is safe +// to do so. We could conservatively do so by detecting the presence +// of block initializing stores (VM_Version::has_blk_init()), but the +// implementation provided here should be sufficient. + +static void fill_subword(void* start, void* end, int value); + +void memset_with_concurrent_readers(void* to, int value, size_t size) { + Prefetch::write(to, 0); + void* end = static_cast(to) + size; + if (size >= BytesPerWord) { + // Fill any partial word prefix. + uintx* aligned_to = static_cast(align_ptr_up(to, BytesPerWord)); + fill_subword(to, aligned_to, value); + + // Compute fill word. + STATIC_ASSERT(BitsPerByte == 8); + STATIC_ASSERT(BitsPerWord == 64); + uintx xvalue = value & 0xff; + xvalue |= (xvalue << 8); + xvalue |= (xvalue << 16); + xvalue |= (xvalue << 32); + + uintx* aligned_end = static_cast(align_ptr_down(end, BytesPerWord)); + assert(aligned_to <= aligned_end, "invariant"); + + // for ( ; aligned_to < aligned_end; ++aligned_to) { + // *aligned_to = xvalue; + // } + uintptr_t temp; + __asm__ volatile( + // Unroll loop x8. + " sub %[aend], %[ato], %[temp]\n\t" + " cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words + " ba %xcc, 2f\n\t" // goto TEST always + " sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words + // LOOP: + "1:\n\t" // unrolled x8 store loop top + " cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to + " stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented + " stx %[xvalue], [%[ato]-56]\n\t" + " stx %[xvalue], [%[ato]-48]\n\t" + " stx %[xvalue], [%[ato]-40]\n\t" + " stx %[xvalue], [%[ato]-32]\n\t" + " stx %[xvalue], [%[ato]-24]\n\t" + " stx %[xvalue], [%[ato]-16]\n\t" + " stx %[xvalue], [%[ato]-8]\n\t" + // TEST: + "2:\n\t" + " bgu,a %xcc, 1b\n\t" // goto LOOP if more than 7 words remaining + " add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration + // Fill remaining < 8 full words. + // Dispatch on (aligned_end - aligned_to). + // offset := (7 - (aligned_end - aligned_to)) + 3 + // 3 instructions from rdpc to DISPATCH + " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end + " srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4 + " add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size + " rd %pc, %[temp]\n\t" // dispatch on scaled offset + " jmpl %[temp]+%[ato], %g0\n\t" + " nop\n\t" + // DISPATCH: no direct reference, but without it the store block may be elided. + "3:\n\t" + " stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue + " stx %[xvalue], [%[aend]-48]\n\t" + " stx %[xvalue], [%[aend]-40]\n\t" + " stx %[xvalue], [%[aend]-32]\n\t" + " stx %[xvalue], [%[aend]-24]\n\t" + " stx %[xvalue], [%[aend]-16]\n\t" + " stx %[xvalue], [%[aend]-8]\n\t" // aligned_end[-1] = xvalue + : /* no outputs */ + : [ato] "&+r" (aligned_to), + [aend] "r" (aligned_end), + [xvalue] "r" (xvalue), + [temp] "&=r" (temp) + : "cc", "memory"); + to = aligned_end; // setup for suffix + } + // Fill any partial word suffix. Also the prefix if size < BytesPerWord. + fill_subword(to, end, value); +} + +static void fill_subword(void* start, void* end, int value) { + STATIC_ASSERT(BytesPerWord == 8); + assert(pointer_delta(end, start, 1) < BytesPerWord, "precondition"); + // Dispatch on (end - start). + void* pc; + __asm__ volatile( + // offset := (7 - (end - start)) + 3 + // 3 instructions from rdpc to DISPATCH + " sub %[offset], %[end], %[offset]\n\t" // offset := start - end + " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4 + " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size + " rd %pc, %[pc]\n\t" // dispatch on scaled offset + " jmpl %[pc]+%[offset], %g0\n\t" + " nop\n\t" + // DISPATCH: no direct reference, but without it the store block may be elided. + "1:\n\t" + " stb %[value], [%[end]-7]\n\t" // end[-7] = value + " stb %[value], [%[end]-6]\n\t" + " stb %[value], [%[end]-5]\n\t" + " stb %[value], [%[end]-4]\n\t" + " stb %[value], [%[end]-3]\n\t" + " stb %[value], [%[end]-2]\n\t" + " stb %[value], [%[end]-1]\n\t" // end[-1] = value + : /* no outputs */ + : [offset] "&+r" (start), + [end] "r" (end), + [value] "r" (value), + [pc] "&=r" (pc) + : "memory"); +} + +#endif // INCLUDE_ALL_GCS --- /dev/null 2015-08-19 12:23:38.462857388 -0400 +++ new/src/share/vm/gc/shared/memset_with_concurrent_readers.cpp 2015-08-19 16:27:35.047436292 -0400 @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" + +#include +#include "gc/shared/memset_with_concurrent_readers.hpp" +#include "utilities/debug.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" +#include "utilities/ostream.hpp" + +#if INCLUDE_ALL_GCS + +// Unit test +#ifdef ASSERT + +static unsigned line_byte(const char* line, size_t i) { + return unsigned(line[i]) & 0xFF; +} + +// Verify memset_with_concurrent_readers mimics memset. +// We don't attempt to verify the concurrent reader case. +void test_memset_with_concurrent_readers() { + const size_t chunk_size = 8 * BytesPerWord; + const unsigned chunk_count = 4; + const size_t block_size = (chunk_count + 4) * chunk_size; + char block[block_size]; + char clear_block[block_size]; + char set_block[block_size]; + + // block format: + // 0: unused leading chunk + // 1: chunk written from start index to end of chunk + // ... nchunks fully written chunks + // N: chunk written from start of chunk to end index + // N+1: unused trailing chunk + + const int clear_value = 0; + const int set_value = 0xAC; + + memset(clear_block, clear_value, block_size); + memset(set_block, set_value, block_size); + + for (unsigned nchunks = 0; nchunks <= chunk_count; ++nchunks) { + for (size_t start = 1; start <= chunk_size; ++start) { + for (size_t end = 0; end <= chunk_size; ++end) { + size_t set_start = chunk_size + start; + size_t set_end = (2 + nchunks) * chunk_size + end; + size_t set_size = set_end - set_start; + + memset(block, clear_value, block_size); + memset_with_concurrent_readers(&block[set_start], set_value, set_size); + bool head_clear = !memcmp(clear_block, block, set_start); + bool middle_set = !memcmp(set_block, block + set_start, set_size); + bool tail_clear = !memcmp(clear_block, block + set_end, block_size - set_end); + if (!(head_clear && middle_set && tail_clear)) { + tty->print_cr("*** memset_with_concurrent_readers failed: " + "set start " SIZE_FORMAT ", set end " SIZE_FORMAT, + set_start, set_end); + for (unsigned chunk = 0; chunk < (block_size / chunk_size); ++chunk) { + for (unsigned line = 0; line < (chunk_size / BytesPerWord); ++line) { + const char* lp = &block[chunk * chunk_size + line * BytesPerWord]; + tty->print_cr("%d,%d: %2x %2x %2x %2x %2x %2x %2x %2x", + chunk, line, + line_byte(lp, 0), line_byte(lp, 1), + line_byte(lp, 2), line_byte(lp, 3), + line_byte(lp, 4), line_byte(lp, 5), + line_byte(lp, 6), line_byte(lp, 7)); + } + } + assert(head_clear, "leading byte not clear"); + assert(middle_set, "memset byte not set"); + assert(tail_clear, "trailing bye not clear"); + } + } + } + } +} + +#endif // end unit test + +#endif // INCLUDE_ALL_GCS --- /dev/null 2015-08-19 12:23:38.462857388 -0400 +++ new/src/share/vm/gc/shared/memset_with_concurrent_readers.hpp 2015-08-19 16:27:35.499438533 -0400 @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SRC_SHARE_VM_GC_SHARED_MEMSETWITHCONCURRENTREADERS_HPP +#define SRC_SHARE_VM_GC_SHARED_MEMSETWITHCONCURRENTREADERS_HPP + +#include +#include +#include "utilities/macros.hpp" + +// Only used by concurrent collectors. +#if INCLUDE_ALL_GCS + +// Fill a block of memory with value, like memset, but with the +// understanding that there may be concurrent readers of that memory. +void memset_with_concurrent_readers(void* to, int value, size_t size); + +#ifdef TARGET_ARCH_sparc + +// SPARC requires special handling. See SPARC-specific definition. + +#else +// All others just use memset. + +inline void memset_with_concurrent_readers(void* to, int value, size_t size) { + ::memset(to, value, size); +} + +#endif // End of target dispatch. + +#endif // INCLUDE_ALL_GCS + +#endif // include guard