--- old/src/share/vm/gc/g1/g1ConcurrentMark.cpp 2016-09-05 12:00:52.639395626 +0200 +++ new/src/share/vm/gc/g1/g1ConcurrentMark.cpp 2016-09-05 12:00:52.500391303 +0200 @@ -133,129 +133,165 @@ } G1CMMarkStack::G1CMMarkStack() : - _reserved_space(), + _max_chunk_capacity(0), _base(NULL), - _capacity(0), - _saved_index((size_t)AllBits), + _chunk_capacity(0), + _out_of_memory(false), _should_expand(false) { set_empty(); } bool G1CMMarkStack::resize(size_t new_capacity) { assert(is_empty(), "Only resize when stack is empty."); - assert(new_capacity <= MarkStackSizeMax, - "Trying to resize stack to " SIZE_FORMAT " elements when the maximum is " SIZE_FORMAT, new_capacity, MarkStackSizeMax); + assert(new_capacity <= _max_chunk_capacity, + "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity); - size_t reservation_size = ReservedSpace::allocation_align_size_up(new_capacity * sizeof(oop)); + OopChunk* new_base = MmapArrayAllocator::allocate_or_null(new_capacity); - ReservedSpace rs(reservation_size); - if (!rs.is_reserved()) { - log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " elements and size " SIZE_FORMAT "B.", new_capacity, reservation_size); + if (new_base == NULL) { + log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(OopChunk)); return false; } - - VirtualSpace vs; - - if (!vs.initialize(rs, rs.size())) { - rs.release(); - log_warning(gc)("Failed to commit memory for new overflow mark stack of size " SIZE_FORMAT "B.", rs.size()); - return false; - } - - assert(vs.committed_size() == rs.size(), "Failed to commit all of the mark stack."); - // Release old mapping. - _reserved_space.release(); - - // Save new mapping for future unmapping. 
- _reserved_space = rs; - - MemTracker::record_virtual_memory_type((address)_reserved_space.base(), mtGC); + if (_base != NULL) { + MmapArrayAllocator::free(_base, _chunk_capacity); + } - _base = (oop*) vs.low(); - _capacity = new_capacity; + _base = new_base; + _chunk_capacity = new_capacity; set_empty(); _should_expand = false; return true; } -bool G1CMMarkStack::allocate(size_t capacity) { - return resize(capacity); +size_t G1CMMarkStack::capacity_alignment() { + return (size_t)lcm(os::vm_allocation_granularity(), sizeof(OopChunk)) / sizeof(void*); +} + +bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) { + guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized."); + + size_t const OopChunkSizeInVoidStar = sizeof(OopChunk) / sizeof(void*); + + _max_chunk_capacity = (size_t)align_size_up(max_capacity, capacity_alignment()) / OopChunkSizeInVoidStar; + size_t initial_chunk_capacity = (size_t)align_size_up(initial_capacity, capacity_alignment()) / OopChunkSizeInVoidStar; + + guarantee(initial_chunk_capacity <= _max_chunk_capacity, + "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT, + _max_chunk_capacity, + initial_chunk_capacity); + + log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT, + initial_chunk_capacity, _max_chunk_capacity); + + return resize(initial_chunk_capacity); } void G1CMMarkStack::expand() { // Clear expansion flag _should_expand = false; - if (_capacity == MarkStackSizeMax) { - log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " elements.", _capacity); + if (_chunk_capacity == _max_chunk_capacity) { + log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity); return; } - size_t old_capacity = _capacity; + size_t old_capacity = _chunk_capacity; // Double capacity if possible - size_t new_capacity = 
MIN2(old_capacity * 2, MarkStackSizeMax); + size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity); if (resize(new_capacity)) { - log_debug(gc)("Expanded marking stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " elements", + log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", old_capacity, new_capacity); } else { - log_warning(gc)("Failed to expand marking stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " elements", + log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks", old_capacity, new_capacity); } } G1CMMarkStack::~G1CMMarkStack() { if (_base != NULL) { - _base = NULL; - _reserved_space.release(); + MmapArrayAllocator::free(_base, _chunk_capacity); } } -void G1CMMarkStack::par_push_arr(oop* buffer, size_t n) { +void G1CMMarkStack::add_chunk_to_list(OopChunk* volatile* list, OopChunk* elem) { MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); - size_t start = _index; - size_t next_index = start + n; - if (next_index > _capacity) { - _overflow = true; - return; + elem->next = *list; + *list = elem; +} + +G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_list(OopChunk* volatile* list) { + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); + + OopChunk* result = *list; + if (result != NULL) { + *list = (*list)->next; } - // Otherwise. - _index = next_index; - for (size_t i = 0; i < n; i++) { - size_t ind = start + i; - assert(ind < _capacity, "By overflow test above."); - _base[ind] = buffer[i]; + return result; +} + +G1CMMarkStack::OopChunk* G1CMMarkStack::allocate_new_chunk() { + // This dirty read is okay because we only ever increase the _hwm in parallel code. 
+ if (_hwm >= _chunk_capacity) { + return NULL; } + + size_t cur_idx = Atomic::add(1, &_hwm) - 1; + if (cur_idx >= _chunk_capacity) { + return NULL; + } + + OopChunk* result = ::new (&_base[cur_idx]) OopChunk; + result->next = NULL; + return result; } -bool G1CMMarkStack::par_pop_arr(oop* buffer, size_t max, size_t* n) { - MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); - size_t index = _index; - if (index == 0) { - *n = 0; - return false; - } else { - size_t k = MIN2(max, index); - size_t new_ind = index - k; - for (size_t j = 0; j < k; j++) { - buffer[j] = _base[new_ind + j]; - } - _index = new_ind; - *n = k; - return true; +void G1CMMarkStack::par_push_chunk(oop* ptr_arr) { + // Get a new chunk. + OopChunk* new_chunk = remove_chunk_from_list(&_free_list); + + if (new_chunk == NULL) { + // Did not get a chunk from the free list. Allocate from backing memory. + new_chunk = allocate_new_chunk(); } + + if (new_chunk == NULL) { + _out_of_memory = true; + return; + } + + for (size_t i = 0; i < OopsPerChunk; i++) { + new_chunk->data[i] = ptr_arr[i]; } -void G1CMMarkStack::note_start_of_gc() { - assert(_saved_index == (size_t)AllBits, "note_start_of_gc()/end_of_gc() calls bracketed incorrectly"); - _saved_index = _index; + add_chunk_to_list(&_chunk_list, new_chunk); + Atomic::inc(&_chunks_in_chunk_list); } -void G1CMMarkStack::note_end_of_gc() { - guarantee(!stack_modified(), "Saved index " SIZE_FORMAT " must be the same as " SIZE_FORMAT, _saved_index, _index); +bool G1CMMarkStack::par_pop_chunk(oop* ptr_arr) { + OopChunk* cur = remove_chunk_from_list(&_chunk_list); + + if (cur == NULL) { + return false; + } + + Atomic::dec(&_chunks_in_chunk_list); - _saved_index = (size_t)AllBits; + for (size_t i = 0; i < OopsPerChunk; i++) { + ptr_arr[i] = (oop)cur->data[i]; + } + + add_chunk_to_list(&_free_list, cur); + return true; +} + +void G1CMMarkStack::set_empty() { + _chunks_in_chunk_list = 0; + _hwm = 0; + clear_out_of_memory(); + _chunk_list = NULL; + 
_free_list = NULL; } G1CMRootRegions::G1CMRootRegions() : @@ -483,9 +519,8 @@ } } - if (!_global_mark_stack.allocate(MarkStackSize)) { + if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) { vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack."); - return; } _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC); @@ -1693,10 +1728,10 @@ // oop closures will set the has_overflown flag if we overflow the // global marking stack. - assert(_global_mark_stack.overflow() || _global_mark_stack.is_empty(), + assert(_global_mark_stack.is_out_of_memory() || _global_mark_stack.is_empty(), "mark stack should be empty (unless it overflowed)"); - if (_global_mark_stack.overflow()) { + if (_global_mark_stack.is_out_of_memory()) { // This should have been done already when we tried to push an // entry on to the global mark stack. But let's do it again. set_has_overflown(); @@ -2340,49 +2375,54 @@ } void G1CMTask::move_entries_to_global_stack() { - // local array where we'll store the entries that will be popped - // from the local queue - oop buffer[global_stack_transfer_size]; + // Local array where we'll store the entries that will be popped + // from the local queue. + oop buffer[G1CMMarkStack::OopsPerChunk]; - int n = 0; + size_t n = 0; oop obj; - while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { + while (n < G1CMMarkStack::OopsPerChunk && _task_queue->pop_local(obj)) { buffer[n] = obj; ++n; } + if (n < G1CMMarkStack::OopsPerChunk) { + buffer[n] = NULL; + } if (n > 0) { - // we popped at least one entry from the local queue - - if (!_cm->mark_stack_push(buffer, n)) { + if (!_cm->mark_stack_push(buffer)) { set_has_aborted(); } } - // this operation was quite expensive, so decrease the limits + // This operation was quite expensive, so decrease the limits. 
decrease_limits(); } -void G1CMTask::get_entries_from_global_stack() { - // local array where we'll store the entries that will be popped +bool G1CMTask::get_entries_from_global_stack() { + // Local array where we'll store the entries that will be popped // from the global stack. - oop buffer[global_stack_transfer_size]; - size_t n; - _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); - assert(n <= global_stack_transfer_size, - "we should not pop more than the given limit"); - if (n > 0) { - // yes, we did actually pop at least one entry - for (size_t i = 0; i < n; ++i) { - bool success = _task_queue->push(buffer[i]); - // We only call this when the local queue is empty or under a - // given target limit. So, we do not expect this push to fail. - assert(success, "invariant"); - } + oop buffer[G1CMMarkStack::OopsPerChunk]; + + if (!_cm->mark_stack_pop(buffer)) { + return false; } - // this operation was quite expensive, so decrease the limits + // We did actually pop at least one entry. + for (size_t i = 0; i < G1CMMarkStack::OopsPerChunk; ++i) { + oop elem = buffer[i]; + if (elem == NULL) { + break; + } + bool success = _task_queue->push(elem); + // We only call this when the local queue is empty or under a + // given target limit. So, we do not expect this push to fail. + assert(success, "invariant"); + } + + // This operation was quite expensive, so decrease the limits decrease_limits(); + return true; } void G1CMTask::drain_local_queue(bool partially) { @@ -2426,20 +2466,21 @@ // Decide what the target size is, depending whether we're going to // drain it partially (so that other tasks can steal if they run out - // of things to do) or totally (at the very end). Notice that, - // because we move entries from the global stack in chunks or - // because another task might be doing the same, we might in fact - // drop below the target. But, this is not a problem. - size_t target_size; + // of things to do) or totally (at the very end). 
+ // Notice that when draining the global mark stack partially, due to the raciness + // of the mark stack size update we might in fact drop below the target. But, + // this is not a problem. + // In case of total draining, we simply process until the global mark stack is + // totally empty, disregarding the size counter. if (partially) { - target_size = _cm->partial_mark_stack_size_target(); - } else { - target_size = 0; - } - - if (_cm->mark_stack_size() > target_size) { + size_t const target_size = _cm->partial_mark_stack_size_target(); while (!has_aborted() && _cm->mark_stack_size() > target_size) { - get_entries_from_global_stack(); + if (get_entries_from_global_stack()) { + drain_local_queue(partially); + } + } + } else { + while (!has_aborted() && get_entries_from_global_stack()) { drain_local_queue(partially); } } --- old/src/share/vm/gc/g1/g1ConcurrentMark.hpp 2016-09-05 12:00:53.584425018 +0200 +++ new/src/share/vm/gc/g1/g1ConcurrentMark.hpp 2016-09-05 12:00:53.433420321 +0200 @@ -149,42 +149,88 @@ // // Stores oops in a huge buffer in virtual memory that is always fully committed. // Resizing may only happen during a STW pause when the stack is empty. +// +// Memory is allocated on a "chunk" basis, i.e. a set of oops. For this, the mark +// stack memory is split into evenly sized chunks of oops. Users can only +// add and remove entries on that basis. +// Chunks are filled in increasing address order. Not completely filled chunks +// have a NULL element as a terminating element. +// +// Every chunk has a header containing a single pointer element used for memory +// management. This wastes some space, but is negligible (< .1% with current sizing). +// +// Memory management is done using a mix of tracking a high water-mark indicating +// that all chunks at a lower address are valid chunks, and a singly linked free +// list connecting all empty chunks.
class G1CMMarkStack VALUE_OBJ_CLASS_SPEC { - ReservedSpace _reserved_space; // Space currently reserved for the mark stack. +public: + // Number of oops that can fit in a single chunk. + static const size_t OopsPerChunk = 1024 - 1 /* One reference for the next pointer */; +private: + struct OopChunk { + OopChunk* next; + oop data[OopsPerChunk]; + }; - oop* _base; // Bottom address of allocated memory area. - size_t _capacity; // Maximum number of elements. - size_t _index; // One more than last occupied index. + size_t _max_chunk_capacity; // Maximum number of OopChunk elements on the stack. - size_t _saved_index; // Value of _index saved at start of GC to detect mark stack modifications during that time. + OopChunk* _base; // Bottom address of allocated memory area. + size_t _chunk_capacity; // Current maximum number of OopChunk elements. - bool _overflow; - bool _should_expand; + char _pad0[DEFAULT_CACHE_LINE_SIZE]; + OopChunk* volatile _free_list; // Linked list of free chunks that can be allocated by users. + char _pad1[DEFAULT_CACHE_LINE_SIZE - sizeof(OopChunk*)]; + OopChunk* volatile _chunk_list; // List of chunks currently containing data. + char _pad2[DEFAULT_CACHE_LINE_SIZE - sizeof(OopChunk*)]; + + size_t volatile _chunks_in_chunk_list; + char _pad3[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)]; + + volatile size_t _hwm; // High water mark within the reserved space. + char _pad4[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)]; + + // Allocate a new chunk from the reserved memory, using the high water mark. Returns + // NULL if out of memory. + OopChunk* allocate_new_chunk(); + + bool _out_of_memory; + + // Atomically add the given chunk to the list. + void add_chunk_to_list(OopChunk* volatile* list, OopChunk* elem); + // Atomically remove and return a chunk from the given list. Returns NULL if the + // list is empty. + OopChunk* remove_chunk_from_list(OopChunk* volatile* list); bool _should_expand; // Resizes the mark stack to the given new capacity. 
Releases any previous // memory if successful. bool resize(size_t new_capacity); - bool stack_modified() const { return _index != _saved_index; } public: G1CMMarkStack(); ~G1CMMarkStack(); - bool allocate(size_t capacity); + // Alignment and minimum capacity of this mark stack in number of oops. + static size_t capacity_alignment(); - // Pushes the first "n" elements of the given buffer on the stack. - void par_push_arr(oop* buffer, size_t n); + // Allocate and initialize the mark stack with the given number of oops. + bool initialize(size_t initial_capacity, size_t max_capacity); - // Moves up to max elements from the stack into the given buffer. Returns - // the number of elements pushed, and false if the array has been empty. - // Returns true if the buffer contains at least one element. - bool par_pop_arr(oop* buffer, size_t max, size_t* n); + // Pushes the given buffer containing at most OopsPerChunk elements on the mark + // stack. If fewer than OopsPerChunk elements are to be pushed, the array must + // be terminated with a NULL. + void par_push_chunk(oop* buffer); - bool is_empty() const { return _index == 0; } - size_t capacity() const { return _capacity; } + // Pops a chunk from this mark stack, copying its entries into the given buffer. This + // chunk may contain up to OopsPerChunk elements. If there are fewer, the last + // valid element is followed by a terminating NULL pointer.
+ bool par_pop_chunk(oop* buffer); - bool overflow() const { return _overflow; } - void clear_overflow() { _overflow = false; } + bool is_empty() const { return _chunk_list == NULL && _chunks_in_chunk_list == 0; } + + size_t capacity() const { return _chunk_capacity; } + + bool is_out_of_memory() const { return _out_of_memory; } + void clear_out_of_memory() { _out_of_memory = false; } bool should_expand() const { return _should_expand; } void set_should_expand(bool value) { _should_expand = value; } @@ -192,20 +238,15 @@ // Expand the stack, typically in response to an overflow condition void expand(); - size_t size() const { return _index; } - - void set_empty() { _index = 0; clear_overflow(); } - - // Record the current index. - void note_start_of_gc(); - - // Make sure that we have not added any entries to the stack during GC. - void note_end_of_gc(); + // Return the approximate number of oops on this mark stack. Racy due to + // unsynchronized access to _chunks_in_chunk_list. + size_t size() const { return _chunks_in_chunk_list * OopsPerChunk; } + + void set_empty(); - // Apply fn to each oop in the mark stack, up to the bound recorded - // via one of the above "note" functions. The mark stack must not + // Apply Fn to every oop on the mark stack. The mark stack must not // be modified while iterating. - template void iterate(Fn fn); + template void iterate(Fn fn) PRODUCT_RETURN; }; // Root Regions are regions that are not empty at the beginning of a @@ -278,7 +319,6 @@ friend class G1CMDrainMarkingStackClosure; friend class G1CMBitMapClosure; friend class G1CMConcurrentMarkingTask; - friend class G1CMMarkStack; friend class G1CMRemarkTask; friend class G1CMTask; @@ -479,22 +519,21 @@ public: // Manipulation of the global mark stack. // The push and pop operations are used by tasks for transfers - // between task-local queues and the global mark stack, and use - // locking for concurrency safety. 
- bool mark_stack_push(oop* arr, size_t n) { - _global_mark_stack.par_push_arr(arr, n); - if (_global_mark_stack.overflow()) { + // between task-local queues and the global mark stack. + bool mark_stack_push(oop* arr) { + _global_mark_stack.par_push_chunk(arr); + if (_global_mark_stack.is_out_of_memory()) { set_has_overflown(); return false; } return true; } - void mark_stack_pop(oop* arr, size_t max, size_t* n) { - _global_mark_stack.par_pop_arr(arr, max, n); + bool mark_stack_pop(oop* arr) { + return _global_mark_stack.par_pop_chunk(arr); } size_t mark_stack_size() { return _global_mark_stack.size(); } size_t partial_mark_stack_size_target() { return _global_mark_stack.capacity()/3; } - bool mark_stack_overflow() { return _global_mark_stack.overflow(); } + bool mark_stack_overflow() { return _global_mark_stack.is_out_of_memory(); } bool mark_stack_empty() { return _global_mark_stack.is_empty(); } G1CMRootRegions* root_regions() { return &_root_regions; } @@ -601,12 +640,10 @@ // Notify data structures that a GC has started. void note_start_of_gc() { - _global_mark_stack.note_start_of_gc(); } // Notify data structures that a GC is finished. void note_end_of_gc() { - _global_mark_stack.note_end_of_gc(); } // Verify that there are no CSet oops on the stacks (taskqueues / @@ -670,10 +707,7 @@ // references reaches this limit refs_reached_period = 384, // Initial value for the hash seed, used in the work stealing code - init_hash_seed = 17, - // How many entries will be transferred between global stack and - // local queues at once. - global_stack_transfer_size = 1024 + init_hash_seed = 17 }; uint _worker_id; @@ -858,9 +892,10 @@ // It pushes an object on the local queue. inline void push(oop obj); - // These two move entries to/from the global stack. + // Move entries to the global stack. void move_entries_to_global_stack(); - void get_entries_from_global_stack(); + // Move entries from the global stack, return true if we were successful to do so. 
+ bool get_entries_from_global_stack(); // It pops and scans objects from the local queue. If partially is // true, then it stops when the queue size is of a given limit. If --- old/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp 2016-09-05 12:00:54.432451393 +0200 +++ new/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp 2016-09-05 12:00:54.288446914 +0200 @@ -89,14 +89,26 @@ #undef check_mark +#ifndef PRODUCT template inline void G1CMMarkStack::iterate(Fn fn) { - assert_at_safepoint(true); - assert(!stack_modified(), "Saved index " SIZE_FORMAT " must be the same as " SIZE_FORMAT, _saved_index, _index); - for (size_t i = 0; i < _index; ++i) { - fn(_base[i]); + size_t num_chunks = 0; + + OopChunk* cur = _chunk_list; + while (cur != NULL) { + guarantee(num_chunks <= _chunks_in_chunk_list, "Found " SIZE_FORMAT " oop chunks which is more than there should be", num_chunks); + + for (size_t i = 0; i < OopsPerChunk; ++i) { + if (cur->data[i] == NULL) { + break; + } + fn((oop)cur->data[i]); + } + cur = cur->next; + num_chunks++; } } +#endif // It scans an object and visits its children. 
inline void G1CMTask::scan_object(oop obj) { process_grey_object(obj); } --- old/src/share/vm/gc/g1/g1OopClosures.hpp 2016-09-05 12:00:55.177474564 +0200 +++ new/src/share/vm/gc/g1/g1OopClosures.hpp 2016-09-05 12:00:55.045470458 +0200 @@ -34,7 +34,6 @@ class G1ConcurrentMark; class DirtyCardToOopClosure; class G1CMBitMap; -class G1CMMarkStack; class G1ParScanThreadState; class G1CMTask; class ReferenceProcessor; --- old/src/share/vm/memory/allocation.hpp 2016-09-05 12:00:55.942498357 +0200 +++ new/src/share/vm/memory/allocation.hpp 2016-09-05 12:00:55.799493909 +0200 @@ -738,6 +738,7 @@ static size_t size_for(size_t length); public: + static E* allocate_or_null(size_t length); static E* allocate(size_t length); static void free(E* addr, size_t length); }; --- old/src/share/vm/memory/allocation.inline.hpp 2016-09-05 12:00:56.731522896 +0200 +++ new/src/share/vm/memory/allocation.inline.hpp 2016-09-05 12:00:56.599518791 +0200 @@ -153,6 +153,24 @@ } template +E* MmapArrayAllocator::allocate_or_null(size_t length) { + size_t size = size_for(length); + int alignment = os::vm_allocation_granularity(); + + char* addr = os::reserve_memory(size, NULL, alignment, F); + if (addr == NULL) { + return NULL; + } + + if (os::commit_memory(addr, size, !ExecMem, "Allocator (commit)")) { + return (E*)addr; + } else { + os::release_memory(addr, size); + return NULL; + } +} + +template E* MmapArrayAllocator::allocate(size_t length) { size_t size = size_for(length); int alignment = os::vm_allocation_granularity();