--- old/src/share/vm/gc/g1/g1ConcurrentMark.cpp 2017-03-07 13:41:01.771184892 +0100 +++ new/src/share/vm/gc/g1/g1ConcurrentMark.cpp 2017-03-07 13:41:01.655181394 +0100 @@ -146,15 +146,15 @@ assert(new_capacity <= _max_chunk_capacity, "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity); - OopChunk* new_base = MmapArrayAllocator::allocate_or_null(new_capacity); + TaskQueueEntryChunk* new_base = MmapArrayAllocator::allocate_or_null(new_capacity); if (new_base == NULL) { - log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(OopChunk)); + log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk)); return false; } // Release old mapping. if (_base != NULL) { - MmapArrayAllocator::free(_base, _chunk_capacity); + MmapArrayAllocator::free(_base, _chunk_capacity); } _base = new_base; @@ -166,16 +166,16 @@ } size_t G1CMMarkStack::capacity_alignment() { - return (size_t)lcm(os::vm_allocation_granularity(), sizeof(OopChunk)) / sizeof(void*); + return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry); } bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) { guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized."); - size_t const OopChunkSizeInVoidStar = sizeof(OopChunk) / sizeof(void*); + size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry); - _max_chunk_capacity = (size_t)align_size_up(max_capacity, capacity_alignment()) / OopChunkSizeInVoidStar; - size_t initial_chunk_capacity = (size_t)align_size_up(initial_capacity, capacity_alignment()) / OopChunkSizeInVoidStar; + _max_chunk_capacity = (size_t)align_size_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; + size_t initial_chunk_capacity = (size_t)align_size_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar; guarantee(initial_chunk_capacity <= _max_chunk_capacity, "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT, @@ -211,49 +211,49 @@ G1CMMarkStack::~G1CMMarkStack() { if (_base != NULL) { - MmapArrayAllocator::free(_base, _chunk_capacity); + MmapArrayAllocator::free(_base, _chunk_capacity); } } -void G1CMMarkStack::add_chunk_to_list(OopChunk* volatile* list, OopChunk* elem) { +void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) { elem->next = *list; *list = elem; } -void G1CMMarkStack::add_chunk_to_chunk_list(OopChunk* elem) { +void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) { MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); add_chunk_to_list(&_chunk_list, elem); _chunks_in_chunk_list++; } -void G1CMMarkStack::add_chunk_to_free_list(OopChunk* elem) { +void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) { MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); add_chunk_to_list(&_free_list, elem); } -G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_list(OopChunk* volatile* list) { - OopChunk* result = *list; +G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) { + TaskQueueEntryChunk* result = *list; if (result != NULL) { *list = (*list)->next; } return result; } -G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_chunk_list() { +G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() { MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag); - OopChunk* result = remove_chunk_from_list(&_chunk_list); + TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list); if (result != NULL) { _chunks_in_chunk_list--; } return result; } -G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_free_list() { +G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() { MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag); return remove_chunk_from_list(&_free_list); } -G1CMMarkStack::OopChunk* G1CMMarkStack::allocate_new_chunk() { +G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() { // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code. // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding // wraparound of _hwm. @@ -266,14 +266,14 @@ return NULL; } - OopChunk* result = ::new (&_base[cur_idx]) OopChunk; + TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk; result->next = NULL; return result; } bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) { // Get a new chunk. - OopChunk* new_chunk = remove_chunk_from_free_list(); + TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list(); if (new_chunk == NULL) { // Did not get a chunk from the free list. Allocate from backing memory. @@ -293,7 +293,7 @@ } bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) { - OopChunk* cur = remove_chunk_from_chunk_list(); + TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list(); if (cur == NULL) { return false; @@ -2009,10 +2009,14 @@ { } void operator()(G1TaskQueueEntry task_entry) const { - guarantee(task_entry.is_array_slice() || task_entry.obj()->is_oop(), + if (task_entry.is_array_slice()) { + guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice())); + return; + } + guarantee(task_entry.obj()->is_oop(), "Non-oop " PTR_FORMAT ", phase: %s, info: %d", p2i(task_entry.obj()), _phase, _info); - guarantee(task_entry.is_array_slice() || !_g1h->is_in_cset(task_entry.obj()), + guarantee(!_g1h->is_in_cset(task_entry.obj()), "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", p2i(task_entry.obj()), _phase, _info); } @@ -2208,7 +2212,7 @@ // We move that task's local finger along. _task->move_finger_to(addr); - _task->scan_object(oop(addr)); + _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr))); // we only partially drain the local queue and global stack _task->drain_local_queue(true); _task->drain_global_stack(true); @@ -2464,14 +2468,14 @@ } if (_task_queue->size() > target_size) { - G1TaskQueueEntry obj; - bool ret = _task_queue->pop_local(obj); + G1TaskQueueEntry entry; + bool ret = _task_queue->pop_local(entry); while (ret) { - scan_object(obj); + scan_task_entry(entry); if (_task_queue->size() <= target_size || has_aborted()) { ret = false; } else { - ret = _task_queue->pop_local(obj); + ret = _task_queue->pop_local(entry); } } } @@ -2876,9 +2880,9 @@ assert(_cm->out_of_regions() && _task_queue->size() == 0, "only way to reach here"); while (!has_aborted()) { - G1TaskQueueEntry obj; - if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { - scan_object(obj); + G1TaskQueueEntry entry; + if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) { + scan_task_entry(entry); // And since we're towards the end, let's totally drain the // local queue and global stack. --- old/src/share/vm/gc/g1/g1ConcurrentMark.hpp 2017-03-07 13:41:02.389203530 +0100 +++ new/src/share/vm/gc/g1/g1ConcurrentMark.hpp 2017-03-07 13:41:02.276200122 +0100 @@ -52,10 +52,17 @@ void* _holder; static const uintptr_t ArraySliceBit = 1; + + G1TaskQueueEntry(oop obj) : _holder(obj) { + assert(_holder != NULL, "Not allowed to set NULL task queue element"); + } + G1TaskQueueEntry(HeapWord* addr) : _holder((void*)((uintptr_t)addr | ArraySliceBit)) { } public: + G1TaskQueueEntry(const G1TaskQueueEntry& other) { _holder = other._holder; } G1TaskQueueEntry() : _holder(NULL) { } - G1TaskQueueEntry(oop obj) : _holder(obj) { } - G1TaskQueueEntry(HeapWord* addr) : _holder((void*)((uintptr_t)addr | ArraySliceBit)) { } + + static G1TaskQueueEntry from_slice(HeapWord* what) { return G1TaskQueueEntry(what); } + static G1TaskQueueEntry from_oop(oop obj) { return G1TaskQueueEntry(obj); } G1TaskQueueEntry& operator=(const G1TaskQueueEntry& t) { _holder = t._holder; @@ -74,7 +81,7 @@ HeapWord* slice() const { assert(is_array_slice(), "Trying to read oop " PTR_FORMAT " as array slice", p2i(_holder)); - return (HeapWord*)((uintptr_t)_holder &~ ArraySliceBit); + return (HeapWord*)((uintptr_t)_holder & ~ArraySliceBit); } bool is_oop() const { return !is_array_slice(); } @@ -216,43 +223,43 @@ // Number of oops that can fit in a single chunk. static const size_t EntriesPerChunk = 1024 - 1 /* One reference for the next pointer */; private: - struct OopChunk { - OopChunk* next; + struct TaskQueueEntryChunk { + TaskQueueEntryChunk* next; G1TaskQueueEntry data[EntriesPerChunk]; }; size_t _max_chunk_capacity; // Maximum number of OopChunk elements on the stack. - OopChunk* _base; // Bottom address of allocated memory area. + TaskQueueEntryChunk* _base; // Bottom address of allocated memory area. size_t _chunk_capacity; // Current maximum number of OopChunk elements. char _pad0[DEFAULT_CACHE_LINE_SIZE]; - OopChunk* volatile _free_list; // Linked list of free chunks that can be allocated by users. - char _pad1[DEFAULT_CACHE_LINE_SIZE - sizeof(OopChunk*)]; - OopChunk* volatile _chunk_list; // List of chunks currently containing data. + TaskQueueEntryChunk* volatile _free_list; // Linked list of free chunks that can be allocated by users. + char _pad1[DEFAULT_CACHE_LINE_SIZE - sizeof(TaskQueueEntryChunk*)]; + TaskQueueEntryChunk* volatile _chunk_list; // List of chunks currently containing data. volatile size_t _chunks_in_chunk_list; - char _pad2[DEFAULT_CACHE_LINE_SIZE - sizeof(OopChunk*) - sizeof(size_t)]; + char _pad2[DEFAULT_CACHE_LINE_SIZE - sizeof(TaskQueueEntryChunk*) - sizeof(size_t)]; volatile size_t _hwm; // High water mark within the reserved space. char _pad4[DEFAULT_CACHE_LINE_SIZE - sizeof(size_t)]; // Allocate a new chunk from the reserved memory, using the high water mark. Returns // NULL if out of memory. - OopChunk* allocate_new_chunk(); + TaskQueueEntryChunk* allocate_new_chunk(); volatile bool _out_of_memory; // Atomically add the given chunk to the list. - void add_chunk_to_list(OopChunk* volatile* list, OopChunk* elem); + void add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem); // Atomically remove and return a chunk from the given list. Returns NULL if the // list is empty. - OopChunk* remove_chunk_from_list(OopChunk* volatile* list); + TaskQueueEntryChunk* remove_chunk_from_list(TaskQueueEntryChunk* volatile* list); - void add_chunk_to_chunk_list(OopChunk* elem); - void add_chunk_to_free_list(OopChunk* elem); + void add_chunk_to_chunk_list(TaskQueueEntryChunk* elem); + void add_chunk_to_free_list(TaskQueueEntryChunk* elem); - OopChunk* remove_chunk_from_chunk_list(); - OopChunk* remove_chunk_from_free_list(); + TaskQueueEntryChunk* remove_chunk_from_chunk_list(); + TaskQueueEntryChunk* remove_chunk_from_free_list(); bool _should_expand; @@ -270,15 +277,15 @@ // Allocate and initialize the mark stack with the given number of oops. bool initialize(size_t initial_capacity, size_t max_capacity); - // Pushes the given buffer containing at most OopsPerChunk elements on the mark - // stack. If less than OopsPerChunk elements are to be pushed, the array must + // Pushes the given buffer containing at most EntriesPerChunk elements on the mark + // stack. If less than EntriesPerChunk elements are to be pushed, the array must // be terminated with a NULL. // Returns whether the buffer contents were successfully pushed to the global mark // stack. bool par_push_chunk(G1TaskQueueEntry* buffer); // Pops a chunk from this mark stack, copying them into the given buffer. This - // chunk may contain up to OopsPerChunk elements. If there are less, the last + // chunk may contain up to EntriesPerChunk elements. If there are less, the last // element in the array is a NULL pointer. bool par_pop_chunk(G1TaskQueueEntry* buffer); @@ -876,7 +883,7 @@ // mark bitmap scan, and so needs to be pushed onto the mark stack. bool is_below_finger(oop obj, HeapWord* global_finger) const; - template void process_grey_object(G1TaskQueueEntry task_entry); + template void process_grey_task_entry(G1TaskQueueEntry task_entry); public: // Apply the closure on the given area of the objArray. Return the number of words // scanned. @@ -941,7 +948,7 @@ inline void deal_with_reference(oop obj); // It scans an object and visits its children. - inline void scan_object(G1TaskQueueEntry task_entry); + inline void scan_task_entry(G1TaskQueueEntry task_entry); // It pushes an object on the local queue. inline void push(G1TaskQueueEntry task_entry); --- old/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp 2017-03-07 13:41:02.993221746 +0100 +++ new/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp 2017-03-07 13:41:02.879218308 +0100 @@ -97,7 +97,7 @@ size_t num_chunks = 0; - OopChunk* cur = _chunk_list; + TaskQueueEntryChunk* cur = _chunk_list; while (cur != NULL) { guarantee(num_chunks <= _chunks_in_chunk_list, "Found " SIZE_FORMAT " oop chunks which is more than there should be", num_chunks); @@ -114,7 +114,7 @@ #endif // It scans an object and visits its children. -inline void G1CMTask::scan_object(G1TaskQueueEntry task_entry) { process_grey_object(task_entry); } +inline void G1CMTask::scan_task_entry(G1TaskQueueEntry task_entry) { process_grey_task_entry(task_entry); } inline void G1CMTask::push(G1TaskQueueEntry task_entry) { assert(task_entry.is_array_slice() || _g1h->is_in_g1_reserved(task_entry.obj()), "invariant"); @@ -167,7 +167,7 @@ } template -inline void G1CMTask::process_grey_object(G1TaskQueueEntry task_entry) { +inline void G1CMTask::process_grey_task_entry(G1TaskQueueEntry task_entry) { assert(scan || (task_entry.is_oop() && task_entry.obj()->is_typeArray()), "Skipping scan of grey non-typeArray"); assert(task_entry.is_array_slice() || _nextMarkBitMap->isMarked((HeapWord*)task_entry.obj()), "Any stolen object should be a slice or marked"); @@ -212,6 +212,7 @@ // be pushed on the stack. So, some duplicate work, but no // correctness problems. if (is_below_finger(obj, global_finger)) { + G1TaskQueueEntry entry = G1TaskQueueEntry::from_oop(obj); if (obj->is_typeArray()) { // Immediately process arrays of primitive types, rather // than pushing on the mark stack. This keeps us from @@ -223,9 +224,9 @@ // by only doing a bookkeeping update and avoiding the // actual scan of the object - a typeArray contains no // references, and the metadata is built-in. - process_grey_object(obj); + process_grey_task_entry(entry); } else { - push(obj); + push(entry); } } } --- old/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.cpp 2017-03-07 13:41:04.554268823 +0100 +++ new/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.cpp 2017-03-07 13:41:04.441265415 +0100 @@ -27,8 +27,7 @@ #include "gc/g1/g1ConcurrentMarkObjArrayProcessor.inline.hpp" void G1CMObjArrayProcessor::push_array_slice(HeapWord* what) { - G1TaskQueueEntry entry(what); - _task->push(entry); + _task->push(G1TaskQueueEntry::from_slice(what)); } size_t G1CMObjArrayProcessor::process_array_slice(objArrayOop obj, HeapWord* start_from, size_t remaining) { --- old/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp 2017-03-07 13:41:10.826457978 +0100 +++ new/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp 2017-03-07 13:41:10.712454540 +0100 @@ -50,7 +50,7 @@ G1CMObjArrayProcessor(G1CMTask* task) : _task(task) { } - // Process the given continuation "oop". Returns the number of words scanned. + // Process the given continuation. Returns the number of words scanned. size_t process_slice(HeapWord* slice); // Start processing the given objArrayOop by scanning the header and pushing its // continuation.