--- old/src/share/vm/gc/g1/g1ConcurrentMark.cpp 2016-11-22 11:22:16.536078201 +0100 +++ new/src/share/vm/gc/g1/g1ConcurrentMark.cpp 2016-11-22 11:22:16.447075507 +0100 @@ -2009,10 +2009,10 @@ { } void operator()(oop obj) const { - guarantee(obj->is_oop(), + guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || obj->is_oop(), "Non-oop " PTR_FORMAT ", phase: %s, info: %d", p2i(obj), _phase, _info); - guarantee(!_g1h->obj_in_cs(obj), + guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->obj_in_cs(obj), "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d", p2i(obj), _phase, _info); } @@ -2436,6 +2436,7 @@ if (elem == NULL) { break; } + assert(G1CMObjArrayProcessor::is_array_slice(elem) || elem->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(elem)); bool success = _task_queue->push(elem); // We only call this when the local queue is empty or under a // given target limit. So, we do not expect this push to fail. @@ -2447,8 +2448,23 @@ return true; } +void G1CMTask::process_object(oop obj) { + if (!G1CMObjArrayProcessor::is_array_slice(obj)) { + assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); + assert(!_g1h->is_on_master_free_list( + _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); + + scan_object(obj); + } else { + _words_scanned += _objArray_processor.process_slice_reference(obj); + check_limits(); + } +} + void G1CMTask::drain_local_queue(bool partially) { - if (has_aborted()) return; + if (has_aborted()) { + return; + } // Decide what the target size is, depending whether we're going to // drain it partially (so that other tasks can steal if they run out @@ -2464,12 +2480,7 @@ oop obj; bool ret = _task_queue->pop_local(obj); while (ret) { - assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" ); - assert(!_g1h->is_on_master_free_list( - _g1h->heap_region_containing((HeapWord*) obj)), "invariant"); - - scan_object(obj); - + process_object(obj); if (_task_queue->size() <= target_size || 
has_aborted()) { ret = false; } else { @@ -2777,9 +2788,7 @@ regular_clock_call(); } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) { if (_nextMarkBitMap->isMarked(mr.start())) { - // The object is marked - apply the closure - BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); - bitmap_closure.do_bit(offset); + scan_object((oop)mr.start()); } // Even if this task aborted while scanning the humongous object // we can (and should) give up the current region. @@ -2880,9 +2889,9 @@ while (!has_aborted()) { oop obj; if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { - assert(_nextMarkBitMap->isMarked((HeapWord*) obj), - "any stolen object should be marked"); - scan_object(obj); + assert(G1CMObjArrayProcessor::is_array_slice(obj) || _nextMarkBitMap->isMarked((HeapWord*) obj), + "Any stolen object should be a slice or marked"); + process_object(obj); // And since we're towards the end, let's totally drain the // local queue and global stack. 
@@ -3003,6 +3012,7 @@ G1CMTaskQueueSet* task_queues) : _g1h(G1CollectedHeap::heap()), _worker_id(worker_id), _cm(cm), + _objArray_processor(this), _claimed(false), _nextMarkBitMap(NULL), _hash_seed(17), _task_queue(task_queue), --- old/src/share/vm/gc/g1/g1ConcurrentMark.hpp 2016-11-22 11:22:17.066094238 +0100 +++ new/src/share/vm/gc/g1/g1ConcurrentMark.hpp 2016-11-22 11:22:16.976091514 +0100 @@ -26,6 +26,7 @@ #define SHARE_VM_GC_G1_G1CONCURRENTMARK_HPP #include "classfile/javaClasses.hpp" +#include "gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp" #include "gc/g1/g1RegionToSpaceMapper.hpp" #include "gc/g1/heapRegionSet.hpp" #include "gc/shared/taskqueue.hpp" @@ -706,11 +707,13 @@ words_scanned_period = 12*1024, // The regular clock call is called once the number of visited // references reaches this limit - refs_reached_period = 384, + refs_reached_period = 1024, // Initial value for the hash seed, used in the work stealing code init_hash_seed = 17 }; + G1CMObjArrayProcessor _objArray_processor; + uint _worker_id; G1CollectedHeap* _g1h; G1ConcurrentMark* _cm; @@ -827,7 +830,11 @@ template void process_grey_object(oop obj); + void process_object(oop obj); public: + // Apply the closure on the given area of the objArray. Return the number of words + // scanned. + inline size_t scan_objArray(objArrayOop const obj, MemRegion mr); // It resets the task; it should be called right at the beginning of // a marking phase. 
void reset(G1CMBitMap* _nextMarkBitMap); --- old/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp 2016-11-22 11:22:17.609110668 +0100 +++ new/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp 2016-11-22 11:22:17.517107885 +0100 @@ -117,11 +117,11 @@ inline void G1CMTask::push(oop obj) { HeapWord* objAddr = (HeapWord*) obj; - assert(_g1h->is_in_g1_reserved(objAddr), "invariant"); - assert(!_g1h->is_on_master_free_list( + assert(G1CMObjArrayProcessor::is_array_slice(obj) || _g1h->is_in_g1_reserved(objAddr), "invariant"); + assert(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_on_master_free_list( _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant"); - assert(!_g1h->is_obj_ill(obj), "invariant"); - assert(_nextMarkBitMap->isMarked(objAddr), "invariant"); + assert(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_obj_ill(obj), "invariant"); + assert(G1CMObjArrayProcessor::is_array_slice(obj) || _nextMarkBitMap->isMarked(objAddr), "invariant"); if (!_task_queue->push(obj)) { // The local task queue looks full. We need to push some entries @@ -171,15 +171,21 @@ assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray"); assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); - size_t obj_size = obj->size(); - _words_scanned += obj_size; - if (scan) { - obj->oop_iterate(_cm_oop_closure); + if (obj->is_objArray() && _objArray_processor.is_large(objArrayOop(obj)->size())) { + _words_scanned += _objArray_processor.process_large_grey_object(obj, objArrayOop(obj)->size()); + } else { + _words_scanned += obj->oop_iterate_size(_cm_oop_closure);; + } } check_limits(); } +inline size_t G1CMTask::scan_objArray(objArrayOop const obj, MemRegion mr) { + obj->oop_iterate(_cm_oop_closure, mr); + return mr.word_size(); +} + inline void G1CMTask::make_reference_grey(oop obj) { if (_cm->par_mark(obj)) { // No OrderAccess:store_load() is needed. 
It is implicit in the --- old/src/share/vm/runtime/globals.hpp 2016-11-22 11:22:18.135126585 +0100 +++ new/src/share/vm/runtime/globals.hpp 2016-11-22 11:22:18.029123377 +0100 @@ -1988,7 +1988,7 @@ experimental(uintx, WorkStealingSpinToYieldRatio, 10, \ "Ratio of hard spins to calls to yield") \ \ - develop(uintx, ObjArrayMarkingStride, 512, \ + develop(uintx, ObjArrayMarkingStride, 2048, \ "Number of object array elements to push onto the marking stack " \ "before pushing a continuation entry") \ \ --- /dev/null 2016-11-21 12:07:06.466762882 +0100 +++ new/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.cpp 2016-11-22 11:22:18.611140988 +0100 @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "gc/g1/g1ConcurrentMark.inline.hpp" + +oop G1CMObjArrayProcessor::encode_array_slice(HeapWord* addr) { + return oop((void*)((uintptr_t)addr | ArraySliceBit)); +} + +HeapWord* G1CMObjArrayProcessor::decode_array_slice(oop value) { + assert(is_array_slice(value), "Given value " PTR_FORMAT " is not an array slice", p2i(value)); + return (HeapWord*)((uintptr_t)(void*)value & ~ArraySliceBit); +} + +void G1CMObjArrayProcessor::push_array_slice(HeapWord* what) { + oop obj = encode_array_slice(what); + _task->push(obj); +} + +size_t G1CMObjArrayProcessor::process_array_slice(objArrayOop const obj, HeapWord* start_from, size_t remaining) { + size_t words_to_scan = remaining; + + if (words_to_scan > ObjArrayMarkingStride) { + words_to_scan = ObjArrayMarkingStride; + push_array_slice(start_from + words_to_scan); + } + + // Then process current area. + MemRegion mr(start_from, words_to_scan); + return _task->scan_objArray(obj, mr); +} + +size_t G1CMObjArrayProcessor::process_large_grey_object(oop const obj, size_t word_size) { + assert(obj->is_objArray() && is_large(obj->size()), "Must be an array object %d and large " SIZE_FORMAT, obj->is_objArray(), (size_t)obj->size()); + + return process_array_slice(objArrayOop(obj), (HeapWord*)obj, word_size); +} + +size_t G1CMObjArrayProcessor::process_slice_reference(oop const obj) { + HeapWord* const decoded_address = decode_array_slice(obj); + + // Find the start address of the objArrayOop. + // Shortcut the BOT access if the given address is from a humongous object. The BOT + // slide is fast enough for "smaller" objects in non-humongous regions, but is slower + // than directly using the heap region table. + G1CollectedHeap* g1h = G1CollectedHeap::heap(); + HeapRegion* r = g1h->heap_region_containing(decoded_address); + + HeapWord* const start_address = r->is_humongous() ? 
+ r->humongous_start_region()->bottom() : + g1h->block_start(decoded_address); + + assert(oop(start_address)->is_objArray(), "Address " PTR_FORMAT " does not refer to an object array ", p2i(start_address)); + assert(start_address < decoded_address, + "Object start address " PTR_FORMAT " must be smaller than decoded address " PTR_FORMAT, + p2i(start_address), + p2i(decoded_address)); + + objArrayOop const objArray = objArrayOop(start_address); + + size_t const already_scanned = decoded_address - start_address; + size_t const remaining = objArray->size() - already_scanned; + + return process_array_slice(objArray, decoded_address, remaining); +} --- /dev/null 2016-11-21 12:07:06.466762882 +0100 +++ new/src/share/vm/gc/g1/g1ConcurrentMarkObjArrayProcessor.hpp 2016-11-22 11:22:19.073154968 +0100 @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#ifndef SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP +#define SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP + +#include "oops/oopsHierarchy.hpp" +#include "memory/allocation.hpp" + +class G1CMTask; + +// Helper class to mark through large objArrays during marking in an efficient way. +// Instead of pushing large object arrays, we push continuations onto the +// mark stack. These continuations are identified by having their LSB set. +// This allows incremental processing of large objects. +class G1CMObjArrayProcessor VALUE_OBJ_CLASS_SPEC { +private: + // The bit mask for the continuation indicator of elements on the mark stack. + static const size_t ArraySliceBit = 1; + + // Reference to the task for doing the actual work. + G1CMTask* _task; + + // Encodes the given address as a continuation "oop". + oop encode_array_slice(HeapWord* addr); + // Remove the continuation marker from the given oop taken from the mark stack. + HeapWord* decode_array_slice(oop value); + + // Push the continuation at the given address onto the mark stack. + void push_array_slice(HeapWord* addr); + + // Process (apply the closure on) the given continuation of the given objArray. + size_t process_array_slice(objArrayOop const obj, HeapWord* start_from, size_t remaining); +public: + static bool is_array_slice(void* obj) { return ((uintptr_t)obj & ArraySliceBit) != 0; } + + bool is_large(size_t const obj_size) const { return obj_size >= 2 * ObjArrayMarkingStride; } + + G1CMObjArrayProcessor(G1CMTask* task) : _task(task) { + } + + // Process the given continuation "oop". Returns the number of words scanned. + size_t process_slice_reference(oop const obj); + // Start processing the given objArrayOop by scanning the header and pushing its + // continuation. + size_t process_large_grey_object(oop const obj, size_t word_size); +}; + +#endif /* SHARE_VM_GC_G1_G1CONCURRENTMARKOBJARRAYPROCESSOR_HPP */