< prev index next >

src/share/vm/gc/serial/genMarkSweep.cpp

Print this page
rev 8615 : CMSParallelFullGC: Parallel version of CMS Full GC.

*** 151,160 **** --- 151,195 ---- gch->update_time_of_last_gc(now); gch->trace_heap_after_gc(_gc_tracer); } + typedef OverflowTaskQueue<oop, mtGC> ObjTaskQueue; + typedef GenericTaskQueueSet<ObjTaskQueue, mtGC> ObjTaskQueueSet; + typedef OverflowTaskQueue<ObjArrayTask, mtGC> ObjArrayTaskQueue; + typedef GenericTaskQueueSet<ObjArrayTaskQueue, mtGC> ObjArrayTaskQueueSet; + + ObjTaskQueueSet* GenMarkSweep::_pms_task_queues = NULL; + ObjArrayTaskQueueSet* GenMarkSweep::_pms_objarray_task_queues = NULL; + ObjTaskQueue* GenMarkSweep::_pms_vm_thread_task_queue = NULL; + ObjArrayTaskQueue* GenMarkSweep::_pms_vm_thread_objarray_task_queue = NULL; + bool GenMarkSweep::_pms_task_queues_initialized = false; + + // Initialize data structures for PMS. + void GenMarkSweep::initialize_pms_task_queues() { + GenCollectedHeap* gch = GenCollectedHeap::heap(); + WorkGang* work_gang = gch->workers(); + int n_workers = work_gang->total_workers(); + _pms_task_queues = new ObjTaskQueueSet(n_workers); + _pms_objarray_task_queues = new ObjArrayTaskQueueSet(n_workers); + + for (int i = 0; i < n_workers; i++) { + ObjTaskQueue* q = new ObjTaskQueue(); + _pms_task_queues->register_queue(i, q); + _pms_task_queues->queue(i)->initialize(); + + ObjArrayTaskQueue* oaq = new ObjArrayTaskQueue(); + _pms_objarray_task_queues->register_queue(i, oaq); + _pms_objarray_task_queues->queue(i)->initialize(); + } + + _pms_vm_thread_task_queue = new ObjTaskQueue(); + _pms_vm_thread_task_queue->initialize(); + _pms_vm_thread_objarray_task_queue = new ObjArrayTaskQueue(); + _pms_vm_thread_objarray_task_queue->initialize(); + } + void GenMarkSweep::allocate_stacks() { GenCollectedHeap* gch = GenCollectedHeap::heap(); // Scratch request on behalf of old generation; will do no allocation. ScratchBlock* scratch = gch->gather_scratch(gch->old_gen(), 0);
*** 167,176 **** --- 202,280 ---- _preserved_count_max = 0; } _preserved_marks = (PreservedMark*)scratch; _preserved_count = 0; + + if (CMSParallelFullGC) { + if (!_pms_task_queues_initialized) { + _pms_task_queues_initialized = true; + initialize_pms_task_queues(); + } + + // Split evenly the scratch memory among the vm thread and the + // worker threads. + WorkGang* work_gang = gch->workers(); + int n_workers = work_gang->total_workers(); + PreservedMark* preserved_marks_top = _preserved_marks; + size_t preserved_count_max_per_thread = _preserved_count_max / (1 + n_workers); + + NamedThread* vm_thread = Thread::current()->as_Named_thread(); + assert(vm_thread->is_VM_thread(), "Must be run by the VM thread"); + + vm_thread->_pms_task_queue = _pms_vm_thread_task_queue; + vm_thread->_pms_objarray_task_queue = _pms_vm_thread_objarray_task_queue; + // Assign the statically allocated data structures to the VM + // thread and avoid allocating a new set for the VM thread. + vm_thread->_pms_preserved_mark_stack = &_preserved_mark_stack; + vm_thread->_pms_preserved_oop_stack = &_preserved_oop_stack; + vm_thread->_pms_preserved_count = _preserved_count; + vm_thread->_pms_preserved_count_max = preserved_count_max_per_thread; + vm_thread->_pms_preserved_marks = preserved_marks_top; + + preserved_marks_top += preserved_count_max_per_thread; + + // allocate per-thread marking_stack and objarray_stack here. + for (int i = 0; i < n_workers; i++) { + GangWorker* worker = work_gang->gang_worker(i); + // typedef to workaround NEW_C_HEAP_OBJ macro, which can not deal with ',' + typedef Stack<markOop, mtGC> GCMarkOopStack; + typedef Stack<oop, mtGC> GCOopStack; + // A ResourceStack might be a good choice here, but since there's no precedent of its + // use anywhere else in HotSpot, it may not be reliable. Instead, allocate a Stack + // with NEW_C_HEAP_OBJ, and call the constructor explicitly. + worker->_pms_preserved_mark_stack = NEW_C_HEAP_OBJ(GCMarkOopStack, mtGC); + new (worker->_pms_preserved_mark_stack) Stack<markOop, mtGC>(); + worker->_pms_preserved_oop_stack = NEW_C_HEAP_OBJ(GCOopStack, mtGC); + new (worker->_pms_preserved_oop_stack) Stack<oop, mtGC>(); + worker->_pms_preserved_count = 0; + worker->_pms_preserved_count_max = preserved_count_max_per_thread; + worker->_pms_preserved_marks = preserved_marks_top; + preserved_marks_top += preserved_count_max_per_thread; + } + // Note _preserved_marks and _preserved_count_max aren't directly used + // by the marking code if CMSParallelFullGC. + assert(preserved_marks_top <= _preserved_marks + _preserved_count_max, + "buffer overrun"); + + assert(_pms_mark_bit_map != NULL, "the mark bit map must be initialized at this point."); + if (ShareCMSMarkBitMapWithParallelFullGC) { + // Clear it before the GC because it's shared and can be dirty + // here. + _pms_mark_bit_map->clear(); + } else { + // If the mark bit map isn't shared, clear it at the end of GC. + assert(_pms_mark_bit_map->isAllClear(), + "Must have been cleared at the last invocation or at initialization."); + } + _pms_mark_counter = 0; + + assert(_pms_region_array_set == NULL, "Must be NULL"); + // Create region arrays before marking. + // We are in a ResourceMark in CMSCollector::do_collection(). + _pms_region_array_set = new PMSRegionArraySet(); + } } void GenMarkSweep::deallocate_stacks() { if (!UseG1GC) {
*** 180,194 **** _preserved_mark_stack.clear(true); _preserved_oop_stack.clear(true); _marking_stack.clear(); _objarray_stack.clear(true); } void GenMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) { // Recursively traverse all live objects and mark them ! GCTraceTime tm("phase 1", PrintGC && Verbose, true, _gc_timer, _gc_tracer->gc_id()); GenCollectedHeap* gch = GenCollectedHeap::heap(); // Because follow_root_closure is created statically, cannot // use OopsInGenClosure constructor which takes a generation, --- 284,365 ---- _preserved_mark_stack.clear(true); _preserved_oop_stack.clear(true); _marking_stack.clear(); _objarray_stack.clear(true); + + if (CMSParallelFullGC) { + assert_marking_stack_empty(); + + NamedThread* vm_thread = Thread::current()->as_Named_thread(); + assert(vm_thread->is_VM_thread(), "Must be run by the main CMS thread"); + vm_thread->reset_pms_data(); + + // clear per-thread marking_stack and objarray_stack here. + GenCollectedHeap* gch = GenCollectedHeap::heap(); + WorkGang* work_gang = gch->workers(); + int n_workers = work_gang->total_workers(); + + for (int i = 0; i < n_workers; i++) { + GangWorker* worker = work_gang->gang_worker(i); + // typedef to workaround FREE_C_HEAP_ARRAY macro, which can not deal + // with ',' + typedef Stack<markOop, mtGC> GCMarkOopStack; + typedef Stack<oop, mtGC> GCOopStack; + // Call the Stack destructor which is the clear function + // since FREE_C_HEAP_ARRAY doesn't. + ((Stack<markOop, mtGC>*)worker->_pms_preserved_mark_stack)->clear(true); + ((Stack<oop, mtGC>*)worker->_pms_preserved_oop_stack)->clear(true); + // Free the allocated memory + FREE_C_HEAP_ARRAY(GCMarkOopStack, worker->_pms_preserved_mark_stack); + FREE_C_HEAP_ARRAY(GCOopStack, worker->_pms_preserved_oop_stack); + worker->_pms_preserved_mark_stack = NULL; + worker->_pms_preserved_oop_stack = NULL; + + worker->reset_pms_data(); + } + + if (!ShareCMSMarkBitMapWithParallelFullGC) { + _pms_mark_bit_map->clear(); + } + _pms_region_array_set->cleanup(); + _pms_region_array_set = NULL; + _pms_mark_counter = 0; + } + } + + void GenMarkSweep::assert_marking_stack_empty() { + #ifdef ASSERT + if (!CMSParallelFullGC) { + assert(_marking_stack.is_empty(), "just drained"); + assert(_objarray_stack.is_empty(), "just drained"); + } else { + NamedThread* thr = Thread::current()->as_Named_thread(); + + assert(thr->is_VM_thread(), "Must be run by the main CMS thread"); + assert(((ObjTaskQueue*)thr->_pms_task_queue)->is_empty(), "just drained"); + assert(((ObjArrayTaskQueue*)thr->_pms_objarray_task_queue)->is_empty(), "just drained"); + + // Check that all the per-thread marking stacks are empty here. + GenCollectedHeap* gch = GenCollectedHeap::heap(); + WorkGang* work_gang = gch->workers(); + int n_workers = work_gang->total_workers(); + + for (int i = 0; i < n_workers; i++) { + GangWorker* worker = work_gang->gang_worker(i); + assert(((ObjTaskQueue*)worker->_pms_task_queue)->is_empty(), "just drained"); + assert(((ObjArrayTaskQueue*)worker->_pms_objarray_task_queue)->is_empty(), "just drained"); + } + } + #endif // ASSERT } void GenMarkSweep::mark_sweep_phase1(bool clear_all_softrefs) { // Recursively traverse all live objects and mark them ! GCTraceTime tm("phase 1", ! PrintGC && (Verbose || LogCMSParallelFullGC), ! true, _gc_timer, _gc_tracer->gc_id()); GenCollectedHeap* gch = GenCollectedHeap::heap(); // Because follow_root_closure is created statically, cannot // use OopsInGenClosure constructor which takes a generation,
*** 198,246 **** // Need new claim bits before marking starts. ClassLoaderDataGraph::clear_claimed_marks(); { StrongRootsScope srs(1); gch->gen_process_roots(&srs, GenCollectedHeap::OldGen, false, // Younger gens are not roots. GenCollectedHeap::SO_None, ClassUnloading, &follow_root_closure, &follow_root_closure, &follow_cld_closure); } // Process reference objects found during marking { ref_processor()->setup_policy(clear_all_softrefs); const ReferenceProcessorStats& stats = ref_processor()->process_discovered_references( &is_alive, &keep_alive, &follow_stack_closure, NULL, _gc_timer, _gc_tracer->gc_id()); gc_tracer()->report_gc_reference_stats(stats); } // This is the point where the entire marking should have completed. ! assert(_marking_stack.is_empty(), "Marking should have completed"); // Unload classes and purge the SystemDictionary. bool purged_class = SystemDictionary::do_unloading(&is_alive); // Unload nmethods. CodeCache::do_unloading(&is_alive, purged_class); // Prune dead klasses from subklass/sibling/implementor lists. Klass::clean_weak_klass_links(&is_alive); // Delete entries for dead interned strings. StringTable::unlink(&is_alive); // Clean up unreferenced symbols in symbol table. SymbolTable::unlink(); gc_tracer()->report_object_count_after_gc(&is_alive); } void GenMarkSweep::mark_sweep_phase2() { --- 369,463 ---- // Need new claim bits before marking starts. ClassLoaderDataGraph::clear_claimed_marks(); { + GCTraceTime tm1("marking", PrintGC && (Verbose || LogCMSParallelFullGC), + true, NULL, _gc_tracer->gc_id()); + if (!CMSParallelFullGC) { StrongRootsScope srs(1); gch->gen_process_roots(&srs, GenCollectedHeap::OldGen, false, // Younger gens are not roots. GenCollectedHeap::SO_None, ClassUnloading, &follow_root_closure, &follow_root_closure, &follow_cld_closure); + } else { + GenCollectedHeap* gch = GenCollectedHeap::heap(); + WorkGang* workers = gch->workers(); + assert(workers != NULL, "Need parallel worker threads."); + int n_workers = workers->active_workers(); + + StrongRootsScope srs(n_workers); + PMSMarkTask tsk(&srs, workers, _pms_task_queues, _pms_objarray_task_queues); + if (n_workers > 1) { + // Make sure refs discovery MT-safe + assert(ref_processor()->discovery_is_mt(), + "Ref discovery must already be set to MT-safe"); + workers->run_task(&tsk); + } else { + tsk.work(0); + } + } } + assert_marking_stack_empty(); + // Process reference objects found during marking { + GCTraceTime tm2("ref processing", PrintGC && (Verbose || LogCMSParallelFullGC), + true, NULL, _gc_tracer->gc_id()); ref_processor()->setup_policy(clear_all_softrefs); + + if (ref_processor()->processing_is_mt()) { + assert(CMSParallelFullGC, "CMSParallelFullGC must be true"); + PMSRefProcTaskExecutor task_executor(_pms_task_queues, _pms_objarray_task_queues); + const ReferenceProcessorStats& stats = + ref_processor()->process_discovered_references( + &is_alive, &keep_alive, &follow_stack_closure, &task_executor, _gc_timer, _gc_tracer->gc_id()); + gc_tracer()->report_gc_reference_stats(stats); + } else { + assert(!CMSParallelFullGC, "CMSParallelFullGC must be false"); const ReferenceProcessorStats& stats = ref_processor()->process_discovered_references( &is_alive, &keep_alive, &follow_stack_closure, NULL, _gc_timer, _gc_tracer->gc_id()); gc_tracer()->report_gc_reference_stats(stats); } + } + GCTraceTime tm3("class unloading", PrintGC && (Verbose || LogCMSParallelFullGC), + true, NULL, _gc_tracer->gc_id()); // This is the point where the entire marking should have completed. ! assert_marking_stack_empty(); // Unload classes and purge the SystemDictionary. bool purged_class = SystemDictionary::do_unloading(&is_alive); // Unload nmethods. CodeCache::do_unloading(&is_alive, purged_class); + assert_marking_stack_empty(); // Prune dead klasses from subklass/sibling/implementor lists. Klass::clean_weak_klass_links(&is_alive); // Delete entries for dead interned strings. StringTable::unlink(&is_alive); // Clean up unreferenced symbols in symbol table. SymbolTable::unlink(); + #ifdef ASSERT + if (CMSParallelFullGC) { + // This is expensive! Verify that the region live sizes computed + // during marking match what the mark bit map says. + MarkSweep::pms_region_array_set()->verify_live_size(); + } + #endif + gc_tracer()->report_object_count_after_gc(&is_alive); } void GenMarkSweep::mark_sweep_phase2() {
*** 257,274 **** // phase2, phase3 and phase4, but the ValidateMarkSweep live oops // tracking expects us to do so. See comment under phase4. GenCollectedHeap* gch = GenCollectedHeap::heap(); ! GCTraceTime tm("phase 2", PrintGC && Verbose, true, _gc_timer, _gc_tracer->gc_id()); gch->prepare_for_compaction(); } class GenAdjustPointersClosure: public GenCollectedHeap::GenClosure { public: void do_generation(Generation* gen) { gen->adjust_pointers(); } }; void GenMarkSweep::mark_sweep_phase3() { --- 474,497 ---- // phase2, phase3 and phase4, but the ValidateMarkSweep live oops // tracking expects us to do so. See comment under phase4. GenCollectedHeap* gch = GenCollectedHeap::heap(); ! GCTraceTime tm("phase 2", PrintGC && (Verbose || LogCMSParallelFullGC), ! true, _gc_timer, _gc_tracer->gc_id()); gch->prepare_for_compaction(); } class GenAdjustPointersClosure: public GenCollectedHeap::GenClosure { public: void do_generation(Generation* gen) { + GCTraceTime tm("per-gen-adjust", PrintGC && (Verbose || LogCMSParallelFullGC), + true, NULL, GCId::peek()); + if (LogCMSParallelFullGC) { + gclog_or_tty->print_cr("%s", gen->name()); + } gen->adjust_pointers(); } }; void GenMarkSweep::mark_sweep_phase3() {
*** 285,311 **** // as the Universe has not been created when the static constructors // are run. adjust_pointer_closure.set_orig_generation(gch->old_gen()); { StrongRootsScope srs(1); - gch->gen_process_roots(&srs, GenCollectedHeap::OldGen, false, // Younger gens are not roots. GenCollectedHeap::SO_AllCodeCache, GenCollectedHeap::StrongAndWeakRoots, &adjust_pointer_closure, &adjust_pointer_closure, &adjust_cld_closure); } gch->gen_process_weak_roots(&adjust_pointer_closure); adjust_marks(); GenAdjustPointersClosure blk; gch->generation_iterate(&blk, true); } class GenCompactClosure: public GenCollectedHeap::GenClosure { public: void do_generation(Generation* gen) { --- 508,568 ---- // as the Universe has not been created when the static constructors // are run. adjust_pointer_closure.set_orig_generation(gch->old_gen()); { + GCTraceTime tm("adjust-strong-roots", + (PrintGC && Verbose) || LogCMSParallelFullGC, + true, NULL, _gc_tracer->gc_id()); + if (!CMSParallelFullGC) { StrongRootsScope srs(1); gch->gen_process_roots(&srs, GenCollectedHeap::OldGen, false, // Younger gens are not roots. GenCollectedHeap::SO_AllCodeCache, GenCollectedHeap::StrongAndWeakRoots, &adjust_pointer_closure, &adjust_pointer_closure, &adjust_cld_closure); + } else { + WorkGang* workers = gch->workers(); + assert(workers != NULL, "Need parallel worker threads."); + int n_workers = workers->total_workers(); + StrongRootsScope srs(n_workers); + PMSAdjustRootsTask tsk(&srs, workers); + // Set up for parallel process_strong_roots work. + if (n_workers > 1) { + workers->run_task(&tsk); + } else { + tsk.work(0); + } + } } + { + GCTraceTime tm("adjust-weak-roots", + PrintGC && (Verbose || LogCMSParallelFullGC), + true, NULL, _gc_tracer->gc_id()); + // Now adjust pointers in remaining weak roots. (All of which should + // have been cleared if they pointed to non-surviving objects.) gch->gen_process_weak_roots(&adjust_pointer_closure); + } + { + GCTraceTime tm("adjust-preserved-marks", + PrintGC && (Verbose || LogCMSParallelFullGC), + true, NULL, _gc_tracer->gc_id()); adjust_marks(); + } + + { + GCTraceTime tm("adjust-heap", + PrintGC && (Verbose || LogCMSParallelFullGC), + true, NULL, _gc_tracer->gc_id()); GenAdjustPointersClosure blk; gch->generation_iterate(&blk, true); + } } class GenCompactClosure: public GenCollectedHeap::GenClosure { public: void do_generation(Generation* gen) {
*** 325,334 **** // in the same order in phase2, phase3 and phase4. We don't quite do that // here (perm_gen first rather than last), so we tell the validate code // to use a higher index (saved from phase2) when verifying perm_gen. GenCollectedHeap* gch = GenCollectedHeap::heap(); ! GCTraceTime tm("phase 4", PrintGC && Verbose, true, _gc_timer, _gc_tracer->gc_id()); GenCompactClosure blk; gch->generation_iterate(&blk, true); } --- 582,592 ---- // in the same order in phase2, phase3 and phase4. We don't quite do that // here (perm_gen first rather than last), so we tell the validate code // to use a higher index (saved from phase2) when verifying perm_gen. GenCollectedHeap* gch = GenCollectedHeap::heap(); ! GCTraceTime tm("phase 4", PrintGC && (Verbose || LogCMSParallelFullGC), ! true, _gc_timer, _gc_tracer->gc_id()); GenCompactClosure blk; gch->generation_iterate(&blk, true); }
< prev index next >