--- old/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Feb 10 19:14:41 2014
+++ new/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Feb 10 19:14:39 2014
@@ -575,6 +575,7 @@
   _restart_addr(NULL),
   _overflow_list(NULL),
   _stats(cmsGen),
+  _eden_chunk_lock(new Mutex(Mutex::leaf + 1, "CMS_eden_chunk_lock", true)),
   _eden_chunk_array(NULL),     // may be set in ctor body
   _eden_chunk_capacity(0),     // -- ditto --
   _eden_chunk_index(0),        // -- ditto --
@@ -754,7 +755,7 @@
   assert(_eden_chunk_array != NULL || _eden_chunk_capacity == 0, "Error");
 
   // Support for parallelizing survivor space rescan
-  if (CMSParallelRemarkEnabled && CMSParallelSurvivorRemarkEnabled) {
+  if ((CMSParallelRemarkEnabled && CMSParallelSurvivorRemarkEnabled) || CMSParallelInitialMarkEnabled) {
     const size_t max_plab_samples =
       ((DefNewGeneration*)_young_gen)->max_survivor_size()/MinTLABSize;
 
@@ -2136,6 +2137,39 @@
 }
 
+void CMSCollector::print_eden_and_survivor_chunk_arrays() {
+  DefNewGeneration* dng = _young_gen->as_DefNewGeneration();
+  EdenSpace* eden_space = dng->eden();
+  ContiguousSpace* from_space = dng->from();
+  ContiguousSpace* to_space = dng->to();
+  // Eden
+  if (_eden_chunk_array != NULL) {
+    gclog_or_tty->print_cr("eden " PTR_FORMAT "-" PTR_FORMAT "-" PTR_FORMAT "(" SIZE_FORMAT ")",
+                           eden_space->bottom(), eden_space->top(),
+                           eden_space->end(), eden_space->capacity());
+    gclog_or_tty->print_cr("_eden_chunk_index=" SIZE_FORMAT ", "
+                           "_eden_chunk_capacity=" SIZE_FORMAT,
+                           _eden_chunk_index, _eden_chunk_capacity);
+    for (size_t i = 0; i < _eden_chunk_index; i++) {
+      gclog_or_tty->print_cr("_eden_chunk_array[" SIZE_FORMAT "]=" PTR_FORMAT,
+                             i, _eden_chunk_array[i]);
+    }
+  }
+  // Survivor
+  if (_survivor_chunk_array != NULL) {
+    gclog_or_tty->print_cr("survivor " PTR_FORMAT "-" PTR_FORMAT "-" PTR_FORMAT "(" SIZE_FORMAT ")",
+                           from_space->bottom(), from_space->top(),
+                           from_space->end(), from_space->capacity());
+    gclog_or_tty->print_cr("_survivor_chunk_index=" SIZE_FORMAT ", "
+                           "_survivor_chunk_capacity=" SIZE_FORMAT,
+                           _survivor_chunk_index, _survivor_chunk_capacity);
+    for (size_t i = 0; i < _survivor_chunk_index; i++) {
+      gclog_or_tty->print_cr("_survivor_chunk_array[" SIZE_FORMAT "]=" PTR_FORMAT,
+                             i, _survivor_chunk_array[i]);
+    }
+  }
+}
+
 void CMSCollector::getFreelistLocks() const {
   // Get locks for all free lists in all generations that this
   // collector is responsible for
@@ -3526,6 +3560,31 @@
 
 // CMS work
 
+// The common parts of CMSParInitialMarkTask and CMSParRemarkTask.
+class CMSParMarkTask : public AbstractGangTask {
+ protected:
+  CMSCollector*     _collector;
+  int               _n_workers;
+  CMSParMarkTask(const char* name, CMSCollector* collector, int n_workers) :
+    AbstractGangTask(name),
+    _collector(collector),
+    _n_workers(n_workers) {}
+  // Work method in support of parallel rescan ... of young gen spaces
+  void do_young_space_rescan(uint worker_id, OopsInGenClosure* cl,
+                             ContiguousSpace* space,
+                             HeapWord** chunk_array, size_t chunk_top);
+  void work_on_young_gen_roots(uint worker_id, OopsInGenClosure* cl);
+};
+
+// Parallel initial mark task
+class CMSParInitialMarkTask: public CMSParMarkTask {
+ public:
+  CMSParInitialMarkTask(CMSCollector* collector, int n_workers) :
+    CMSParMarkTask("Scan roots and young gen for initial mark in parallel",
+                   collector, n_workers) {}
+  void work(uint worker_id);
+};
+
 // Checkpoint the roots into this generation from outside
 // this generation. [Note this initial checkpoint need only
 // be approximate -- we'll do a catch up phase subsequently.]
@@ -3615,20 +3674,42 @@
   // weak reference processing has not started yet.
   ref_processor()->set_enqueuing_is_done(false);
 
+  if (CMSPrintEdenSurvivorChunks) {
+    print_eden_and_survivor_chunk_arrays();
+  }
+
   {
     // This is not needed. DEBUG_ONLY(RememberKlassesChecker imx(true);)
     COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;)
-    gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
-    gch->gen_process_strong_roots(_cmsGen->level(),
-                                  true,   // younger gens are roots
-                                  true,   // activate StrongRootsScope
-                                  true,   // collecting perm gen
-                                  SharedHeap::ScanningOption(roots_scanning_options()),
-                                  &notOlder,
-                                  true,   // walk all of code cache if (so & SO_CodeCache)
-                                  NULL);
+    if (CMSParallelInitialMarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
+      // The parallel version.
+      FlexibleWorkGang* workers = gch->workers();
+      assert(workers != NULL, "Need parallel worker threads.");
+      int n_workers = workers->active_workers();
+      CMSParInitialMarkTask tsk(this, n_workers);
+      gch->set_par_threads(n_workers);
+      initialize_sequential_subtasks_for_young_gen_rescan(n_workers);
+      if (n_workers > 1) {
+        GenCollectedHeap::StrongRootsScope srs(gch);
+        workers->run_task(&tsk);
+      } else {
+        GenCollectedHeap::StrongRootsScope srs(gch);
+        tsk.work(0);
+      }
+      gch->set_par_threads(0);
+    } else {
+      // The serial version.
+      gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
+      gch->gen_process_strong_roots(_cmsGen->level(),
+                                    true,   // younger gens are roots
+                                    true,   // activate StrongRootsScope
+                                    true,   // collecting perm gen
+                                    SharedHeap::ScanningOption(roots_scanning_options()),
+                                    &notOlder,
+                                    true,   // walk all of code cache if (so & SO_CodeCache)
+                                    NULL);
+    }
   }
-
   // Clear mod-union table; it will be dirtied in the prologue of
   // CMS generation per each younger generation collection.
@@ -4410,7 +4491,9 @@
   verify_overflow_empty();
   _abort_preclean = false;
   if (CMSPrecleaningEnabled) {
-    _eden_chunk_index = 0;
+    if (!CMSEdenChunksRecordAlways) {
+      _eden_chunk_index = 0;
+    }
     size_t used = get_eden_used();
     size_t capacity = get_eden_capacity();
     // Don't start sampling unless we will get sufficiently
@@ -4519,7 +4602,9 @@
   if (!_start_sampling) {
     return;
   }
-  if (_eden_chunk_array) {
+  // When CMSEdenChunksRecordAlways is true, the eden chunk array
+  // is populated by the young generation.
+  if (_eden_chunk_array != NULL && !CMSEdenChunksRecordAlways) {
     if (_eden_chunk_index < _eden_chunk_capacity) {
       _eden_chunk_array[_eden_chunk_index] = *_top_addr;   // take sample
       assert(_eden_chunk_array[_eden_chunk_index] <= *_end_addr,
@@ -4996,6 +5081,10 @@
   // Update the saved marks which may affect the root scans.
   gch->save_marks();
 
+  if (CMSPrintEdenSurvivorChunks) {
+    print_eden_and_survivor_chunk_arrays();
+  }
+
   {
     COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;)
 
@@ -5103,10 +5192,51 @@
   }
 }
 
+void CMSParInitialMarkTask::work(uint worker_id) {
+  elapsedTimer _timer;
+  ResourceMark rm;
+  HandleMark   hm;
+
+  // ---------- scan from roots --------------
+  _timer.start();
+  GenCollectedHeap* gch = GenCollectedHeap::heap();
+  Par_MarkRefsIntoClosure par_mri_cl(_collector->_span, &(_collector->_markBitMap));
+
+  // ---------- young gen roots --------------
+  {
+    work_on_young_gen_roots(worker_id, &par_mri_cl);
+    _timer.stop();
+    if (PrintCMSStatistics != 0) {
+      gclog_or_tty->print_cr(
+        "Finished young gen initial mark scan work in %dth thread: %3.3f sec",
+        worker_id, _timer.seconds());
+    }
+  }
+
+  // ---------- remaining roots --------------
+  _timer.reset();
+  _timer.start();
+  gch->gen_process_strong_roots(_collector->_cmsGen->level(),
+                                false,     // yg was scanned above
+                                false,     // this is parallel code
+                                true,      // collecting perm gen
+                                SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
+                                &par_mri_cl,
+                                true,   // walk all of code cache if (so & SO_CodeCache)
+                                NULL);
+  assert(_collector->should_unload_classes()
+         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_CodeCache),
+         "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
+  _timer.stop();
+  if (PrintCMSStatistics != 0) {
+    gclog_or_tty->print_cr(
+      "Finished remaining root initial mark scan work in %dth thread: %3.3f sec",
+      worker_id, _timer.seconds());
+  }
+}
+
 // Parallel remark task
-class CMSParRemarkTask: public AbstractGangTask {
-  CMSCollector* _collector;
-  int           _n_workers;
+class CMSParRemarkTask: public CMSParMarkTask {
   CompactibleFreeListSpace* _cms_space;
   CompactibleFreeListSpace* _perm_space;
@@ -5122,10 +5252,9 @@
                    CompactibleFreeListSpace* perm_space,
                    int n_workers, FlexibleWorkGang* workers,
                    OopTaskQueueSet* task_queues):
-    AbstractGangTask("Rescan roots and grey objects in parallel"),
-    _collector(collector),
+    CMSParMarkTask("Rescan roots and grey objects in parallel",
+                   collector, n_workers),
     _cms_space(cms_space), _perm_space(perm_space),
-    _n_workers(n_workers),
     _task_queues(task_queues),
     _term(n_workers, task_queues) { }
 
@@ -5139,11 +5268,6 @@
   void work(uint worker_id);
 
  private:
-  // Work method in support of parallel rescan ... of young gen spaces
-  void do_young_space_rescan(int i, Par_MarkRefsIntoAndScanClosure* cl,
-                             ContiguousSpace* space,
-                             HeapWord** chunk_array, size_t chunk_top);
-
   // ... of dirty cards in old space
   void do_dirty_card_rescan_tasks(CompactibleFreeListSpace* sp, int i,
                                   Par_MarkRefsIntoAndScanClosure* cl);
@@ -5152,6 +5276,25 @@
   void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed);
 };
 
+void CMSParMarkTask::work_on_young_gen_roots(uint worker_id, OopsInGenClosure* cl) {
+  DefNewGeneration* dng = _collector->_young_gen->as_DefNewGeneration();
+  EdenSpace* eden_space = dng->eden();
+  ContiguousSpace* from_space = dng->from();
+  ContiguousSpace* to_space = dng->to();
+
+  HeapWord** eca = _collector->_eden_chunk_array;
+  size_t     ect = _collector->_eden_chunk_index;
+  HeapWord** sca = _collector->_survivor_chunk_array;
+  size_t     sct = _collector->_survivor_chunk_index;
+
+  assert(ect <= _collector->_eden_chunk_capacity, "out of bounds");
+  assert(sct <= _collector->_survivor_chunk_capacity, "out of bounds");
+
+  do_young_space_rescan(worker_id, cl, to_space, NULL, 0);
+  do_young_space_rescan(worker_id, cl, from_space, sca, sct);
+  do_young_space_rescan(worker_id, cl, eden_space, eca, ect);
+}
+
 // work_queue(i) is passed to the closure
 // Par_MarkRefsIntoAndScanClosure.  The "i" parameter
 // also is passed to do_dirty_card_rescan_tasks() and to
@@ -5176,23 +5319,7 @@
   // work first.
   // ---------- young gen roots --------------
   {
-    DefNewGeneration* dng = _collector->_young_gen->as_DefNewGeneration();
-    EdenSpace* eden_space = dng->eden();
-    ContiguousSpace* from_space = dng->from();
-    ContiguousSpace* to_space = dng->to();
-
-    HeapWord** eca = _collector->_eden_chunk_array;
-    size_t     ect = _collector->_eden_chunk_index;
-    HeapWord** sca = _collector->_survivor_chunk_array;
-    size_t     sct = _collector->_survivor_chunk_index;
-
-    assert(ect <= _collector->_eden_chunk_capacity, "out of bounds");
-    assert(sct <= _collector->_survivor_chunk_capacity, "out of bounds");
-
-    do_young_space_rescan(worker_id, &par_mrias_cl, to_space, NULL, 0);
-    do_young_space_rescan(worker_id, &par_mrias_cl, from_space, sca, sct);
-    do_young_space_rescan(worker_id, &par_mrias_cl, eden_space, eca, ect);
-
+    work_on_young_gen_roots(worker_id, &par_mrias_cl);
     _timer.stop();
     if (PrintCMSStatistics != 0) {
       gclog_or_tty->print_cr(
@@ -5253,8 +5380,8 @@
 // Note that parameter "i" is not used.
 void
-CMSParRemarkTask::do_young_space_rescan(int i,
-  Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space,
+CMSParMarkTask::do_young_space_rescan(uint worker_id,
+  OopsInGenClosure* cl, ContiguousSpace* space,
   HeapWord** chunk_array, size_t chunk_top) {
   // Until all tasks completed:
   // . claim an unclaimed task
@@ -5450,6 +5577,32 @@
          "Else our work is not yet done");
 }
 
+// Record object boundaries in _eden_chunk_array by sampling the eden
+// top in the slow-path eden object allocation code path and record
+// the boundaries, if CMSEdenChunksRecordAlways is true. If
+// CMSEdenChunksRecordAlways is false, we use the other asynchronous
+// sampling in sample_eden() that activates during the part of the
+// preclean phase.
+void CMSCollector::sample_eden_chunk() {
+  if (CMSEdenChunksRecordAlways && _eden_chunk_array != NULL) {
+    if (_eden_chunk_lock->try_lock()) {
+      // Record a sample. This is the critical section. The contents
+      // of the _eden_chunk_array have to be non-decreasing in the
+      // address order.
+      _eden_chunk_array[_eden_chunk_index] = *_top_addr;
+      assert(_eden_chunk_array[_eden_chunk_index] <= *_end_addr,
+             "Unexpected state of Eden");
+      if (_eden_chunk_index == 0 ||
+          ((_eden_chunk_array[_eden_chunk_index] > _eden_chunk_array[_eden_chunk_index-1]) &&
+           (pointer_delta(_eden_chunk_array[_eden_chunk_index],
+                          _eden_chunk_array[_eden_chunk_index-1]) >= CMSSamplingGrain))) {
+        _eden_chunk_index++;  // commit sample
+      }
+      _eden_chunk_lock->unlock();
+    }
+  }
+}
+
 // Return a thread-local PLAB recording array, as appropriate.
 void* CMSCollector::get_data_recorder(int thr_num) {
   if (_survivor_plab_array != NULL &&
@@ -5473,12 +5626,13 @@
 
 // Merge the per-thread plab arrays into the global survivor chunk
 // array which will provide the partitioning of the survivor space
-// for CMS rescan.
+// for CMS initial scan and rescan.
 void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv,
                                               int no_of_gc_threads) {
   assert(_survivor_plab_array  != NULL, "Error");
   assert(_survivor_chunk_array != NULL, "Error");
-  assert(_collectorState == FinalMarking, "Error");
+  assert(_collectorState == FinalMarking ||
+         (CMSParallelInitialMarkEnabled && _collectorState == InitialMarking), "Error");
   for (int j = 0; j < no_of_gc_threads; j++) {
     _cursor[j] = 0;
   }
@@ -5541,7 +5695,7 @@
 }
 
 // Set up the space's par_seq_tasks structure for work claiming
-// for parallel rescan of young gen.
+// for parallel initial scan and rescan of young gen.
 // See ParRescanTask where this is currently used.
 void
 CMSCollector::
@@ -6691,6 +6845,28 @@
 void MarkRefsIntoClosure::do_oop(oop* p)       { MarkRefsIntoClosure::do_oop_work(p); }
 void MarkRefsIntoClosure::do_oop(narrowOop* p) { MarkRefsIntoClosure::do_oop_work(p); }
 
+Par_MarkRefsIntoClosure::Par_MarkRefsIntoClosure(
+  MemRegion span, CMSBitMap* bitMap):
+    _span(span),
+    _bitMap(bitMap)
+{
+  assert(_ref_processor == NULL, "deliberately left NULL");
+  assert(_bitMap->covers(_span), "_bitMap/_span mismatch");
+}
+
+void Par_MarkRefsIntoClosure::do_oop(oop obj) {
+  // if p points into _span, then mark corresponding bit in _markBitMap
+  assert(obj->is_oop(), "expected an oop");
+  HeapWord* addr = (HeapWord*)obj;
+  if (_span.contains(addr)) {
+    // this should be made more efficient
+    _bitMap->par_mark(addr);
+  }
+}
+
+void Par_MarkRefsIntoClosure::do_oop(oop* p)       { Par_MarkRefsIntoClosure::do_oop_work(p); }
+void Par_MarkRefsIntoClosure::do_oop(narrowOop* p) { Par_MarkRefsIntoClosure::do_oop_work(p); }
+
 // A variant of the above, used for CMS marking verification.
 MarkRefsIntoVerifyClosure::MarkRefsIntoVerifyClosure(
   MemRegion span, CMSBitMap* verification_bm, CMSBitMap* cms_bm):
@@ -9356,7 +9532,6 @@
     return;
   }
 }
-
 // Transfer some number of overflown objects to usual marking
 // stack. Return true if some objects were transferred.
 bool MarkRefsIntoAndScanClosure::take_from_overflow_list() {
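
----------------------------------------------------------------------
Notes on the techniques above (standalone C++11 sketches, not HotSpot code)

The chunk arrays exist so that one contiguous young-gen space can be split
into independently scannable ranges: work_on_young_gen_roots() passes the
recorded boundaries to do_young_space_rescan(), and each gang worker
repeatedly claims an unclaimed range [chunk[i], chunk[i+1]) until all are
done. The following is a minimal standalone sketch of that claiming scheme,
assuming a plain atomic counter in place of the space's par_seq_tasks
structure; scan_range and the names in main() are made-up stand-ins, not
HotSpot API.

#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

typedef const char* HeapWord;  // stand-in for HotSpot's HeapWord*

// Stand-in for applying a marking closure to one claimed range.
static void scan_range(int worker, HeapWord lo, HeapWord hi) {
  std::printf("worker %d scans [%p, %p)\n",
              worker, (const void*)lo, (const void*)hi);
}

int main() {
  // Boundaries: space bottom, the recorded samples, then space top,
  // mirroring how the eden chunk array partitions eden.
  static char space[1 << 16];
  std::vector<HeapWord> chunk;
  chunk.push_back(space);
  chunk.push_back(space + 9000);
  chunk.push_back(space + 30000);
  chunk.push_back(space + 41000);
  chunk.push_back(space + sizeof(space));

  std::atomic<size_t> next_task(0);  // stand-in for par_seq_tasks claiming
  const size_t n_tasks = chunk.size() - 1;

  // Until all tasks are claimed: claim an unclaimed task and scan it.
  auto worker = [&](int id) {
    for (size_t i; (i = next_task.fetch_add(1)) < n_tasks; ) {
      scan_range(id, chunk[i], chunk[i + 1]);
    }
  };
  std::thread t0(worker, 0), t1(worker, 1);
  t0.join();
  t1.join();
  return 0;
}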
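sample_eden_chunk() deliberately guards its critical section with
Mutex::try_lock() rather than a blocking lock: it runs on the slow-path
allocation path, so a thread that loses the race simply drops its sample
instead of stalling, and a later allocation supplies the next one. A sample
is only committed when it keeps the array strictly increasing and lands at
least CMSSamplingGrain past the previous boundary, which keeps the chunks
usable as reasonably balanced scan ranges. Below is a standalone sketch of
the same pattern, assuming made-up names (EdenSampler, kCapacity,
kSamplingGrain) in place of _eden_chunk_array, _eden_chunk_capacity and
CMSSamplingGrain.

#include <cstddef>
#include <cstdio>
#include <mutex>

typedef char* HeapWord;  // stand-in for HotSpot's HeapWord*

class EdenSampler {
  static const size_t    kCapacity      = 64;
  static const ptrdiff_t kSamplingGrain = 4096;
  HeapWord   _chunks[kCapacity];
  size_t     _index;       // committed samples occupy [0, _index)
  std::mutex _lock;
 public:
  EdenSampler() : _index(0) {}
  size_t count() const { return _index; }
  // Called from a slow-path allocation with the current eden top.
  void sample(HeapWord top) {
    if (_index >= kCapacity) return;  // array full: stop sampling
    if (!_lock.try_lock()) return;    // contended: drop this sample
    _chunks[_index] = top;
    // Commit only if the array stays strictly increasing in address
    // order and the new boundary is at least one grain past the last.
    if (_index == 0 ||
        (_chunks[_index] > _chunks[_index - 1] &&
         _chunks[_index] - _chunks[_index - 1] >= kSamplingGrain)) {
      _index++;
    }
    _lock.unlock();
  }
};

int main() {
  static char eden[1 << 20];
  EdenSampler sampler;
  // Simulate successive slow-path allocations bumping eden top.
  for (size_t top = 0; top < sizeof(eden); top += 1000) {
    sampler.sample(eden + top);
  }
  std::printf("committed %zu samples\n", sampler.count());
  return 0;
}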
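merge_survivor_plab_arrays(), whose assert the patch relaxes to also accept
InitialMarking, works because each GC worker records its survivor PLAB
boundaries in address order: the per-thread arrays are already sorted, so
the global _survivor_chunk_array can be produced with a cursor-per-thread
k-way merge, which is what the _cursor[j] resets in the hunk above prepare
for. A hedged standalone sketch of such a merge, assuming the made-up name
merge_plab_samples and ints in place of HeapWord* boundaries:

#include <cstdio>
#include <vector>

// Merge k sorted per-thread sample arrays into one sorted array by
// repeatedly taking the smallest not-yet-consumed sample.
static std::vector<int> merge_plab_samples(
    const std::vector<std::vector<int> >& per_thread) {
  std::vector<size_t> cursor(per_thread.size(), 0);  // like _cursor[j]
  std::vector<int> merged;
  for (;;) {
    int best = -1;  // thread owning the smallest next sample
    for (size_t j = 0; j < per_thread.size(); j++) {
      if (cursor[j] < per_thread[j].size() &&
          (best < 0 ||
           per_thread[j][cursor[j]] < per_thread[best][cursor[best]])) {
        best = (int)j;
      }
    }
    if (best < 0) break;  // every cursor is exhausted
    merged.push_back(per_thread[best][cursor[best]++]);
  }
  return merged;
}

int main() {
  std::vector<std::vector<int> > per_thread = {
    {10, 40, 90}, {20, 30, 100}, {50}
  };
  std::vector<int> merged = merge_plab_samples(per_thread);
  for (size_t i = 0; i < merged.size(); i++) {
    std::printf("%d ", merged[i]);  // prints: 10 20 30 40 50 90 100
  }
  std::printf("\n");
  return 0;
}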