--- old/src/share/vm/gc/g1/g1CollectedHeap.cpp 2015-08-13 12:34:54.140423901 +0200 +++ new/src/share/vm/gc/g1/g1CollectedHeap.cpp 2015-08-13 12:34:54.060421528 +0200 @@ -4382,6 +4382,13 @@ } class G1ParEvacuateFollowersClosure : public VoidClosure { +private: + double _start_term; + double _term_time; + size_t _term_attempts; + + void start_term_time() { _term_attempts++; _start_term = os::elapsedTime(); } + void end_term_time() { _term_time += os::elapsedTime() - _start_term; } protected: G1CollectedHeap* _g1h; G1ParScanThreadState* _par_scan_state; @@ -4398,19 +4405,23 @@ RefToScanQueueSet* queues, ParallelTaskTerminator* terminator) : _g1h(g1h), _par_scan_state(par_scan_state), - _queues(queues), _terminator(terminator) {} + _queues(queues), _terminator(terminator), + _start_term(0.0), _term_time(0.0), _term_attempts(0) {} void do_void(); + double term_time() const { return _term_time; } + size_t term_attempts() const { return _term_attempts; } + private: inline bool offer_termination(); }; bool G1ParEvacuateFollowersClosure::offer_termination() { G1ParScanThreadState* const pss = par_scan_state(); - pss->start_term_time(); + start_term_time(); const bool res = terminator()->offer_termination(); - pss->end_term_time(); + end_term_time(); return res; } @@ -4451,15 +4462,17 @@ class G1ParTask : public AbstractGangTask { protected: G1CollectedHeap* _g1h; - RefToScanQueueSet *_queues; + G1ParScanThreadState** _pss; + RefToScanQueueSet* _queues; G1RootProcessor* _root_processor; ParallelTaskTerminator _terminator; uint _n_workers; public: - G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor, uint n_workers) + G1ParTask(G1CollectedHeap* g1h, G1ParScanThreadState** per_thread_states, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor, uint n_workers) : AbstractGangTask("G1 collection"), _g1h(g1h), + _pss(per_thread_states), _queues(task_queues), _root_processor(root_processor), _terminator(n_workers, _queues), @@ -4506,7 +4519,8 @@ void work(uint worker_id) { if (worker_id >= _n_workers) return; // no work needed this round - _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerStart, worker_id, os::elapsedTime()); + double start_sec = os::elapsedTime(); + _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerStart, worker_id, start_sec); { ResourceMark rm; @@ -4514,23 +4528,24 @@ ReferenceProcessor* rp = _g1h->ref_processor_stw(); - G1ParScanThreadState pss(_g1h, worker_id, rp); + G1ParScanThreadState* pss = _pss[worker_id]; + pss->set_ref_processor(rp); bool only_young = _g1h->collector_state()->gcs_are_young(); // Non-IM young GC. - G1ParCopyClosure scan_only_root_cl(_g1h, &pss, rp); + G1ParCopyClosure scan_only_root_cl(_g1h, pss, rp); G1CLDClosure scan_only_cld_cl(&scan_only_root_cl, only_young, // Only process dirty klasses. false); // No need to claim CLDs. // IM young GC. // Strong roots closures. - G1ParCopyClosure scan_mark_root_cl(_g1h, &pss, rp); + G1ParCopyClosure scan_mark_root_cl(_g1h, pss, rp); G1CLDClosure scan_mark_cld_cl(&scan_mark_root_cl, false, // Process all klasses. true); // Need to claim CLDs. // Weak roots closures. - G1ParCopyClosure scan_mark_weak_root_cl(_g1h, &pss, rp); + G1ParCopyClosure scan_mark_weak_root_cl(_g1h, pss, rp); G1CLDClosure scan_mark_weak_cld_cl(&scan_mark_weak_root_cl, false, // Process all klasses. true); // Need to claim CLDs. @@ -4561,8 +4576,7 @@ weak_cld_cl = &scan_only_cld_cl; } - pss.start_strong_roots(); - + double start_strong_roots_sec = os::elapsedTime(); _root_processor->evacuate_roots(strong_root_cl, weak_root_cl, strong_cld_cl, @@ -4570,32 +4584,49 @@ trace_metadata, worker_id); - G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss); + G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, pss); _root_processor->scan_remembered_sets(&push_heap_rs_cl, weak_root_cl, worker_id); - pss.end_strong_roots(); + double strong_roots_sec = os::elapsedTime() - start_strong_roots_sec; + double term_sec = 0.0; + size_t evac_term_attempts = 0; { double start = os::elapsedTime(); - G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator); + G1ParEvacuateFollowersClosure evac(_g1h, pss, _queues, &_terminator); evac.do_void(); + + evac_term_attempts = evac.term_attempts(); + term_sec = evac.term_time(); double elapsed_sec = os::elapsedTime() - start; - double term_sec = pss.term_time(); _g1h->g1_policy()->phase_times()->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_id, elapsed_sec - term_sec); _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::Termination, worker_id, term_sec); - _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, pss.term_attempts()); + _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, evac_term_attempts); } - _g1h->g1_policy()->record_thread_age_table(pss.age_table()); - _g1h->update_surviving_young_words(pss.surviving_young_words()+1); + + // Flush any statistics. + _g1h->g1_policy()->record_thread_age_table(pss->age_table()); + _g1h->update_surviving_young_words(pss->surviving_young_words()); + + assert(pss->queue_is_empty(), "should be empty"); if (PrintTerminationStats) { MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); - pss.print_termination_stats(); + size_t lab_waste; + size_t lab_undo_waste; + pss->waste(lab_waste, lab_undo_waste); + _g1h->print_termination_stats(gclog_or_tty, + worker_id, + (os::elapsedTime() - start_sec) * 1000.0, /* elapsed time */ + strong_roots_sec * 1000.0, /* strong roots time */ + term_sec * 1000.0, /* evac term time */ + evac_term_attempts, /* evac term attempts */ + lab_waste, /* alloc buffer waste */ + lab_undo_waste /* undo waste */ + ); } - assert(pss.queue_is_empty(), "should be empty"); - // Close the inner scope so that the ResourceMark and HandleMark // destructors are executed here and are included as part of the // "GC Worker Time". @@ -4604,6 +4635,31 @@ } }; +void G1CollectedHeap::print_termination_stats_hdr(outputStream* const st) { + st->print_raw_cr("GC Termination Stats"); + st->print_raw_cr(" elapsed --strong roots-- -------termination------- ------waste (KiB)------"); + st->print_raw_cr("thr ms ms % ms % attempts total alloc undo"); + st->print_raw_cr("--- --------- --------- ------ --------- ------ -------- ------- ------- -------"); +} + +void G1CollectedHeap::print_termination_stats(outputStream* const st, + uint worker_id, + double elapsed_ms, + double strong_roots_ms, + double term_ms, + size_t term_attempts, + size_t alloc_buffer_waste, + size_t undo_waste) const { + st->print_cr("%3d %9.2f %9.2f %6.2f " + "%9.2f %6.2f " SIZE_FORMAT_W(8) " " + SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7), + worker_id, elapsed_ms, strong_roots_ms, strong_roots_ms * 100 / elapsed_ms, + term_ms, term_ms * 100 / elapsed_ms, term_attempts, + (alloc_buffer_waste + undo_waste) * HeapWordSize / K, + alloc_buffer_waste * HeapWordSize / K, + undo_waste * HeapWordSize / K); +} + class G1StringSymbolTableUnlinkTask : public AbstractGangTask { private: BoolObjectClosure* _is_alive; @@ -5132,17 +5188,20 @@ class G1STWRefProcTaskExecutor: public AbstractRefProcTaskExecutor { private: - G1CollectedHeap* _g1h; - RefToScanQueueSet* _queues; - FlexibleWorkGang* _workers; - uint _active_workers; + G1CollectedHeap* _g1h; + G1ParScanThreadState** _pss; + RefToScanQueueSet* _queues; + FlexibleWorkGang* _workers; + uint _active_workers; public: G1STWRefProcTaskExecutor(G1CollectedHeap* g1h, + G1ParScanThreadState** per_thread_states, FlexibleWorkGang* workers, RefToScanQueueSet *task_queues, uint n_workers) : _g1h(g1h), + _pss(per_thread_states), _queues(task_queues), _workers(workers), _active_workers(n_workers) @@ -5161,17 +5220,20 @@ typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; ProcessTask& _proc_task; G1CollectedHeap* _g1h; - RefToScanQueueSet *_task_queues; + G1ParScanThreadState** _pss; + RefToScanQueueSet* _task_queues; ParallelTaskTerminator* _terminator; public: G1STWRefProcTaskProxy(ProcessTask& proc_task, - G1CollectedHeap* g1h, - RefToScanQueueSet *task_queues, - ParallelTaskTerminator* terminator) : + G1CollectedHeap* g1h, + G1ParScanThreadState** per_thread_states, + RefToScanQueueSet *task_queues, + ParallelTaskTerminator* terminator) : AbstractGangTask("Process reference objects in parallel"), _proc_task(proc_task), _g1h(g1h), + _pss(per_thread_states), _task_queues(task_queues), _terminator(terminator) {} @@ -5183,11 +5245,12 @@ G1STWIsAliveClosure is_alive(_g1h); - G1ParScanThreadState pss(_g1h, worker_id, NULL); + G1ParScanThreadState* pss = _pss[worker_id]; + pss->set_ref_processor(NULL); - G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, &pss, NULL); + G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, pss, NULL); - G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL); + G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, pss, NULL); OopClosure* copy_non_heap_cl = &only_copy_non_heap_cl; @@ -5197,10 +5260,10 @@ } // Keep alive closure. - G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss); + G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, pss); // Complete GC closure - G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _task_queues, _terminator); + G1ParEvacuateFollowersClosure drain_queue(_g1h, pss, _task_queues, _terminator); // Call the reference processing task's work routine. _proc_task.work(worker_id, is_alive, keep_alive, drain_queue); @@ -5219,7 +5282,7 @@ assert(_workers != NULL, "Need parallel worker threads."); ParallelTaskTerminator terminator(_active_workers, _queues); - G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _queues, &terminator); + G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _pss, _queues, &terminator); _workers->run_task(&proc_task_proxy); } @@ -5261,15 +5324,17 @@ class G1ParPreserveCMReferentsTask: public AbstractGangTask { protected: - G1CollectedHeap* _g1h; - RefToScanQueueSet *_queues; + G1CollectedHeap* _g1h; + G1ParScanThreadState** _pss; + RefToScanQueueSet* _queues; ParallelTaskTerminator _terminator; uint _n_workers; public: - G1ParPreserveCMReferentsTask(G1CollectedHeap* g1h, uint workers, RefToScanQueueSet *task_queues) : + G1ParPreserveCMReferentsTask(G1CollectedHeap* g1h, G1ParScanThreadState** per_thread_states, int workers, RefToScanQueueSet *task_queues) : AbstractGangTask("ParPreserveCMReferents"), _g1h(g1h), + _pss(per_thread_states), _queues(task_queues), _terminator(workers, _queues), _n_workers(workers) @@ -5279,12 +5344,13 @@ ResourceMark rm; HandleMark hm; - G1ParScanThreadState pss(_g1h, worker_id, NULL); - assert(pss.queue_is_empty(), "both queue and overflow should be empty"); + G1ParScanThreadState* pss = _pss[worker_id]; + pss->set_ref_processor(NULL); + assert(pss->queue_is_empty(), "both queue and overflow should be empty"); - G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, &pss, NULL); + G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, pss, NULL); - G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL); + G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, pss, NULL); OopClosure* copy_non_heap_cl = &only_copy_non_heap_cl; @@ -5298,7 +5364,7 @@ // Copying keep alive closure. Applied to referent objects that need // to be copied. - G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss); + G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, pss); ReferenceProcessor* rp = _g1h->ref_processor_cm(); @@ -5331,15 +5397,15 @@ } // Drain the queue - which may cause stealing - G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _queues, &_terminator); + G1ParEvacuateFollowersClosure drain_queue(_g1h, pss, _queues, &_terminator); drain_queue.do_void(); // Allocation buffers were retired at the end of G1ParEvacuateFollowersClosure - assert(pss.queue_is_empty(), "should be"); + assert(pss->queue_is_empty(), "should be"); } }; // Weak Reference processing during an evacuation pause (part 1). -void G1CollectedHeap::process_discovered_references() { +void G1CollectedHeap::process_discovered_references(G1ParScanThreadState** per_thread_states) { double ref_proc_start = os::elapsedTime(); ReferenceProcessor* rp = _ref_processor_stw; @@ -5369,6 +5435,7 @@ uint no_of_gc_workers = workers()->active_workers(); G1ParPreserveCMReferentsTask keep_cm_referents(this, + per_thread_states, no_of_gc_workers, _task_queues); @@ -5383,16 +5450,17 @@ // JNI refs. // Use only a single queue for this PSS. - G1ParScanThreadState pss(this, 0, NULL); - assert(pss.queue_is_empty(), "pre-condition"); + G1ParScanThreadState* pss = per_thread_states[0]; + pss->set_ref_processor(NULL); + assert(pss->queue_is_empty(), "pre-condition"); // We do not embed a reference processor in the copying/scanning // closures while we're actually processing the discovered // reference objects. - G1ParScanExtRootClosure only_copy_non_heap_cl(this, &pss, NULL); + G1ParScanExtRootClosure only_copy_non_heap_cl(this, pss, NULL); - G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, &pss, NULL); + G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, pss, NULL); OopClosure* copy_non_heap_cl = &only_copy_non_heap_cl; @@ -5402,10 +5470,10 @@ } // Keep alive closure. - G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, &pss); + G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, pss); // Serial Complete GC closure - G1STWDrainQueueClosure drain_queue(this, &pss); + G1STWDrainQueueClosure drain_queue(this, pss); // Setup the soft refs policy... rp->setup_policy(false); @@ -5424,7 +5492,7 @@ assert(rp->num_q() == no_of_gc_workers, "sanity"); assert(no_of_gc_workers <= rp->max_num_q(), "sanity"); - G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, no_of_gc_workers); + G1STWRefProcTaskExecutor par_task_executor(this, per_thread_states, workers(), _task_queues, no_of_gc_workers); stats = rp->process_discovered_references(&is_alive, &keep_alive, &drain_queue, @@ -5436,14 +5504,14 @@ _gc_tracer_stw->report_gc_reference_stats(stats); // We have completed copying any necessary live referent objects. - assert(pss.queue_is_empty(), "both queue and overflow should be empty"); + assert(pss->queue_is_empty(), "both queue and overflow should be empty"); double ref_proc_time = os::elapsedTime() - ref_proc_start; g1_policy()->phase_times()->record_ref_proc_time(ref_proc_time * 1000.0); } // Weak Reference processing during an evacuation pause (part 2). -void G1CollectedHeap::enqueue_discovered_references() { +void G1CollectedHeap::enqueue_discovered_references(G1ParScanThreadState** per_thread_states) { double ref_enq_start = os::elapsedTime(); ReferenceProcessor* rp = _ref_processor_stw; @@ -5462,7 +5530,7 @@ assert(rp->num_q() == n_workers, "sanity"); assert(n_workers <= rp->max_num_q(), "sanity"); - G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, n_workers); + G1STWRefProcTaskExecutor par_task_executor(this, per_thread_states, workers(), _task_queues, n_workers); rp->enqueue_discovered_references(&par_task_executor); } @@ -5498,9 +5566,14 @@ double start_par_time_sec = os::elapsedTime(); double end_par_time_sec; + G1ParScanThreadState** per_thread_states = NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC); + for (uint i = 0; i < n_workers; i++) { + per_thread_states[i] = new_par_scan_state(i); + } + { G1RootProcessor root_processor(this, n_workers); - G1ParTask g1_par_task(this, _task_queues, &root_processor, n_workers); + G1ParTask g1_par_task(this, per_thread_states, _task_queues, &root_processor, n_workers); // InitialMark needs claim bits to keep track of the marked-through CLDs. if (collector_state()->during_initial_mark_pause()) { ClassLoaderDataGraph::clear_claimed_marks(); @@ -5508,7 +5581,7 @@ // The individual threads will set their evac-failure closures. if (PrintTerminationStats) { - G1ParScanThreadState::print_termination_stats_hdr(); + print_termination_stats_hdr(gclog_or_tty); } workers()->run_task(&g1_par_task); @@ -5535,7 +5608,7 @@ // as we may have to copy some 'reachable' referent // objects (and their reachable sub-graphs) that were // not copied during the pause. - process_discovered_references(); + process_discovered_references(per_thread_states); if (G1StringDedup::is_enabled()) { double fixup_start = os::elapsedTime(); @@ -5551,6 +5624,12 @@ _allocator->release_gc_alloc_regions(evacuation_info); g1_rem_set()->cleanup_after_oops_into_collection_set_do(); + for (uint i = 0; i < n_workers; i++) { + G1ParScanThreadState* pss = per_thread_states[i]; + delete pss; + } + FREE_C_HEAP_ARRAY(G1ParScanThreadState*, per_thread_states); + record_obj_copy_mem_stats(); // Reset and re-enable the hot card cache. @@ -5577,7 +5656,7 @@ // will log these updates (and dirty their associated // cards). We need these updates logged to update any // RSets. - enqueue_discovered_references(); + enqueue_discovered_references(per_thread_states); redirty_logged_cards(); COMPILER2_PRESENT(DerivedPointerTable::update_pointers());