--- old/src/share/vm/gc/g1/g1CollectedHeap.cpp 2017-06-12 13:27:17.691059354 +0200 +++ new/src/share/vm/gc/g1/g1CollectedHeap.cpp 2017-06-12 13:27:17.564055426 +0200 @@ -3274,7 +3274,7 @@ // investigate this in CR 7178365. double sample_end_time_sec = os::elapsedTime(); double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS; - size_t total_cards_scanned = per_thread_states.total_cards_scanned(); + size_t total_cards_scanned = g1_policy()->phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ScannedCards); g1_policy()->record_collection_pause_end(pause_time_ms, total_cards_scanned, heap_used_bytes_before_gc); evacuation_info.set_collectionset_used_before(collection_set()->bytes_used_before()); @@ -3464,11 +3464,9 @@ // treating the nmethods visited to act as roots for concurrent marking. // We only want to make sure that the oops in the nmethods are adjusted with regard to the // objects copied by the current evacuation. - size_t cards_scanned = _g1h->g1_rem_set()->oops_into_collection_set_do(&push_heap_rs_cl, - pss->closures()->weak_codeblobs(), - worker_id); - - _pss->add_cards_scanned(worker_id, cards_scanned); + _g1h->g1_rem_set()->oops_into_collection_set_do(&push_heap_rs_cl, + pss->closures()->weak_codeblobs(), + worker_id); double strong_roots_sec = os::elapsedTime() - start_strong_roots_sec; --- old/src/share/vm/gc/g1/g1GCPhaseTimes.cpp 2017-06-12 13:27:18.336079303 +0200 +++ new/src/share/vm/gc/g1/g1GCPhaseTimes.cpp 2017-06-12 13:27:18.216075592 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -78,6 +78,13 @@ _gc_par_phases[GCWorkerEnd] = new WorkerDataArray(max_gc_threads, "GC Worker End (ms):"); _gc_par_phases[Other] = new WorkerDataArray(max_gc_threads, "GC Worker Other (ms):"); + _scan_rs_scanned_cards = new WorkerDataArray(max_gc_threads, "Scanned Cards:"); + _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_scanned_cards, ScannedCards); + _scan_rs_claimed_cards = new WorkerDataArray(max_gc_threads, "Claimed Cards:"); + _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_claimed_cards, ClaimedCards); + _scan_rs_skipped_cards = new WorkerDataArray(max_gc_threads, "Skipped Cards:"); + _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_skipped_cards, SkippedCards); + _update_rs_processed_buffers = new WorkerDataArray(max_gc_threads, "Processed Buffers:"); _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers); @@ -210,8 +217,8 @@ _gc_par_phases[phase]->add(worker_i, secs); } -void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count) { - _gc_par_phases[phase]->set_thread_work_item(worker_i, count); +void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count, uint index) { + _gc_par_phases[phase]->set_thread_work_item(worker_i, count, index); } // return the average time for a phase in milliseconds @@ -219,9 +226,9 @@ return _gc_par_phases[phase]->average() * 1000.0; } -size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase) { - assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count"); - return _gc_par_phases[phase]->thread_work_items()->sum(); +size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase, uint index) { + assert(_gc_par_phases[phase]->thread_work_items(index) != NULL, "No sub count"); + return _gc_par_phases[phase]->thread_work_items(index)->sum(); } template @@ -239,11 +246,13 @@ phase->print_summary_on(out, print_sum); details(phase, Indents[indent]); - WorkerDataArray* work_items = phase->thread_work_items(); - if (work_items != NULL) { - out->print("%s", Indents[indent + 1]); - work_items->print_summary_on(out, true); - details(work_items, Indents[indent + 1]); + for (uint i = 0; i < phase->MaxThreadWorkItems; i++) { + WorkerDataArray* work_items = phase->thread_work_items(i); + if (work_items != NULL) { + out->print("%s", Indents[indent + 1]); + work_items->print_summary_on(out, true); + details(work_items, Indents[indent + 1]); + } } } --- old/src/share/vm/gc/g1/g1GCPhaseTimes.hpp 2017-06-12 13:27:18.927097582 +0200 +++ new/src/share/vm/gc/g1/g1GCPhaseTimes.hpp 2017-06-12 13:27:18.805093809 +0200 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -76,6 +76,12 @@ GCParPhasesSentinel }; + enum GCScanRSWorkItems { + ScannedCards, + ClaimedCards, + SkippedCards + }; + private: // Markers for grouping the phases in the GCPhases enum above static const int GCMainParPhasesLast = GCWorkerEnd; @@ -83,8 +89,15 @@ static const int StringDedupPhasesLast = StringDedupTableFixup; WorkerDataArray* _gc_par_phases[GCParPhasesSentinel]; + WorkerDataArray* _update_rs_processed_buffers; + + WorkerDataArray* _scan_rs_scanned_cards; + WorkerDataArray* _scan_rs_claimed_cards; + WorkerDataArray* _scan_rs_skipped_cards; + WorkerDataArray* _termination_attempts; + WorkerDataArray* _redirtied_cards; double _cur_collection_par_time_ms; @@ -170,12 +183,12 @@ // add a number of seconds to a phase void add_time_secs(GCParPhases phase, uint worker_i, double secs); - void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count); + void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count, uint index = 0); // return the average time for a phase in milliseconds double average_time_ms(GCParPhases phase); - size_t sum_thread_work_items(GCParPhases phase); + size_t sum_thread_work_items(GCParPhases phase, uint index = 0); public: --- old/src/share/vm/gc/g1/g1ParScanThreadState.cpp 2017-06-12 13:27:19.519115892 +0200 +++ new/src/share/vm/gc/g1/g1ParScanThreadState.cpp 2017-06-12 13:27:19.398112150 +0200 @@ -336,16 +336,6 @@ return _states[worker_id]; } -void G1ParScanThreadStateSet::add_cards_scanned(uint worker_id, size_t cards_scanned) { - assert(worker_id < _n_workers, "out of bounds access"); - _cards_scanned[worker_id] += cards_scanned; -} - -size_t G1ParScanThreadStateSet::total_cards_scanned() const { - assert(_flushed, "thread local state from the per thread states should have been flushed"); - return _total_cards_scanned; -} - const size_t* G1ParScanThreadStateSet::surviving_young_words() const { assert(_flushed, "thread local state from the per thread states should have been flushed"); return _surviving_young_words_total; @@ -353,7 +343,6 @@ void G1ParScanThreadStateSet::flush() { assert(!_flushed, "thread local state from the per thread states should be flushed once"); - assert(_total_cards_scanned == 0, "should have been cleared"); for (uint worker_index = 0; worker_index < _n_workers; ++worker_index) { G1ParScanThreadState* pss = _states[worker_index]; @@ -362,8 +351,6 @@ continue; } - _total_cards_scanned += _cards_scanned[worker_index]; - pss->flush(_surviving_young_words_total); delete pss; _states[worker_index] = NULL; --- old/src/share/vm/gc/g1/g1ParScanThreadState.hpp 2017-06-12 13:27:20.110134171 +0200 +++ new/src/share/vm/gc/g1/g1ParScanThreadState.hpp 2017-06-12 13:27:19.987130367 +0200 @@ -198,8 +198,6 @@ G1CollectedHeap* _g1h; G1ParScanThreadState** _states; size_t* _surviving_young_words_total; - size_t* _cards_scanned; - size_t _total_cards_scanned; size_t _young_cset_length; uint _n_workers; bool _flushed; @@ -209,8 +207,6 @@ _g1h(g1h), _states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC)), _surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, young_cset_length, mtGC)), - _cards_scanned(NEW_C_HEAP_ARRAY(size_t, n_workers, mtGC)), - _total_cards_scanned(0), _young_cset_length(young_cset_length), _n_workers(n_workers), _flushed(false) { @@ -218,22 +214,18 @@ _states[i] = NULL; } memset(_surviving_young_words_total, 0, young_cset_length * sizeof(size_t)); - memset(_cards_scanned, 0, n_workers * sizeof(size_t)); } ~G1ParScanThreadStateSet() { assert(_flushed, "thread local state from the per thread states should have been flushed"); FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states); FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total); - FREE_C_HEAP_ARRAY(size_t, _cards_scanned); } void flush(); G1ParScanThreadState* state_for_worker(uint worker_id); - void add_cards_scanned(uint worker_id, size_t cards_scanned); - size_t total_cards_scanned() const; const size_t* surviving_young_words() const; private: --- old/src/share/vm/gc/g1/g1RemSet.cpp 2017-06-12 13:27:20.705152574 +0200 +++ new/src/share/vm/gc/g1/g1RemSet.cpp 2017-06-12 13:27:20.582148770 +0200 @@ -334,8 +334,9 @@ _push_heap_cl(push_heap_cl), _code_root_cl(code_root_cl), _strong_code_root_scan_time_sec(0.0), - _cards(0), - _cards_done(0), + _cards_claimed(0), + _cards_scanned(0), + _cards_skipped(0), _worker_i(worker_i) { _g1h = G1CollectedHeap::heap(); _bot = _g1h->bot(); @@ -354,7 +355,7 @@ _ct_bs->set_card_claimed(index); _push_heap_cl->set_region(r); r->oops_on_card_seq_iterate_careful(mr, _push_heap_cl); - _cards_done++; + _cards_scanned++; } } @@ -389,12 +390,13 @@ claimed_card_block = _scan_state->iter_claimed_next(region_idx, _block_size); } if (current_card < claimed_card_block) { + _cards_skipped++; continue; } HeapWord* card_start = _g1h->bot()->address_for_index(card_index); HeapRegion* card_region = _g1h->heap_region_containing(card_start); - _cards++; + _cards_claimed++; _scan_state->add_dirty_region(card_region->hrm_index()); @@ -411,21 +413,25 @@ return false; } -size_t G1RemSet::scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure, - CodeBlobClosure* heap_region_codeblobs, - uint worker_i) { +void G1RemSet::scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure, + CodeBlobClosure* heap_region_codeblobs, + uint worker_i) { double rs_time_start = os::elapsedTime(); G1ScanRSClosure cl(_scan_state, oops_in_heap_closure, heap_region_codeblobs, worker_i); _g1->collection_set_iterate_from(&cl, worker_i); - double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) - - cl.strong_code_root_scan_time_sec(); + double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) - + cl.strong_code_root_scan_time_sec(); - _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec); - _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, cl.strong_code_root_scan_time_sec()); + G1GCPhaseTimes* p = _g1p->phase_times(); - return cl.cards_done(); + p->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec); + p->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_scanned(), G1GCPhaseTimes::ScannedCards); + p->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_claimed(), G1GCPhaseTimes::ClaimedCards); + p->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_skipped(), G1GCPhaseTimes::SkippedCards); + + p->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, cl.strong_code_root_scan_time_sec()); } // Closure used for updating RSets and recording references that @@ -483,9 +489,9 @@ HeapRegionRemSet::cleanup(); } -size_t G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* cl, - CodeBlobClosure* heap_region_codeblobs, - uint worker_i) { +void G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* cl, + CodeBlobClosure* heap_region_codeblobs, + uint worker_i) { // A DirtyCardQueue that is used to hold cards containing references // that point into the collection set. This DCQ is associated with a // special DirtyCardQueueSet (see g1CollectedHeap.hpp). Under normal @@ -498,7 +504,7 @@ DirtyCardQueue into_cset_dcq(&_into_cset_dirty_card_queue_set); update_rem_set(&into_cset_dcq, cl, worker_i); - return scan_rem_set(cl, heap_region_codeblobs, worker_i);; + scan_rem_set(cl, heap_region_codeblobs, worker_i);; } void G1RemSet::prepare_for_oops_into_collection_set_do() { --- old/src/share/vm/gc/g1/g1RemSet.hpp 2017-06-12 13:27:21.304171100 +0200 +++ new/src/share/vm/gc/g1/g1RemSet.hpp 2017-06-12 13:27:21.182167327 +0200 @@ -108,12 +108,9 @@ // partitioning the work to be done. It should be the same as // the "i" passed to the calling thread's work(i) function. // In the sequential case this param will be ignored. - // - // Returns the number of cards scanned while looking for pointers - // into the collection set. - size_t oops_into_collection_set_do(G1ParPushHeapRSClosure* cl, - CodeBlobClosure* heap_region_codeblobs, - uint worker_i); + void oops_into_collection_set_do(G1ParPushHeapRSClosure* cl, + CodeBlobClosure* heap_region_codeblobs, + uint worker_i); // Prepare for and cleanup after an oops_into_collection_set_do // call. Must call each of these once before and after (in sequential @@ -123,9 +120,9 @@ void prepare_for_oops_into_collection_set_do(); void cleanup_after_oops_into_collection_set_do(); - size_t scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure, - CodeBlobClosure* heap_region_codeblobs, - uint worker_i); + void scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure, + CodeBlobClosure* heap_region_codeblobs, + uint worker_i); G1RemSetScanState* scan_state() const { return _scan_state; } @@ -185,8 +182,10 @@ class G1ScanRSClosure : public HeapRegionClosure { G1RemSetScanState* _scan_state; - size_t _cards_done; - size_t _cards; + size_t _cards_scanned; + size_t _cards_claimed; + size_t _cards_skipped; + G1CollectedHeap* _g1h; G1ParPushHeapRSClosure* _push_heap_cl; @@ -213,8 +212,9 @@ return _strong_code_root_scan_time_sec; } - size_t cards_done() { return _cards_done;} - size_t cards_looked_up() { return _cards;} + size_t cards_scanned() const { return _cards_scanned; } + size_t cards_claimed() const { return _cards_claimed; } + size_t cards_skipped() const { return _cards_skipped; } }; class UpdateRSOopClosure: public ExtendedOopClosure { --- old/src/share/vm/gc/g1/workerDataArray.hpp 2017-06-12 13:27:21.896189410 +0200 +++ new/src/share/vm/gc/g1/workerDataArray.hpp 2017-06-12 13:27:21.774185637 +0200 @@ -33,20 +33,25 @@ template class WorkerDataArray : public CHeapObj { friend class WDAPrinter; +public: + static const uint MaxThreadWorkItems = 3; +private: T* _data; uint _length; const char* _title; - - WorkerDataArray* _thread_work_items; + + WorkerDataArray* _thread_work_items[MaxThreadWorkItems]; public: WorkerDataArray(uint length, const char* title); ~WorkerDataArray(); - void link_thread_work_items(WorkerDataArray* thread_work_items); - void set_thread_work_item(uint worker_i, size_t value); - WorkerDataArray* thread_work_items() const { - return _thread_work_items; + void link_thread_work_items(WorkerDataArray* thread_work_items, uint index = 0); + void set_thread_work_item(uint worker_i, size_t value, uint index = 0); + void add_thread_work_item(uint worker_i, size_t value, uint index = 0); + WorkerDataArray* thread_work_items(uint index = 0) const { + assert(index < MaxThreadWorkItems, "Tried to access thread work item %u max %u", index, MaxThreadWorkItems); + return _thread_work_items[index]; } static T uninitialized(); --- old/src/share/vm/gc/g1/workerDataArray.inline.hpp 2017-06-12 13:27:22.490207782 +0200 +++ new/src/share/vm/gc/g1/workerDataArray.inline.hpp 2017-06-12 13:27:22.368204009 +0200 @@ -32,11 +32,13 @@ template WorkerDataArray::WorkerDataArray(uint length, const char* title) : _title(title), - _length(0), - _thread_work_items(NULL) { + _length(0) { assert(length > 0, "Must have some workers to store data for"); _length = length; _data = NEW_C_HEAP_ARRAY(T, _length, mtGC); + for (uint i = 0; i < MaxThreadWorkItems; i++) { + _thread_work_items[i] = NULL; + } reset(); } @@ -59,14 +61,23 @@ } template -void WorkerDataArray::link_thread_work_items(WorkerDataArray* thread_work_items) { - _thread_work_items = thread_work_items; +void WorkerDataArray::link_thread_work_items(WorkerDataArray* thread_work_items, uint index) { + assert(index < MaxThreadWorkItems, "Tried to access thread work item %u (max %u)", index, MaxThreadWorkItems); + _thread_work_items[index] = thread_work_items; } template -void WorkerDataArray::set_thread_work_item(uint worker_i, size_t value) { - assert(_thread_work_items != NULL, "No sub count"); - _thread_work_items->set(worker_i, value); +void WorkerDataArray::set_thread_work_item(uint worker_i, size_t value, uint index) { + assert(index < MaxThreadWorkItems, "Tried to access thread work item %u (max %u)", index, MaxThreadWorkItems); + assert(_thread_work_items[index] != NULL, "No sub count"); + _thread_work_items[index]->set(worker_i, value); +} + +template +void WorkerDataArray::add_thread_work_item(uint worker_i, size_t value, uint index) { + assert(index < MaxThreadWorkItems, "Tried to access thread work item %u (max %u)", index, MaxThreadWorkItems); + assert(_thread_work_items[index] != NULL, "No sub count"); + _thread_work_items[index]->add(worker_i, value); } template @@ -148,8 +159,10 @@ template void WorkerDataArray::reset() { set_all(uninitialized()); - if (_thread_work_items != NULL) { - _thread_work_items->reset(); + for (uint i = 0; i < MaxThreadWorkItems; i++) { + if (_thread_work_items[i] != NULL) { + _thread_work_items[i]->reset(); + } } } --- old/test/gc/g1/TestGCLogMessages.java 2017-06-12 13:27:23.086226216 +0200 +++ new/test/gc/g1/TestGCLogMessages.java 2017-06-12 13:27:22.964222443 +0200 @@ -94,7 +94,14 @@ new LogMessageWithLevel("Other", Level.INFO), // Update RS + new LogMessageWithLevel("Update RS", Level.DEBUG), + new LogMessageWithLevel("Processed Buffers", Level.DEBUG), new LogMessageWithLevel("Scan HCC", Level.TRACE), + // Scan RS + new LogMessageWithLevel("Scan RS", Level.DEBUG), + new LogMessageWithLevel("Scanned Cards", Level.DEBUG), + new LogMessageWithLevel("Claimed Cards", Level.DEBUG), + new LogMessageWithLevel("Skipped Cards", Level.DEBUG), // Ext Root Scan new LogMessageWithLevel("Thread Roots", Level.TRACE), new LogMessageWithLevel("StringTable Roots", Level.TRACE), --- old/test/native/gc/g1/test_workerDataArray.cpp 2017-06-12 13:27:23.679244557 +0200 +++ new/test/native/gc/g1/test_workerDataArray.cpp 2017-06-12 13:27:23.556240752 +0200 @@ -34,7 +34,11 @@ protected: WorkerDataArrayTest() : title("Test array"), - array(3, title) { + array(3, title), + sub_item_title("Sub item array"), + sub_item(3, sub_item_title) { + + array.link_thread_work_items(&sub_item); } const char* print_summary() { @@ -65,6 +69,9 @@ const char* title; WorkerDataArray array; + const char* sub_item_title; + WorkerDataArray sub_item; + private: virtual const char* expected_summary() = 0; virtual const char* expected_details() = 0; @@ -111,6 +118,10 @@ array.set(0, 5); array.set(1, 3); array.set(2, 7); + + array.set_thread_work_item(0, 1); + array.set_thread_work_item(1, 2); + array.set_thread_work_item(2, 3); } private: @@ -125,10 +136,12 @@ TEST_VM_F(BasicWorkerDataArrayTest, sum_test) { ASSERT_EQ(15u, array.sum()); + ASSERT_EQ(6u, array.thread_work_items(0)->sum()); } TEST_VM_F(BasicWorkerDataArrayTest, average_test) { ASSERT_NEAR(5.0, array.average(), epsilon); + ASSERT_NEAR(2.0, array.thread_work_items(0)->average(), epsilon); } TEST_VM_F(BasicWorkerDataArrayTest, print_summary_on_test) { @@ -149,6 +162,16 @@ for (uint i = 0; i < 3; i++) { array.add(i, 1); } + + WorkerDataArray* sub_items = array.thread_work_items(0); + + sub_items->set(0, 1); + sub_items->set(1, 2); + sub_items->set(2, 3); + + for (uint i = 0; i < 3; i++) { + array.add_thread_work_item(i, 1); + } } private: @@ -163,10 +186,12 @@ TEST_VM_F(AddWorkerDataArrayTest, sum_test) { ASSERT_EQ(18u, array.sum()); + ASSERT_EQ(9u, array.thread_work_items(0)->sum()); } TEST_VM_F(AddWorkerDataArrayTest, average_test) { ASSERT_NEAR(6.0, array.average(), epsilon); + ASSERT_NEAR(3.0, array.thread_work_items(0)->average(), epsilon); } TEST_VM_F(AddWorkerDataArrayTest, print_summary_on_test) {