rev 3708 : 8000244: G1: Ergonomically set MarkStackSize and use virtual space for global marking stack
Summary: Set MarkStackSize based on the number of parallel marking threads, with a reasonable minimum. If marking has to restart because of an overflow, expand the marking stack, up to a reasonable maximum. Allocate the underlying space for the marking stack from virtual memory.
Reviewed-by: jmasa
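
For a concrete feel of the sizing rule described above, here is a minimal standalone C++ sketch (not HotSpot code; the flag defaults and TASKQUEUE_SIZE below are assumed placeholders, since the real values are platform- and release-dependent — only the clamping rule mirrors the patch):

#include <algorithm>
#include <cstdio>

int main() {
  // Assumed stand-ins for the real flags/constants.
  const size_t TASKQUEUE_SIZE   = 128 * 1024;   // entries per task queue (assumed)
  const size_t MarkStackSizeMax = 512 * 1024;   // upper bound, in entries (assumed)
  const size_t MarkStackSize    = 32 * 1024;    // default, in entries (assumed)
  const size_t parallel_marking_threads = 4;

  // MIN2(MarkStackSizeMax, MAX2(MarkStackSize, threads * TASKQUEUE_SIZE))
  size_t mark_stack_size =
      std::min(MarkStackSizeMax,
               std::max(MarkStackSize, parallel_marking_threads * TASKQUEUE_SIZE));
  std::printf("ergonomic MarkStackSize = %zuK entries\n", mark_stack_size / 1024);
  return 0;
}
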
--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
32 32 #include "gc_implementation/g1/g1Log.hpp"
33 33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
34 34 #include "gc_implementation/g1/g1RemSet.hpp"
35 35 #include "gc_implementation/g1/heapRegion.inline.hpp"
36 36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
37 37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
38 38 #include "gc_implementation/shared/vmGCOperations.hpp"
39 39 #include "memory/genOopClosures.inline.hpp"
40 40 #include "memory/referencePolicy.hpp"
41 41 #include "memory/resourceArea.hpp"
42 42 #include "oops/oop.inline.hpp"
43 43 #include "runtime/handles.inline.hpp"
44 44 #include "runtime/java.hpp"
45 45 #include "services/memTracker.hpp"
46 46
47 47 // Concurrent marking bit map wrapper
48 48
49 -CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
50 - _bm((uintptr_t*)NULL,0),
49 +CMBitMapRO::CMBitMapRO(int shifter) :
50 + _bm(),
51 51 _shifter(shifter) {
52 - _bmStartWord = (HeapWord*)(rs.base());
53 - _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
54 - ReservedSpace brs(ReservedSpace::allocation_align_size_up(
55 - (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
56 -
57 - MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
58 -
59 - guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
60 - // For now we'll just commit all of the bit map up fromt.
61 - // Later on we'll try to be more parsimonious with swap.
62 - guarantee(_virtual_space.initialize(brs, brs.size()),
63 - "couldn't reseve backing store for concurrent marking bit map");
64 - assert(_virtual_space.committed_size() == brs.size(),
65 - "didn't reserve backing store for all of concurrent marking bit map?");
66 - _bm.set_map((uintptr_t*)_virtual_space.low());
67 - assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
68 - _bmWordSize, "inconsistency in bit map sizing");
69 - _bm.set_size(_bmWordSize >> _shifter);
52 + _bmStartWord = 0;
53 + _bmWordSize = 0;
70 54 }
71 55
72 56 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
73 57 HeapWord* limit) const {
74 58 // First we must round addr *up* to a possible object boundary.
75 59 addr = (HeapWord*)align_size_up((intptr_t)addr,
76 60 HeapWordSize << _shifter);
77 61 size_t addrOffset = heapWordToOffset(addr);
78 62 if (limit == NULL) {
79 63 limit = _bmStartWord + _bmWordSize;
80 64 }
81 65 size_t limitOffset = heapWordToOffset(limit);
82 66 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
83 67 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
84 68 assert(nextAddr >= addr, "get_next_one postcondition");
85 69 assert(nextAddr == limit || isMarked(nextAddr),
86 70 "get_next_one postcondition");
87 71 return nextAddr;
88 72 }
89 73
90 74 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
91 75 HeapWord* limit) const {
92 76 size_t addrOffset = heapWordToOffset(addr);
93 77 if (limit == NULL) {
94 78 limit = _bmStartWord + _bmWordSize;
95 79 }
96 80 size_t limitOffset = heapWordToOffset(limit);
97 81 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
98 82 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
99 83 assert(nextAddr >= addr, "get_next_one postcondition");
100 84 assert(nextAddr == limit || !isMarked(nextAddr),
101 85 "get_next_one postcondition");
102 86 return nextAddr;
103 87 }
104 88
105 89 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
106 90 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
107 91 return (int) (diff >> _shifter);
108 92 }
109 93
110 94 #ifndef PRODUCT
111 -bool CMBitMapRO::covers(ReservedSpace rs) const {
95 +bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
112 96 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
113 97 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
114 98 "size inconsistency");
115 - return _bmStartWord == (HeapWord*)(rs.base()) &&
116 - _bmWordSize == rs.size()>>LogHeapWordSize;
99 + return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
100 + _bmWordSize == heap_rs.size()>>LogHeapWordSize;
117 101 }
118 102 #endif
119 103
104 +bool CMBitMap::allocate(ReservedSpace heap_rs) {
105 + _bmStartWord = (HeapWord*)(heap_rs.base());
106 + _bmWordSize = heap_rs.size()/HeapWordSize; // heap_rs.size() is in bytes
107 + ReservedSpace brs(ReservedSpace::allocation_align_size_up(
108 + (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
109 + if (!brs.is_reserved()) {
110 + warning("ConcurrentMark marking bit map allocation failure");
111 + return false;
112 + }
113 + MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
114 + // For now we'll just commit all of the bit map up front.
115 + // Later on we'll try to be more parsimonious with swap.
116 + if (!_virtual_space.initialize(brs, brs.size())) {
117 + warning("ConcurrentMark marking bit map backing store failure");
118 + return false;
119 + }
120 + assert(_virtual_space.committed_size() == brs.size(),
121 + "didn't reserve backing store for all of concurrent marking bit map?");
122 + _bm.set_map((uintptr_t*)_virtual_space.low());
123 + assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
124 + _bmWordSize, "inconsistency in bit map sizing");
125 + _bm.set_size(_bmWordSize >> _shifter);
126 + return true;
127 +}
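
As a sanity check on the sizing expression in CMBitMap::allocate(): with a shifter of 0 (MinObjAlignment of one word, as in this patch's constructors) the map needs one bit per heap word, i.e. _bmWordSize >> 3 bytes, plus one for rounding. A standalone sketch with an assumed 1 GB heap and 8-byte heap words:

#include <cstdio>

int main() {
  const size_t HeapWordSize   = 8;      // assumed 64-bit heap words
  const size_t LogBitsPerByte = 3;
  const int    shifter        = 0;      // MinObjAlignment - 1 in the patch
  const size_t heap_bytes     = 1024UL * 1024 * 1024;  // assumed 1 GB heap

  size_t bm_word_size  = heap_bytes / HeapWordSize;    // one bit per word
  size_t backing_bytes = (bm_word_size >> (shifter + LogBitsPerByte)) + 1;
  std::printf("marking bitmap backing store: %zu bytes (~16 MB)\n", backing_bytes);
  return 0;
}
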
128 +
120 129 void CMBitMap::clearAll() {
121 130 _bm.clear();
122 131 return;
123 132 }
124 133
125 134 void CMBitMap::markRange(MemRegion mr) {
126 135 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
127 136 assert(!mr.is_empty(), "unexpected empty region");
128 137 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
129 138 ((HeapWord *) mr.end())),
130 139 "markRange memory region end is not card aligned");
131 140 // convert address range into offset range
132 141 _bm.at_put_range(heapWordToOffset(mr.start()),
133 142 heapWordToOffset(mr.end()), true);
134 143 }
135 144
136 145 void CMBitMap::clearRange(MemRegion mr) {
137 146 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
138 147 assert(!mr.is_empty(), "unexpected empty region");
139 148 // convert address range into offset range
140 149 _bm.at_put_range(heapWordToOffset(mr.start()),
141 150 heapWordToOffset(mr.end()), false);
142 151 }
143 152
144 153 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
145 154 HeapWord* end_addr) {
146 155 HeapWord* start = getNextMarkedWordAddress(addr);
147 156 start = MIN2(start, end_addr);
148 157 HeapWord* end = getNextUnmarkedWordAddress(start);
149 158 end = MIN2(end, end_addr);
150 159 assert(start <= end, "Consistency check");
151 160 MemRegion mr(start, end);
152 161 if (!mr.is_empty()) {
153 162 clearRange(mr);
154 163 }
155 164 return mr;
156 165 }
157 166
158 167 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
159 168 _base(NULL), _cm(cm)
160 169 #ifdef ASSERT
161 170 , _drain_in_progress(false)
162 171 , _drain_in_progress_yields(false)
163 172 #endif
164 173 {}
165 174
166 -void CMMarkStack::allocate(size_t size) {
167 - _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
168 - if (_base == NULL) {
169 - vm_exit_during_initialization("Failed to allocate CM region mark stack");
175 +bool CMMarkStack::allocate(size_t capacity) {
176 + // allocate a stack of the requisite depth
177 + ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
178 + if (!rs.is_reserved()) {
179 + warning("ConcurrentMark MarkStack allocation failure");
180 + return false;
170 181 }
171 - _index = 0;
172 - _capacity = (jint) size;
182 + MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
183 + if (!_virtual_space.initialize(rs, rs.size())) {
184 + warning("ConcurrentMark MarkStack backing store failure");
185 + // Release the virtual memory reserved for the marking stack
186 + rs.release();
187 + return false;
188 + }
189 + assert(_virtual_space.committed_size() == rs.size(),
190 + "Didn't reserve backing store for all of ConcurrentMark stack?");
191 + _base = (oop*) _virtual_space.low();
192 + setEmpty();
193 + _capacity = (jint) capacity;
173 194 _saved_index = -1;
174 195 NOT_PRODUCT(_max_depth = 0);
196 + return true;
197 +}
198 +
199 +void CMMarkStack::expand() {
200 + // Called during remark if we've overflowed the marking stack during marking.
201 + assert(isEmpty(), "stack should have been emptied while handling overflow");
202 + assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
203 + // Clear expansion flag
204 + _should_expand = false;
205 + if (_capacity == (jint) MarkStackSizeMax) {
206 + if (PrintGCDetails && Verbose) {
207 + gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
208 + }
209 + return;
210 + }
211 + // Double capacity if possible
212 + jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
213 + // Do not give up the existing stack until we have managed to
214 + // get the doubled capacity that we desire.
215 + ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
216 + sizeof(oop)));
217 + if (rs.is_reserved()) {
218 + // Release the backing store associated with old stack
219 + _virtual_space.release();
220 + // Reinitialize virtual space for new stack
221 + if (!_virtual_space.initialize(rs, rs.size())) {
222 + fatal("Not enough swap for expanded marking stack capacity");
223 + }
224 + _base = (oop*)(_virtual_space.low());
225 + _index = 0;
226 + _capacity = new_capacity;
227 + } else {
228 + if (PrintGCDetails && Verbose) {
229 + // Failed to double capacity; continue
230 + gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
231 + SIZE_FORMAT"K to " SIZE_FORMAT"K",
232 + _capacity / K, new_capacity / K);
233 + }
234 + }
235 +}
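
One detail worth noting in expand(): the new space is reserved before the old backing store is released, so a failed reservation leaves the existing (empty) stack usable. A toy standalone sketch of that ordering, with malloc standing in for ReservedSpace/VirtualSpace (names and values assumed, not HotSpot code):

#include <cstdio>
#include <cstdlib>

// Toy stand-in for CMMarkStack::expand(): double 'capacity' up to 'max',
// never discarding the old buffer until the new one is secured.
bool expand(char** base, size_t* capacity, size_t max) {
  if (*capacity == max) return false;              // benign: already at the limit
  size_t new_capacity = (*capacity * 2 < max) ? *capacity * 2 : max;
  char* new_base = (char*) malloc(new_capacity);   // reserve the new space first
  if (new_base == NULL) return false;              // old stack is still usable
  free(*base);                                     // only now release the old store
  *base = new_base;
  *capacity = new_capacity;
  return true;
}

int main() {
  size_t capacity = 4;
  char* base = (char*) malloc(capacity);
  while (expand(&base, &capacity, 64)) {
    std::printf("marking stack capacity now %zu\n", capacity);
  }
  free(base);
  return 0;
}
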
236 +
237 +void CMMarkStack::set_should_expand() {
238 + // If we're resetting the marking state because of a
239 + // marking stack overflow, record that we should, if
240 + // possible, expand the stack.
241 + _should_expand = _cm->has_overflown();
175 242 }
176 243
177 244 CMMarkStack::~CMMarkStack() {
178 245 if (_base != NULL) {
179 - FREE_C_HEAP_ARRAY(oop, _base, mtGC);
246 + _base = NULL;
247 + _virtual_space.release();
180 248 }
181 249 }
182 250
183 251 void CMMarkStack::par_push(oop ptr) {
184 252 while (true) {
185 253 if (isFull()) {
186 254 _overflow = true;
187 255 return;
188 256 }
189 257 // Otherwise...
190 258 jint index = _index;
191 259 jint next_index = index+1;
192 260 jint res = Atomic::cmpxchg(next_index, &_index, index);
193 261 if (res == index) {
194 262 _base[index] = ptr;
195 263 // Note that we don't maintain this atomically. We could, but it
196 264 // doesn't seem necessary.
197 265 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
198 266 return;
199 267 }
200 268 // Otherwise, we need to try again.
201 269 }
202 270 }
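
par_push() above is a claim-then-fill CAS push: a slot is claimed by advancing _index with cmpxchg, and only then written. That ordering is safe here because pops happen under a lock or at a safepoint, never concurrently with pushes. A standalone sketch of the same pattern (simplified; overflow handling kept, _max_depth tracking dropped):

#include <atomic>
#include <cstdio>

const int CAPACITY = 1024;
int stack_base[CAPACITY];
std::atomic<int> stack_index{0};
bool overflow = false;

void par_push(int value) {
  while (true) {
    int index = stack_index.load();
    if (index >= CAPACITY) { overflow = true; return; }
    // Claim slot 'index' by bumping the index; on contention, retry.
    if (stack_index.compare_exchange_strong(index, index + 1)) {
      stack_base[index] = value;   // fill the slot after claiming it
      return;
    }
  }
}

int main() {
  par_push(42);
  std::printf("top: %d, depth: %d\n", stack_base[0], stack_index.load());
  return 0;
}
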
203 271
204 272 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
205 273 while (true) {
206 274 if (isFull()) {
207 275 _overflow = true;
208 276 return;
209 277 }
210 278 // Otherwise...
211 279 jint index = _index;
212 280 jint next_index = index + n;
213 281 if (next_index > _capacity) {
214 282 _overflow = true;
215 283 return;
216 284 }
217 285 jint res = Atomic::cmpxchg(next_index, &_index, index);
218 286 if (res == index) {
219 287 for (int i = 0; i < n; i++) {
220 - int ind = index + i;
288 + int ind = index + i;
221 289 assert(ind < _capacity, "By overflow test above.");
222 290 _base[ind] = ptr_arr[i];
223 291 }
224 292 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
225 293 return;
226 294 }
227 295 // Otherwise, we need to try again.
228 296 }
229 297 }
230 298
231 -
232 299 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
233 300 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
234 301 jint start = _index;
235 302 jint next_index = start + n;
236 303 if (next_index > _capacity) {
237 304 _overflow = true;
238 305 return;
239 306 }
240 307 // Otherwise.
241 308 _index = next_index;
242 309 for (int i = 0; i < n; i++) {
243 310 int ind = start + i;
244 311 assert(ind < _capacity, "By overflow test above.");
245 312 _base[ind] = ptr_arr[i];
246 313 }
314 + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
247 315 }
248 316
249 -
250 317 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
251 318 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
252 319 jint index = _index;
253 320 if (index == 0) {
254 321 *n = 0;
255 322 return false;
256 323 } else {
257 324 int k = MIN2(max, index);
258 - jint new_ind = index - k;
325 + jint new_ind = index - k;
259 326 for (int j = 0; j < k; j++) {
260 327 ptr_arr[j] = _base[new_ind + j];
261 328 }
262 329 _index = new_ind;
263 330 *n = k;
264 331 return true;
265 332 }
266 333 }
267 334
268 335 template<class OopClosureClass>
269 336 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
270 337 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
271 338 || SafepointSynchronize::is_at_safepoint(),
272 339 "Drain recursion must be yield-safe.");
273 340 bool res = true;
274 341 debug_only(_drain_in_progress = true);
275 342 debug_only(_drain_in_progress_yields = yield_after);
276 343 while (!isEmpty()) {
277 344 oop newOop = pop();
278 345 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
279 346 assert(newOop->is_oop(), "Expected an oop");
280 347 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
281 348 "only grey objects on this stack");
282 349 newOop->oop_iterate(cl);
283 350 if (yield_after && _cm->do_yield_check()) {
284 351 res = false;
285 352 break;
286 353 }
287 354 }
288 355 debug_only(_drain_in_progress = false);
289 356 return res;
290 357 }
291 358
292 359 void CMMarkStack::note_start_of_gc() {
293 360 assert(_saved_index == -1,
294 361 "note_start_of_gc()/end_of_gc() bracketed incorrectly");
295 362 _saved_index = _index;
296 363 }
297 364
298 365 void CMMarkStack::note_end_of_gc() {
299 366 // This is intentionally a guarantee, instead of an assert. If we
300 367 // accidentally add something to the mark stack during GC, it
301 368 // will be a correctness issue so it's better if we crash. We'll
302 369 // only check this once per GC anyway, so it won't be a performance
303 370 // issue in any way.
304 371 guarantee(_saved_index == _index,
305 372 err_msg("saved index: %d index: %d", _saved_index, _index));
306 373 _saved_index = -1;
307 374 }
308 375
309 376 void CMMarkStack::oops_do(OopClosure* f) {
310 377 assert(_saved_index == _index,
311 378 err_msg("saved index: %d index: %d", _saved_index, _index));
312 379 for (int i = 0; i < _index; i += 1) {
313 380 f->do_oop(&_base[i]);
314 381 }
315 382 }
316 383
317 384 bool ConcurrentMark::not_yet_marked(oop obj) const {
318 385 return _g1h->is_obj_ill(obj);
319 386 }
320 387
321 388 CMRootRegions::CMRootRegions() :
322 389 _young_list(NULL), _cm(NULL), _scan_in_progress(false),
323 390 _should_abort(false), _next_survivor(NULL) { }
324 391
325 392 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
326 393 _young_list = g1h->young_list();
327 394 _cm = cm;
328 395 }
329 396
330 397 void CMRootRegions::prepare_for_scan() {
331 398 assert(!scan_in_progress(), "pre-condition");
332 399
333 400 // Currently, only survivors can be root regions.
334 401 assert(_next_survivor == NULL, "pre-condition");
335 402 _next_survivor = _young_list->first_survivor_region();
336 403 _scan_in_progress = (_next_survivor != NULL);
337 404 _should_abort = false;
338 405 }
339 406
340 407 HeapRegion* CMRootRegions::claim_next() {
341 408 if (_should_abort) {
342 409 // If someone has set the should_abort flag, we return NULL to
343 410 // force the caller to bail out of their loop.
344 411 return NULL;
345 412 }
346 413
347 414 // Currently, only survivors can be root regions.
348 415 HeapRegion* res = _next_survivor;
349 416 if (res != NULL) {
350 417 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
351 418 // Read it again in case it changed while we were waiting for the lock.
352 419 res = _next_survivor;
353 420 if (res != NULL) {
354 421 if (res == _young_list->last_survivor_region()) {
355 422 // We just claimed the last survivor so store NULL to indicate
356 423 // that we're done.
357 424 _next_survivor = NULL;
358 425 } else {
359 426 _next_survivor = res->get_next_young_region();
360 427 }
361 428 } else {
362 429 // Someone else claimed the last survivor while we were trying
363 430 // to take the lock so there is nothing else to do.
364 431 }
365 432 }
366 433 assert(res == NULL || res->is_survivor(), "post-condition");
367 434
368 435 return res;
369 436 }
370 437
371 438 void CMRootRegions::scan_finished() {
372 439 assert(scan_in_progress(), "pre-condition");
373 440
374 441 // Currently, only survivors can be root regions.
375 442 if (!_should_abort) {
376 443 assert(_next_survivor == NULL, "we should have claimed all survivors");
377 444 }
378 445 _next_survivor = NULL;
379 446
380 447 {
381 448 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
382 449 _scan_in_progress = false;
383 450 RootRegionScan_lock->notify_all();
384 451 }
385 452 }
386 453
387 454 bool CMRootRegions::wait_until_scan_finished() {
388 455 if (!scan_in_progress()) return false;
389 456
390 457 {
391 458 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
392 459 while (scan_in_progress()) {
393 460 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
394 461 }
395 462 }
396 463 return true;
397 464 }
398 465
399 466 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
400 467 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
401 468 #endif // _MSC_VER
402 469
403 470 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
404 471 return MAX2((n_par_threads + 2) / 4, 1U);
405 472 }
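
scale_parallel_threads() gives roughly one marking thread per four parallel GC threads, never less than one. A quick standalone check of the formula:

#include <algorithm>
#include <cstdio>
#include <initializer_list>

// Same formula as ConcurrentMark::scale_parallel_threads().
unsigned scale_parallel_threads(unsigned n) {
  return std::max((n + 2) / 4, 1u);
}

int main() {
  for (unsigned n : {1u, 4u, 8u, 16u}) {
    std::printf("%u parallel GC threads -> %u marking threads\n",
                n, scale_parallel_threads(n));
  }
  return 0;
}
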
406 473
407 -ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
408 - _markBitMap1(rs, MinObjAlignment - 1),
409 - _markBitMap2(rs, MinObjAlignment - 1),
474 +ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
475 + _g1h(g1h),
476 + _markBitMap1(MinObjAlignment - 1),
477 + _markBitMap2(MinObjAlignment - 1),
410 478
411 479 _parallel_marking_threads(0),
412 480 _max_parallel_marking_threads(0),
413 481 _sleep_factor(0.0),
414 482 _marking_task_overhead(1.0),
415 483 _cleanup_sleep_factor(0.0),
416 484 _cleanup_task_overhead(1.0),
417 485 _cleanup_list("Cleanup List"),
418 - _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
419 - _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
420 - CardTableModRefBS::card_shift,
421 - false /* in_resource_area*/),
486 + _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
487 + _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
488 + CardTableModRefBS::card_shift,
489 + false /* in_resource_area*/),
422 490
423 491 _prevMarkBitMap(&_markBitMap1),
424 492 _nextMarkBitMap(&_markBitMap2),
425 493
426 494 _markStack(this),
427 495 // _finger set in set_non_marking_state
428 496
429 497 _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
430 498 // _active_tasks set in set_non_marking_state
431 499 // _tasks set inside the constructor
432 500 _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
433 501 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
434 502
435 503 _has_overflown(false),
436 504 _concurrent(false),
437 505 _has_aborted(false),
438 506 _restart_for_overflow(false),
439 507 _concurrent_marking_in_progress(false),
440 508
441 509 // _verbose_level set below
442 510
443 511 _init_times(),
444 512 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
445 513 _cleanup_times(),
446 514 _total_counting_time(0.0),
447 515 _total_rs_scrub_time(0.0),
448 516
449 517 _parallel_workers(NULL),
450 518
451 519 _count_card_bitmaps(NULL),
452 - _count_marked_bytes(NULL) {
520 + _count_marked_bytes(NULL),
521 + _completed_initialization(false) {
453 522 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
454 523 if (verbose_level < no_verbose) {
455 524 verbose_level = no_verbose;
456 525 }
457 526 if (verbose_level > high_verbose) {
458 527 verbose_level = high_verbose;
459 528 }
460 529 _verbose_level = verbose_level;
461 530
462 531 if (verbose_low()) {
463 532 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
464 533 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
465 534 }
466 535
467 - _markStack.allocate(MarkStackSize);
536 + if (!_markBitMap1.allocate(heap_rs)) {
537 + warning("Failed to allocate first CM bit map");
538 + return;
539 + }
540 + if (!_markBitMap2.allocate(heap_rs)) {
541 + warning("Failed to allocate second CM bit map");
542 + return;
543 + }
468 544
469 545 // Create & start a ConcurrentMark thread.
470 546 _cmThread = new ConcurrentMarkThread(this);
471 547 assert(cmThread() != NULL, "CM Thread should have been created");
472 548 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
473 549
474 - _g1h = G1CollectedHeap::heap();
475 550 assert(CGC_lock != NULL, "Where's the CGC_lock?");
476 - assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
477 - assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
551 + assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
552 + assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
478 553
479 554 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
480 555 satb_qs.set_buffer_size(G1SATBBufferSize);
481 556
482 557 _root_regions.init(_g1h, this);
483 558
484 - _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
485 - _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
486 -
487 - _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
488 - _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
489 -
490 - BitMap::idx_t card_bm_size = _card_bm.size();
491 -
492 - // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
493 - _active_tasks = _max_worker_id;
494 - for (uint i = 0; i < _max_worker_id; ++i) {
495 - CMTaskQueue* task_queue = new CMTaskQueue();
496 - task_queue->initialize();
497 - _task_queues->register_queue(i, task_queue);
498 -
499 - _count_card_bitmaps[i] = BitMap(card_bm_size, false);
500 - _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
501 -
502 - _tasks[i] = new CMTask(i, this,
503 - _count_marked_bytes[i],
504 - &_count_card_bitmaps[i],
505 - task_queue, _task_queues);
506 -
507 - _accum_task_vtime[i] = 0.0;
508 - }
509 -
510 - // Calculate the card number for the bottom of the heap. Used
511 - // in biasing indexes into the accounting card bitmaps.
512 - _heap_bottom_card_num =
513 - intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
514 - CardTableModRefBS::card_shift);
515 -
516 - // Clear all the liveness counting data
517 - clear_all_count_data();
518 -
519 559 if (ConcGCThreads > ParallelGCThreads) {
520 - vm_exit_during_initialization("Can't have more ConcGCThreads "
521 - "than ParallelGCThreads.");
560 + warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
561 + "than ParallelGCThreads (" UINT32_FORMAT ").",
562 + ConcGCThreads, ParallelGCThreads);
563 + return;
522 564 }
523 565 if (ParallelGCThreads == 0) {
524 566 // if we are not running with any parallel GC threads we will not
525 567 // spawn any marking threads either
526 568 _parallel_marking_threads = 0;
527 569 _max_parallel_marking_threads = 0;
528 570 _sleep_factor = 0.0;
529 571 _marking_task_overhead = 1.0;
530 572 } else {
531 573 if (ConcGCThreads > 0) {
532 574 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
533 575 // if both are set
534 576
535 577 _parallel_marking_threads = (uint) ConcGCThreads;
536 578 _max_parallel_marking_threads = _parallel_marking_threads;
537 579 _sleep_factor = 0.0;
538 580 _marking_task_overhead = 1.0;
539 581 } else if (G1MarkingOverheadPercent > 0) {
540 582 // we will calculate the number of parallel marking threads
541 583 // based on a target overhead with respect to the soft real-time
542 584 // goal
543 585
544 586 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
545 587 double overall_cm_overhead =
546 588 (double) MaxGCPauseMillis * marking_overhead /
547 589 (double) GCPauseIntervalMillis;
548 590 double cpu_ratio = 1.0 / (double) os::processor_count();
549 591 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
550 592 double marking_task_overhead =
551 593 overall_cm_overhead / marking_thread_num *
552 594 (double) os::processor_count();
553 595 double sleep_factor =
554 596 (1.0 - marking_task_overhead) / marking_task_overhead;
555 597
556 598 _parallel_marking_threads = (uint) marking_thread_num;
557 599 _max_parallel_marking_threads = _parallel_marking_threads;
558 600 _sleep_factor = sleep_factor;
559 601 _marking_task_overhead = marking_task_overhead;
560 602 } else {
561 603 _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
562 604 _max_parallel_marking_threads = _parallel_marking_threads;
563 605 _sleep_factor = 0.0;
564 606 _marking_task_overhead = 1.0;
565 607 }
566 608
567 609 if (parallel_marking_threads() > 1) {
568 610 _cleanup_task_overhead = 1.0;
569 611 } else {
570 612 _cleanup_task_overhead = marking_task_overhead();
571 613 }
572 614 _cleanup_sleep_factor =
573 615 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
574 616
575 617 #if 0
576 618 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
577 619 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
578 620 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
579 621 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
580 622 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
581 623 #endif
582 624
583 625 guarantee(parallel_marking_threads() > 0, "peace of mind");
584 626 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
585 627 _max_parallel_marking_threads, false, true);
586 628 if (_parallel_workers == NULL) {
587 629 vm_exit_during_initialization("Failed necessary allocation.");
588 630 } else {
589 631 _parallel_workers->initialize_workers();
590 632 }
591 633 }
592 634
635 + if (FLAG_IS_DEFAULT(MarkStackSize)) {
636 + uintx mark_stack_size =
637 + MIN2(MarkStackSizeMax,
638 + MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
639 + // Verify that the calculated value for MarkStackSize is in range.
640 + // It would be nice to use the private utility routine from Arguments.
641 + if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
642 + warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
643 + "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
644 + mark_stack_size, 1, MarkStackSizeMax);
645 + return;
646 + }
647 + FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
648 + } else {
649 + // Verify MarkStackSize is in range.
650 + if (FLAG_IS_CMDLINE(MarkStackSize)) {
651 + if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
652 + if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
653 + warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
654 + "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
655 + MarkStackSize, 1, MarkStackSizeMax);
656 + return;
657 + }
658 + } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
659 + if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
660 + warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
661 + " or for MarkStackSizeMax (" UINTX_FORMAT ")",
662 + MarkStackSize, MarkStackSizeMax);
663 + return;
664 + }
665 + }
666 + }
667 + }
668 +
669 + if (!_markStack.allocate(MarkStackSize)) {
670 + warning("Failed to allocate CM marking stack");
671 + return;
672 + }
673 +
674 + _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
675 + _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
676 +
677 + _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
678 + _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
679 +
680 + BitMap::idx_t card_bm_size = _card_bm.size();
681 +
682 + // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
683 + _active_tasks = _max_worker_id;
684 +
685 + size_t max_regions = (size_t) _g1h->max_regions();
686 + for (uint i = 0; i < _max_worker_id; ++i) {
687 + CMTaskQueue* task_queue = new CMTaskQueue();
688 + task_queue->initialize();
689 + _task_queues->register_queue(i, task_queue);
690 +
691 + _count_card_bitmaps[i] = BitMap(card_bm_size, false);
692 + _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
693 +
694 + _tasks[i] = new CMTask(i, this,
695 + _count_marked_bytes[i],
696 + &_count_card_bitmaps[i],
697 + task_queue, _task_queues);
698 +
699 + _accum_task_vtime[i] = 0.0;
700 + }
701 +
702 + // Calculate the card number for the bottom of the heap. Used
703 + // in biasing indexes into the accounting card bitmaps.
704 + _heap_bottom_card_num =
705 + intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
706 + CardTableModRefBS::card_shift);
707 +
708 + // Clear all the liveness counting data
709 + clear_all_count_data();
710 +
593 711 // so that the call below can read a sensible value
594 - _heap_start = (HeapWord*) rs.base();
712 + _heap_start = (HeapWord*) heap_rs.base();
595 713 set_non_marking_state();
714 + _completed_initialization = true;
596 715 }
597 716
598 717 void ConcurrentMark::update_g1_committed(bool force) {
599 718 // If concurrent marking is not in progress, then we do not need to
600 719 // update _heap_end.
601 720 if (!concurrent_marking_in_progress() && !force) return;
602 721
603 722 MemRegion committed = _g1h->g1_committed();
604 723 assert(committed.start() == _heap_start, "start shouldn't change");
605 724 HeapWord* new_end = committed.end();
606 725 if (new_end > _heap_end) {
607 726 // The heap has been expanded.
608 727
609 728 _heap_end = new_end;
610 729 }
611 730 // Notice that the heap can also shrink. However, this only happens
612 731 // during a Full GC (at least currently) and the entire marking
613 732 // phase will bail out and the task will not be restarted. So, let's
614 733 // do nothing.
615 734 }
616 735
617 736 void ConcurrentMark::reset() {
618 737 // Starting values for these two. This should be called in a STW
619 738 // phase. CM will be notified of any future g1_committed expansions
620 739 // at the end of evacuation pauses, when tasks are
621 740 // inactive.
622 741 MemRegion committed = _g1h->g1_committed();
623 742 _heap_start = committed.start();
624 743 _heap_end = committed.end();
625 744
626 745 // Separated the asserts so that we know which one fires.
627 746 assert(_heap_start != NULL, "heap bounds should look ok");
628 747 assert(_heap_end != NULL, "heap bounds should look ok");
629 748 assert(_heap_start < _heap_end, "heap bounds should look ok");
630 749
631 750 // reset all the marking data structures and any necessary flags
632 751 clear_marking_state();
633 752
634 753 if (verbose_low()) {
635 754 gclog_or_tty->print_cr("[global] resetting");
636 755 }
637 756
638 757 // We do reset all of them, since different phases will use
639 758 // different number of active threads. So, it's easiest to have all
640 759 // of them ready.
641 760 for (uint i = 0; i < _max_worker_id; ++i) {
642 761 _tasks[i]->reset(_nextMarkBitMap);
643 762 }
644 763
645 764 // we need this to make sure that the flag is on during the evac
646 765 // pause with initial mark piggy-backed
647 766 set_concurrent_marking_in_progress();
648 767 }
649 768
650 769 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
651 770 assert(active_tasks <= _max_worker_id, "we should not have more");
652 771
653 772 _active_tasks = active_tasks;
654 773 // Need to update the three data structures below according to the
655 774 // number of active threads for this phase.
656 775 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
657 776 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
658 777 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
659 778
660 779 _concurrent = concurrent;
661 780 // We propagate this to all tasks, not just the active ones.
662 781 for (uint i = 0; i < _max_worker_id; ++i)
663 782 _tasks[i]->set_concurrent(concurrent);
664 783
665 784 if (concurrent) {
666 785 set_concurrent_marking_in_progress();
667 786 } else {
668 787 // We currently assume that the concurrent flag has been set to
669 788 // false before we start remark. At this point we should also be
670 789 // in a STW phase.
671 790 assert(!concurrent_marking_in_progress(), "invariant");
672 791 assert(_finger == _heap_end, "only way to get here");
673 792 update_g1_committed(true);
674 793 }
675 794 }
676 795
677 796 void ConcurrentMark::set_non_marking_state() {
678 797 // We set the global marking state to some default values when we're
679 798 // not doing marking.
680 799 clear_marking_state();
681 800 _active_tasks = 0;
682 801 clear_concurrent_marking_in_progress();
683 802 }
684 803
685 804 ConcurrentMark::~ConcurrentMark() {
686 805 // The ConcurrentMark instance is never freed.
687 806 ShouldNotReachHere();
688 807 }
689 808
690 809 void ConcurrentMark::clearNextBitmap() {
691 810 G1CollectedHeap* g1h = G1CollectedHeap::heap();
692 811 G1CollectorPolicy* g1p = g1h->g1_policy();
693 812
694 813 // Make sure that the concurrent mark thread looks to still be in
695 814 // the current cycle.
696 815 guarantee(cmThread()->during_cycle(), "invariant");
697 816
698 817 // We are finishing up the current cycle by clearing the next
699 818 // marking bitmap and getting it ready for the next cycle. During
700 819 // this time no other cycle can start. So, let's make sure that this
701 820 // is the case.
702 821 guarantee(!g1h->mark_in_progress(), "invariant");
703 822
704 823 // clear the mark bitmap (no grey objects to start with).
705 824 // We need to do this in chunks and offer to yield in between
706 825 // each chunk.
707 826 HeapWord* start = _nextMarkBitMap->startWord();
708 827 HeapWord* end = _nextMarkBitMap->endWord();
709 828 HeapWord* cur = start;
710 829 size_t chunkSize = M;
711 830 while (cur < end) {
712 831 HeapWord* next = cur + chunkSize;
713 832 if (next > end) {
714 833 next = end;
715 834 }
716 835 MemRegion mr(cur,next);
717 836 _nextMarkBitMap->clearRange(mr);
718 837 cur = next;
719 838 do_yield_check();
720 839
721 840 // Repeat the asserts from above. We'll do them as asserts here to
722 841 // minimize their overhead on the product. However, we'll have
723 842 // them as guarantees at the beginning / end of the bitmap
724 843 // clearing to get some checking in the product.
725 844 assert(cmThread()->during_cycle(), "invariant");
726 845 assert(!g1h->mark_in_progress(), "invariant");
727 846 }
728 847
729 848 // Clear the liveness counting data
730 849 clear_all_count_data();
731 850
732 851 // Repeat the asserts from above.
733 852 guarantee(cmThread()->during_cycle(), "invariant");
734 853 guarantee(!g1h->mark_in_progress(), "invariant");
735 854 }
736 855
737 856 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
738 857 public:
739 858 bool doHeapRegion(HeapRegion* r) {
740 859 if (!r->continuesHumongous()) {
741 860 r->note_start_of_marking();
742 861 }
743 862 return false;
744 863 }
745 864 };
746 865
747 866 void ConcurrentMark::checkpointRootsInitialPre() {
748 867 G1CollectedHeap* g1h = G1CollectedHeap::heap();
749 868 G1CollectorPolicy* g1p = g1h->g1_policy();
750 869
751 870 _has_aborted = false;
752 871
753 872 #ifndef PRODUCT
754 873 if (G1PrintReachableAtInitialMark) {
755 874 print_reachable("at-cycle-start",
756 875 VerifyOption_G1UsePrevMarking, true /* all */);
757 876 }
758 877 #endif
759 878
760 879 // Initialise marking structures. This has to be done in a STW phase.
761 880 reset();
762 881
763 882 // For each region note start of marking.
764 883 NoteStartOfMarkHRClosure startcl;
765 884 g1h->heap_region_iterate(&startcl);
766 885 }
767 886
768 887
769 888 void ConcurrentMark::checkpointRootsInitialPost() {
770 889 G1CollectedHeap* g1h = G1CollectedHeap::heap();
771 890
772 891 // If we force an overflow during remark, the remark operation will
773 892 // actually abort and we'll restart concurrent marking. If we always
774 893 // force an overflow during remark we'll never actually complete the
775 894 // marking phase. So, we initialize this here, at the start of the
776 895 // cycle, so that the remaining overflow number will decrease at
777 896 // every remark and we'll eventually not need to cause one.
778 897 force_overflow_stw()->init();
779 898
780 899 // Start Concurrent Marking weak-reference discovery.
781 900 ReferenceProcessor* rp = g1h->ref_processor_cm();
782 901 // enable ("weak") refs discovery
783 902 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
784 903 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
785 904
786 905 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
787 906 // This is the start of the marking cycle, we expect all
788 907 // threads to have SATB queues with active set to false.
789 908 satb_mq_set.set_active_all_threads(true, /* new active value */
790 909 false /* expected_active */);
791 910
792 911 _root_regions.prepare_for_scan();
793 912
794 913 // update_g1_committed() will be called at the end of an evac pause
795 914 // when marking is on. So, it's also called at the end of the
796 915 // initial-mark pause to update the heap end, if the heap expands
797 916 // during it. No need to call it here.
798 917 }
799 918
800 919 /*
801 920 * Notice that in the next two methods, we actually leave the STS
802 921 * during the barrier sync and join it immediately afterwards. If we
803 922 * do not do this, the following deadlock can occur: one thread could
804 923 * be in the barrier sync code, waiting for the other thread to also
805 924 * sync up, whereas another one could be trying to yield, while also
806 925 * waiting for the other threads to sync up too.
807 926 *
808 927 * Note, however, that this code is also used during remark and in
809 928 * this case we should not attempt to leave / enter the STS, otherwise
810 929 * we'll either hit an assert (debug / fastdebug) or deadlock
811 930 * (product). So we should only leave / enter the STS if we are
812 931 * operating concurrently.
813 932 *
814 933 * Because the thread that does the sync barrier has left the STS, it
815 934 * is possible for a Full GC or an evacuation pause to occur while it
816 935 * is suspended. This is actually safe, since entering the sync
817 936 * barrier is one of the last things do_marking_step() does, and it
818 937 * doesn't manipulate any data structures afterwards.
819 938 */
820 939
821 940 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
822 941 if (verbose_low()) {
823 942 gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
824 943 }
825 944
826 945 if (concurrent()) {
827 946 ConcurrentGCThread::stsLeave();
828 947 }
829 948 _first_overflow_barrier_sync.enter();
830 949 if (concurrent()) {
831 950 ConcurrentGCThread::stsJoin();
832 951 }
833 952 // at this point everyone should have synced up and not be doing any
834 953 // more work
835 954
836 955 if (verbose_low()) {
837 956 gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
838 957 }
839 958
840 959 // let the task associated with worker 0 do this
841 960 if (worker_id == 0) {
842 961 // task 0 is responsible for clearing the global data structures
843 962 // We should be here because of an overflow. During STW we should
844 963 // not clear the overflow flag since we rely on it being true when
845 964 // we exit this method to abort the pause and restart concurrent
846 965 // marking.
847 966 clear_marking_state(concurrent() /* clear_overflow */);
848 967 force_overflow()->update();
849 968
850 969 if (G1Log::fine()) {
851 970 gclog_or_tty->date_stamp(PrintGCDateStamps);
852 971 gclog_or_tty->stamp(PrintGCTimeStamps);
853 972 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
854 973 }
855 974 }
856 975
857 976 // after this, each task should reset its own data structures and
858 977 // then go into the second barrier
859 978 }
860 979
861 980 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
862 981 if (verbose_low()) {
863 982 gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
864 983 }
865 984
866 985 if (concurrent()) {
867 986 ConcurrentGCThread::stsLeave();
868 987 }
869 988 _second_overflow_barrier_sync.enter();
870 989 if (concurrent()) {
871 990 ConcurrentGCThread::stsJoin();
872 991 }
873 992 // at this point everything should be re-initialised and ready to go
874 993
875 994 if (verbose_low()) {
876 995 gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
877 996 }
878 997 }
879 998
880 999 #ifndef PRODUCT
881 1000 void ForceOverflowSettings::init() {
882 1001 _num_remaining = G1ConcMarkForceOverflow;
883 1002 _force = false;
884 1003 update();
885 1004 }
886 1005
887 1006 void ForceOverflowSettings::update() {
888 1007 if (_num_remaining > 0) {
889 1008 _num_remaining -= 1;
890 1009 _force = true;
891 1010 } else {
892 1011 _force = false;
893 1012 }
894 1013 }
895 1014
896 1015 bool ForceOverflowSettings::should_force() {
897 1016 if (_force) {
898 1017 _force = false;
899 1018 return true;
900 1019 } else {
901 1020 return false;
902 1021 }
903 1022 }
904 1023 #endif // !PRODUCT
905 1024
906 1025 class CMConcurrentMarkingTask: public AbstractGangTask {
907 1026 private:
908 1027 ConcurrentMark* _cm;
909 1028 ConcurrentMarkThread* _cmt;
910 1029
911 1030 public:
912 1031 void work(uint worker_id) {
913 1032 assert(Thread::current()->is_ConcurrentGC_thread(),
914 1033 "this should only be done by a conc GC thread");
915 1034 ResourceMark rm;
916 1035
917 1036 double start_vtime = os::elapsedVTime();
918 1037
919 1038 ConcurrentGCThread::stsJoin();
920 1039
921 1040 assert(worker_id < _cm->active_tasks(), "invariant");
922 1041 CMTask* the_task = _cm->task(worker_id);
923 1042 the_task->record_start_time();
924 1043 if (!_cm->has_aborted()) {
925 1044 do {
926 1045 double start_vtime_sec = os::elapsedVTime();
927 1046 double start_time_sec = os::elapsedTime();
928 1047 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
929 1048
930 1049 the_task->do_marking_step(mark_step_duration_ms,
931 1050 true /* do_stealing */,
932 1051 true /* do_termination */);
933 1052
934 1053 double end_time_sec = os::elapsedTime();
935 1054 double end_vtime_sec = os::elapsedVTime();
936 1055 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
937 1056 double elapsed_time_sec = end_time_sec - start_time_sec;
938 1057 _cm->clear_has_overflown();
939 1058
940 1059 bool ret = _cm->do_yield_check(worker_id);
941 1060
942 1061 jlong sleep_time_ms;
943 1062 if (!_cm->has_aborted() && the_task->has_aborted()) {
944 1063 sleep_time_ms =
945 1064 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
946 1065 ConcurrentGCThread::stsLeave();
947 1066 os::sleep(Thread::current(), sleep_time_ms, false);
948 1067 ConcurrentGCThread::stsJoin();
949 1068 }
950 1069 double end_time2_sec = os::elapsedTime();
951 1070 double elapsed_time2_sec = end_time2_sec - start_time_sec;
952 1071
953 1072 #if 0
954 1073 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
955 1074 "overhead %1.4lf",
956 1075 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
957 1076 the_task->conc_overhead(os::elapsedTime()) * 8.0);
958 1077 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
959 1078 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
960 1079 #endif
961 1080 } while (!_cm->has_aborted() && the_task->has_aborted());
962 1081 }
963 1082 the_task->record_end_time();
964 1083 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
965 1084
966 1085 ConcurrentGCThread::stsLeave();
967 1086
968 1087 double end_vtime = os::elapsedVTime();
969 1088 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
970 1089 }
971 1090
972 1091 CMConcurrentMarkingTask(ConcurrentMark* cm,
973 1092 ConcurrentMarkThread* cmt) :
974 1093 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
975 1094
976 1095 ~CMConcurrentMarkingTask() { }
977 1096 };
978 1097
979 1098 // Calculates the number of active workers for a concurrent
980 1099 // phase.
981 1100 uint ConcurrentMark::calc_parallel_marking_threads() {
982 1101 if (G1CollectedHeap::use_parallel_gc_threads()) {
983 1102 uint n_conc_workers = 0;
984 1103 if (!UseDynamicNumberOfGCThreads ||
985 1104 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
986 1105 !ForceDynamicNumberOfGCThreads)) {
987 1106 n_conc_workers = max_parallel_marking_threads();
988 1107 } else {
989 1108 n_conc_workers =
990 1109 AdaptiveSizePolicy::calc_default_active_workers(
991 1110 max_parallel_marking_threads(),
992 1111 1, /* Minimum workers */
993 1112 parallel_marking_threads(),
994 1113 Threads::number_of_non_daemon_threads());
995 1114 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
996 1115 // that scaling has already gone into "_max_parallel_marking_threads".
997 1116 }
998 1117 assert(n_conc_workers > 0, "Always need at least 1");
999 1118 return n_conc_workers;
1000 1119 }
1001 1120 // If we are not running with any parallel GC threads we will not
1002 1121 // have spawned any marking threads either. Hence the number of
1003 1122 // concurrent workers should be 0.
1004 1123 return 0;
1005 1124 }
1006 1125
1007 1126 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1008 1127 // Currently, only survivors can be root regions.
1009 1128 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1010 1129 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1011 1130
1012 1131 const uintx interval = PrefetchScanIntervalInBytes;
1013 1132 HeapWord* curr = hr->bottom();
1014 1133 const HeapWord* end = hr->top();
1015 1134 while (curr < end) {
1016 1135 Prefetch::read(curr, interval);
1017 1136 oop obj = oop(curr);
1018 1137 int size = obj->oop_iterate(&cl);
1019 1138 assert(size == obj->size(), "sanity");
1020 1139 curr += size;
1021 1140 }
1022 1141 }
1023 1142
1024 1143 class CMRootRegionScanTask : public AbstractGangTask {
1025 1144 private:
1026 1145 ConcurrentMark* _cm;
1027 1146
1028 1147 public:
1029 1148 CMRootRegionScanTask(ConcurrentMark* cm) :
1030 1149 AbstractGangTask("Root Region Scan"), _cm(cm) { }
1031 1150
1032 1151 void work(uint worker_id) {
1033 1152 assert(Thread::current()->is_ConcurrentGC_thread(),
1034 1153 "this should only be done by a conc GC thread");
1035 1154
1036 1155 CMRootRegions* root_regions = _cm->root_regions();
1037 1156 HeapRegion* hr = root_regions->claim_next();
1038 1157 while (hr != NULL) {
1039 1158 _cm->scanRootRegion(hr, worker_id);
1040 1159 hr = root_regions->claim_next();
1041 1160 }
1042 1161 }
1043 1162 };
1044 1163
1045 1164 void ConcurrentMark::scanRootRegions() {
1046 1165 // scan_in_progress() will have been set to true only if there was
1047 1166 // at least one root region to scan. So, if it's false, we
1048 1167 // should not attempt to do any further work.
1049 1168 if (root_regions()->scan_in_progress()) {
1050 1169 _parallel_marking_threads = calc_parallel_marking_threads();
1051 1170 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1052 1171 "Maximum number of marking threads exceeded");
1053 1172 uint active_workers = MAX2(1U, parallel_marking_threads());
1054 1173
1055 1174 CMRootRegionScanTask task(this);
1056 1175 if (parallel_marking_threads() > 0) {
1057 1176 _parallel_workers->set_active_workers((int) active_workers);
1058 1177 _parallel_workers->run_task(&task);
1059 1178 } else {
1060 1179 task.work(0);
1061 1180 }
1062 1181
1063 1182 // It's possible that has_aborted() is true here without actually
1064 1183 // aborting the survivor scan earlier. This is OK as it's
1065 1184 // mainly used for sanity checking.
1066 1185 root_regions()->scan_finished();
1067 1186 }
1068 1187 }
1069 1188
1070 1189 void ConcurrentMark::markFromRoots() {
1071 1190 // we might be tempted to assert that:
1072 1191 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1073 1192 // "inconsistent argument?");
1074 1193 // However that wouldn't be right, because it's possible that
1075 1194 // a safepoint is indeed in progress as a younger generation
1076 1195 // stop-the-world GC happens even as we mark in this generation.
1077 1196
1078 1197 _restart_for_overflow = false;
1079 1198 force_overflow_conc()->init();
1080 1199
1081 1200 // _g1h has _n_par_threads
1082 1201 _parallel_marking_threads = calc_parallel_marking_threads();
1083 1202 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1084 1203 "Maximum number of marking threads exceeded");
1085 1204
1086 1205 uint active_workers = MAX2(1U, parallel_marking_threads());
1087 1206
1088 1207 // Parallel task terminator is set in "set_phase()"
1089 1208 set_phase(active_workers, true /* concurrent */);
1090 1209
1091 1210 CMConcurrentMarkingTask markingTask(this, cmThread());
1092 1211 if (parallel_marking_threads() > 0) {
1093 1212 _parallel_workers->set_active_workers((int)active_workers);
1094 1213 // Don't set _n_par_threads because it affects MT in process_strong_roots()
1095 1214 // and the decisions on that MT processing are made elsewhere.
1096 1215 assert(_parallel_workers->active_workers() > 0, "Should have been set");
1097 1216 _parallel_workers->run_task(&markingTask);
1098 1217 } else {
1099 1218 markingTask.work(0);
1100 1219 }
1101 1220 print_stats();
1102 1221 }
1103 1222
1104 1223 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1105 1224 // world is stopped at this checkpoint
1106 1225 assert(SafepointSynchronize::is_at_safepoint(),
1107 1226 "world should be stopped");
1108 1227
1109 1228 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1110 1229
1111 1230 // If a full collection has happened, we shouldn't do this.
1112 1231 if (has_aborted()) {
1113 1232 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1114 1233 return;
1115 1234 }
1116 1235
1117 1236 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1118 1237
1119 1238 if (VerifyDuringGC) {
1120 1239 HandleMark hm; // handle scope
1121 1240 gclog_or_tty->print(" VerifyDuringGC:(before)");
1122 1241 Universe::heap()->prepare_for_verify();
1123 1242 Universe::verify(/* silent */ false,
1124 1243 /* option */ VerifyOption_G1UsePrevMarking);
1125 1244 }
1126 1245
1127 1246 G1CollectorPolicy* g1p = g1h->g1_policy();
1128 1247 g1p->record_concurrent_mark_remark_start();
1129 1248
1130 1249 double start = os::elapsedTime();
1131 1250
1132 1251 checkpointRootsFinalWork();
1133 1252
1134 1253 double mark_work_end = os::elapsedTime();
1135 1254
1136 1255 weakRefsWork(clear_all_soft_refs);
1137 1256
1138 1257 if (has_overflown()) {
1139 1258 // Oops. We overflowed. Restart concurrent marking.
1140 1259 _restart_for_overflow = true;
1141 1260 // Clear the flag. We do not need it any more.
1142 1261 clear_has_overflown();
1143 1262 if (G1TraceMarkStackOverflow) {
1144 1263 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1145 1264 }
1146 1265 } else {
1147 1266 // Aggregate the per-task counting data that we have accumulated
1148 1267 // while marking.
1149 1268 aggregate_count_data();
1150 1269
1151 1270 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1152 1271 // We're done with marking.
1153 1272 // This is the end of the marking cycle, we expect all
1154 1273 // threads to have SATB queues with active set to true.
1155 1274 satb_mq_set.set_active_all_threads(false, /* new active value */
1156 1275 true /* expected_active */);
1157 1276
1158 1277 if (VerifyDuringGC) {
1159 1278 HandleMark hm; // handle scope
1160 1279 gclog_or_tty->print(" VerifyDuringGC:(after)");
1161 1280 Universe::heap()->prepare_for_verify();
1162 1281 Universe::verify(/* silent */ false,
1163 1282 /* option */ VerifyOption_G1UseNextMarking);
1164 1283 }
1165 1284 assert(!restart_for_overflow(), "sanity");
1166 1285 }
1167 1286
1287 + // Expand the marking stack, if we have to and if we can.
1288 + if (_markStack.should_expand()) {
1289 + _markStack.expand();
1290 + }
1291 +
1168 1292 // Reset the marking state if marking completed
1169 1293 if (!restart_for_overflow()) {
1170 1294 set_non_marking_state();
1171 1295 }
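
Taken together with set_should_expand() earlier in this change: an overflow during marking records _should_expand when the marking state is cleared, the remark pause drains the stack, and the now-empty stack is doubled here, before marking either completes or restarts for the overflow.
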
1172 1296
1173 1297 #if VERIFY_OBJS_PROCESSED
1174 1298 _scan_obj_cl.objs_processed = 0;
1175 1299 ThreadLocalObjQueue::objs_enqueued = 0;
1176 1300 #endif
1177 1301
1178 1302 // Statistics
1179 1303 double now = os::elapsedTime();
1180 1304 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1181 1305 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1182 1306 _remark_times.add((now - start) * 1000.0);
1183 1307
1184 1308 g1p->record_concurrent_mark_remark_end();
1185 1309 }
1186 1310
1187 1311 // Base class of the closures that finalize and verify the
1188 1312 // liveness counting data.
1189 1313 class CMCountDataClosureBase: public HeapRegionClosure {
1190 1314 protected:
1191 1315 G1CollectedHeap* _g1h;
1192 1316 ConcurrentMark* _cm;
1193 1317 CardTableModRefBS* _ct_bs;
1194 1318
1195 1319 BitMap* _region_bm;
1196 1320 BitMap* _card_bm;
1197 1321
1198 1322 // Takes a region that's not empty (i.e., it has at least one
1199 1323 // live object in it) and sets its corresponding bit on the region
1200 1324 // bitmap to 1. If the region is "starts humongous" it will also set
1201 1325 // to 1 the bits on the region bitmap that correspond to its
1202 1326 // associated "continues humongous" regions.
1203 1327 void set_bit_for_region(HeapRegion* hr) {
1204 1328 assert(!hr->continuesHumongous(), "should have filtered those out");
1205 1329
1206 1330 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1207 1331 if (!hr->startsHumongous()) {
1208 1332 // Normal (non-humongous) case: just set the bit.
1209 1333 _region_bm->par_at_put(index, true);
1210 1334 } else {
1211 1335 // Starts humongous case: calculate how many regions are part of
1212 1336 // this humongous region and then set the bit range.
1213 1337 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1214 1338 _region_bm->par_at_put_range(index, end_index, true);
1215 1339 }
1216 1340 }
1217 1341
1218 1342 public:
1219 1343 CMCountDataClosureBase(G1CollectedHeap* g1h,
1220 1344 BitMap* region_bm, BitMap* card_bm):
1221 1345 _g1h(g1h), _cm(g1h->concurrent_mark()),
1222 1346 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1223 1347 _region_bm(region_bm), _card_bm(card_bm) { }
1224 1348 };
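// Editor's note: a worked example of set_bit_for_region() above, using
// hypothetical region indices. A humongous object needing three regions
// occupies region 10 ("starts humongous") and regions 11-12 ("continues
// humongous"); last_hc_index() for region 10 is then 13, so
//   _region_bm->par_at_put_range(10, 13, true);
// sets bits 10, 11 and 12 in a single call (the end index is exclusive).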
1225 1349
1226 1350 // Closure that calculates the # live objects per region. Used
1227 1351 // for verification purposes during the cleanup pause.
1228 1352 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1229 1353 CMBitMapRO* _bm;
1230 1354 size_t _region_marked_bytes;
1231 1355
1232 1356 public:
1233 1357 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1234 1358 BitMap* region_bm, BitMap* card_bm) :
1235 1359 CMCountDataClosureBase(g1h, region_bm, card_bm),
1236 1360 _bm(bm), _region_marked_bytes(0) { }
1237 1361
1238 1362 bool doHeapRegion(HeapRegion* hr) {
1239 1363
1240 1364 if (hr->continuesHumongous()) {
1241 1365 // We will ignore these here and process them when their
1242 1366 // associated "starts humongous" region is processed (see
1243 1367       // set_bit_for_region()). Note that we cannot rely on their
1244 1368       // associated "starts humongous" region to have its bit set to
1245 1369 // 1 since, due to the region chunking in the parallel region
1246 1370 // iteration, a "continues humongous" region might be visited
1247 1371 // before its associated "starts humongous".
1248 1372 return false;
1249 1373 }
1250 1374
1251 1375 HeapWord* ntams = hr->next_top_at_mark_start();
1252 1376 HeapWord* start = hr->bottom();
1253 1377
1254 1378 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1255 1379 err_msg("Preconditions not met - "
1256 1380 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1257 1381 start, ntams, hr->end()));
1258 1382
1259 1383 // Find the first marked object at or after "start".
1260 1384 start = _bm->getNextMarkedWordAddress(start, ntams);
1261 1385
1262 1386 size_t marked_bytes = 0;
1263 1387
1264 1388 while (start < ntams) {
1265 1389 oop obj = oop(start);
1266 1390 int obj_sz = obj->size();
1267 1391 HeapWord* obj_end = start + obj_sz;
1268 1392
1269 1393 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1270 1394 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1271 1395
1272 1396       // Note: if we're looking at the last region in the heap, obj_end
1273 1397       // could actually be just beyond the end of the heap; end_idx
1274 1398 // will then correspond to a (non-existent) card that is also
1275 1399 // just beyond the heap.
1276 1400 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1277 1401 // end of object is not card aligned - increment to cover
1278 1402 // all the cards spanned by the object
1279 1403 end_idx += 1;
1280 1404 }
1281 1405
1282 1406 // Set the bits in the card BM for the cards spanned by this object.
1283 1407 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1284 1408
1285 1409 // Add the size of this object to the number of marked bytes.
1286 1410 marked_bytes += (size_t)obj_sz * HeapWordSize;
1287 1411
1288 1412 // Find the next marked object after this one.
1289 1413 start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1290 1414 }
1291 1415
1292 1416 // Mark the allocated-since-marking portion...
1293 1417 HeapWord* top = hr->top();
1294 1418 if (ntams < top) {
1295 1419 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1296 1420 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1297 1421
1298 1422       // Note: if we're looking at the last region in the heap, top
1299 1423       // could actually be just beyond the end of the heap; end_idx
1300 1424 // will then correspond to a (non-existent) card that is also
1301 1425 // just beyond the heap.
1302 1426 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1303 1427 // end of object is not card aligned - increment to cover
1304 1428 // all the cards spanned by the object
1305 1429 end_idx += 1;
1306 1430 }
1307 1431 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1308 1432
1309 1433 // This definitely means the region has live objects.
1310 1434 set_bit_for_region(hr);
1311 1435 }
1312 1436
1313 1437 // Update the live region bitmap.
1314 1438 if (marked_bytes > 0) {
1315 1439 set_bit_for_region(hr);
1316 1440 }
1317 1441
1318 1442 // Set the marked bytes for the current region so that
1319 1443     // it can be queried by a calling verification routine.
1320 1444 _region_marked_bytes = marked_bytes;
1321 1445
1322 1446 return false;
1323 1447 }
1324 1448
1325 1449 size_t region_marked_bytes() const { return _region_marked_bytes; }
1326 1450 };
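// Editor's note: worked numbers for the card-index arithmetic above.
// Assume (for this sketch only) 512-byte cards and 8-byte HeapWords,
// and an object at word offset 1000 from the heap bottom, 100 words long:
//   start_idx = (1000 * 8) / 512 = 15
//   end_idx   = (1100 * 8) / 512 = 17, and byte 8800 is not card
//               aligned, so end_idx is bumped to 18.
// Treating end_idx as exclusive, set_card_bitmap_range() marks cards
// 15..17, whose bytes [7680, 9216) cover the object's [8000, 8800).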
1327 1451
1328 1452 // Heap region closure used for verifying the counting data
1329 1453 // that was accumulated concurrently and aggregated during
1330 1454 // the remark pause. This closure is applied to the heap
1331 1455 // regions during the STW cleanup pause.
1332 1456
1333 1457 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1334 1458 G1CollectedHeap* _g1h;
1335 1459 ConcurrentMark* _cm;
1336 1460 CalcLiveObjectsClosure _calc_cl;
1337 1461 BitMap* _region_bm; // Region BM to be verified
1338 1462 BitMap* _card_bm; // Card BM to be verified
1339 1463 bool _verbose; // verbose output?
1340 1464
1341 1465 BitMap* _exp_region_bm; // Expected Region BM values
1342 1466 BitMap* _exp_card_bm; // Expected card BM values
1343 1467
1344 1468 int _failures;
1345 1469
1346 1470 public:
1347 1471 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1348 1472 BitMap* region_bm,
1349 1473 BitMap* card_bm,
1350 1474 BitMap* exp_region_bm,
1351 1475 BitMap* exp_card_bm,
1352 1476 bool verbose) :
1353 1477 _g1h(g1h), _cm(g1h->concurrent_mark()),
1354 1478 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1355 1479 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1356 1480 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1357 1481 _failures(0) { }
1358 1482
1359 1483 int failures() const { return _failures; }
1360 1484
1361 1485 bool doHeapRegion(HeapRegion* hr) {
1362 1486 if (hr->continuesHumongous()) {
1363 1487 // We will ignore these here and process them when their
1364 1488 // associated "starts humongous" region is processed (see
1365 1489       // set_bit_for_region()). Note that we cannot rely on their
1366 1490       // associated "starts humongous" region to have its bit set to
1367 1491 // 1 since, due to the region chunking in the parallel region
1368 1492 // iteration, a "continues humongous" region might be visited
1369 1493 // before its associated "starts humongous".
1370 1494 return false;
1371 1495 }
1372 1496
1373 1497 int failures = 0;
1374 1498
1375 1499 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1376 1500 // this region and set the corresponding bits in the expected region
1377 1501 // and card bitmaps.
1378 1502 bool res = _calc_cl.doHeapRegion(hr);
1379 1503 assert(res == false, "should be continuing");
1380 1504
1381 1505 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1382 1506 Mutex::_no_safepoint_check_flag);
1383 1507
1384 1508 // Verify the marked bytes for this region.
1385 1509 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1386 1510 size_t act_marked_bytes = hr->next_marked_bytes();
1387 1511
1388 1512     // We're not OK if expected marked bytes > actual marked bytes. It means
1389 1513     // we have missed accounting for some objects during the actual marking.
1390 1514 if (exp_marked_bytes > act_marked_bytes) {
1391 1515 if (_verbose) {
1392 1516 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1393 1517 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1394 1518 hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1395 1519 }
1396 1520 failures += 1;
1397 1521 }
1398 1522
1399 1523 // Verify the bit, for this region, in the actual and expected
1400 1524 // (which was just calculated) region bit maps.
1401 1525 // We're not OK if the bit in the calculated expected region
1402 1526 // bitmap is set and the bit in the actual region bitmap is not.
1403 1527 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1404 1528
1405 1529 bool expected = _exp_region_bm->at(index);
1406 1530 bool actual = _region_bm->at(index);
1407 1531 if (expected && !actual) {
1408 1532 if (_verbose) {
1409 1533 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1410 1534 "expected: %s, actual: %s",
1411 1535 hr->hrs_index(),
1412 1536 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1413 1537 }
1414 1538 failures += 1;
1415 1539 }
1416 1540
1417 1541 // Verify that the card bit maps for the cards spanned by the current
1418 1542 // region match. We have an error if we have a set bit in the expected
1419 1543 // bit map and the corresponding bit in the actual bitmap is not set.
1420 1544
1421 1545 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1422 1546 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1423 1547
1424 1548 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1425 1549 expected = _exp_card_bm->at(i);
1426 1550 actual = _card_bm->at(i);
1427 1551
1428 1552 if (expected && !actual) {
1429 1553 if (_verbose) {
1430 1554 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1431 1555 "expected: %s, actual: %s",
1432 1556 hr->hrs_index(), i,
1433 1557 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1434 1558 }
1435 1559 failures += 1;
1436 1560 }
1437 1561 }
1438 1562
1439 1563 if (failures > 0 && _verbose) {
1440 1564 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1441 1565 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1442 1566 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1443 1567 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1444 1568 }
1445 1569
1446 1570 _failures += failures;
1447 1571
1448 1572 // We could stop iteration over the heap when we
1449 1573 // find the first violating region by returning true.
1450 1574 return false;
1451 1575 }
1452 1576 };
1453 1577
1454 1578
1455 1579 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1456 1580 protected:
1457 1581 G1CollectedHeap* _g1h;
1458 1582 ConcurrentMark* _cm;
1459 1583 BitMap* _actual_region_bm;
1460 1584 BitMap* _actual_card_bm;
1461 1585
1462 1586 uint _n_workers;
1463 1587
1464 1588 BitMap* _expected_region_bm;
1465 1589 BitMap* _expected_card_bm;
1466 1590
1467 1591 int _failures;
1468 1592 bool _verbose;
1469 1593
1470 1594 public:
1471 1595 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1472 1596 BitMap* region_bm, BitMap* card_bm,
1473 1597 BitMap* expected_region_bm, BitMap* expected_card_bm)
1474 1598 : AbstractGangTask("G1 verify final counting"),
1475 1599 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1476 1600 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1477 1601 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1478 1602 _failures(0), _verbose(false),
1479 1603 _n_workers(0) {
1480 1604 assert(VerifyDuringGC, "don't call this otherwise");
1481 1605
1482 1606 // Use the value already set as the number of active threads
1483 1607 // in the call to run_task().
1484 1608 if (G1CollectedHeap::use_parallel_gc_threads()) {
1485 1609 assert( _g1h->workers()->active_workers() > 0,
1486 1610 "Should have been previously set");
1487 1611 _n_workers = _g1h->workers()->active_workers();
1488 1612 } else {
1489 1613 _n_workers = 1;
1490 1614 }
1491 1615
1492 1616 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1493 1617 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1494 1618
1495 1619 _verbose = _cm->verbose_medium();
1496 1620 }
1497 1621
1498 1622 void work(uint worker_id) {
1499 1623 assert(worker_id < _n_workers, "invariant");
1500 1624
1501 1625 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1502 1626 _actual_region_bm, _actual_card_bm,
1503 1627 _expected_region_bm,
1504 1628 _expected_card_bm,
1505 1629 _verbose);
1506 1630
1507 1631 if (G1CollectedHeap::use_parallel_gc_threads()) {
1508 1632 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1509 1633 worker_id,
1510 1634 _n_workers,
1511 1635 HeapRegion::VerifyCountClaimValue);
1512 1636 } else {
1513 1637 _g1h->heap_region_iterate(&verify_cl);
1514 1638 }
1515 1639
1516 1640 Atomic::add(verify_cl.failures(), &_failures);
1517 1641 }
1518 1642
1519 1643 int failures() const { return _failures; }
1520 1644 };
1521 1645
1522 1646 // Closure that finalizes the liveness counting data.
1523 1647 // Used during the cleanup pause.
1524 1648 // Sets the bits corresponding to the interval [NTAMS, top]
1525 1649 // (which contains the implicitly live objects) in the
1526 1650 // card liveness bitmap. Also sets the bit for each region,
1527 1651 // containing live data, in the region liveness bitmap.
1528 1652
1529 1653 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1530 1654 public:
1531 1655 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1532 1656 BitMap* region_bm,
1533 1657 BitMap* card_bm) :
1534 1658 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1535 1659
1536 1660 bool doHeapRegion(HeapRegion* hr) {
1537 1661
1538 1662 if (hr->continuesHumongous()) {
1539 1663 // We will ignore these here and process them when their
1540 1664 // associated "starts humongous" region is processed (see
1541 1665       // set_bit_for_region()). Note that we cannot rely on their
1542 1666       // associated "starts humongous" region to have its bit set to
1543 1667 // 1 since, due to the region chunking in the parallel region
1544 1668 // iteration, a "continues humongous" region might be visited
1545 1669 // before its associated "starts humongous".
1546 1670 return false;
1547 1671 }
1548 1672
1549 1673 HeapWord* ntams = hr->next_top_at_mark_start();
1550 1674 HeapWord* top = hr->top();
1551 1675
1552 1676 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1553 1677
1554 1678 // Mark the allocated-since-marking portion...
1555 1679 if (ntams < top) {
1556 1680 // This definitely means the region has live objects.
1557 1681 set_bit_for_region(hr);
1558 1682
1559 1683 // Now set the bits in the card bitmap for [ntams, top)
1560 1684 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1561 1685 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1562 1686
1563 1687       // Note: if we're looking at the last region in the heap, top
1564 1688       // could actually be just beyond the end of the heap; end_idx
1565 1689 // will then correspond to a (non-existent) card that is also
1566 1690 // just beyond the heap.
1567 1691 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1568 1692 // end of object is not card aligned - increment to cover
1569 1693 // all the cards spanned by the object
1570 1694 end_idx += 1;
1571 1695 }
1572 1696
1573 1697 assert(end_idx <= _card_bm->size(),
1574 1698 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1575 1699 end_idx, _card_bm->size()));
1576 1700 assert(start_idx < _card_bm->size(),
1577 1701 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1578 1702 start_idx, _card_bm->size()));
1579 1703
1580 1704 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1581 1705 }
1582 1706
1583 1707 // Set the bit for the region if it contains live data
1584 1708 if (hr->next_marked_bytes() > 0) {
1585 1709 set_bit_for_region(hr);
1586 1710 }
1587 1711
1588 1712 return false;
1589 1713 }
1590 1714 };
1591 1715
1592 1716 class G1ParFinalCountTask: public AbstractGangTask {
1593 1717 protected:
1594 1718 G1CollectedHeap* _g1h;
1595 1719 ConcurrentMark* _cm;
1596 1720 BitMap* _actual_region_bm;
1597 1721 BitMap* _actual_card_bm;
1598 1722
1599 1723 uint _n_workers;
1600 1724
1601 1725 public:
1602 1726 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1603 1727 : AbstractGangTask("G1 final counting"),
1604 1728 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1605 1729 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1606 1730 _n_workers(0) {
1607 1731 // Use the value already set as the number of active threads
1608 1732 // in the call to run_task().
1609 1733 if (G1CollectedHeap::use_parallel_gc_threads()) {
1610 1734 assert( _g1h->workers()->active_workers() > 0,
1611 1735 "Should have been previously set");
1612 1736 _n_workers = _g1h->workers()->active_workers();
1613 1737 } else {
1614 1738 _n_workers = 1;
1615 1739 }
1616 1740 }
1617 1741
1618 1742 void work(uint worker_id) {
1619 1743 assert(worker_id < _n_workers, "invariant");
1620 1744
1621 1745 FinalCountDataUpdateClosure final_update_cl(_g1h,
1622 1746 _actual_region_bm,
1623 1747 _actual_card_bm);
1624 1748
1625 1749 if (G1CollectedHeap::use_parallel_gc_threads()) {
1626 1750 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1627 1751 worker_id,
1628 1752 _n_workers,
1629 1753 HeapRegion::FinalCountClaimValue);
1630 1754 } else {
1631 1755 _g1h->heap_region_iterate(&final_update_cl);
1632 1756 }
1633 1757 }
1634 1758 };
1635 1759
1636 1760 class G1ParNoteEndTask;
1637 1761
1638 1762 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1639 1763 G1CollectedHeap* _g1;
1640 1764 int _worker_num;
1641 1765 size_t _max_live_bytes;
1642 1766 uint _regions_claimed;
1643 1767 size_t _freed_bytes;
1644 1768 FreeRegionList* _local_cleanup_list;
1645 1769 OldRegionSet* _old_proxy_set;
1646 1770 HumongousRegionSet* _humongous_proxy_set;
1647 1771 HRRSCleanupTask* _hrrs_cleanup_task;
1648 1772 double _claimed_region_time;
1649 1773 double _max_region_time;
1650 1774
1651 1775 public:
1652 1776 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1653 1777 int worker_num,
1654 1778 FreeRegionList* local_cleanup_list,
1655 1779 OldRegionSet* old_proxy_set,
1656 1780 HumongousRegionSet* humongous_proxy_set,
1657 1781 HRRSCleanupTask* hrrs_cleanup_task) :
1658 1782 _g1(g1), _worker_num(worker_num),
1659 1783 _max_live_bytes(0), _regions_claimed(0),
1660 1784 _freed_bytes(0),
1661 1785 _claimed_region_time(0.0), _max_region_time(0.0),
1662 1786 _local_cleanup_list(local_cleanup_list),
1663 1787 _old_proxy_set(old_proxy_set),
1664 1788 _humongous_proxy_set(humongous_proxy_set),
1665 1789 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1666 1790
1667 1791 size_t freed_bytes() { return _freed_bytes; }
1668 1792
1669 1793 bool doHeapRegion(HeapRegion *hr) {
1670 1794 if (hr->continuesHumongous()) {
1671 1795 return false;
1672 1796 }
1673 1797 // We use a claim value of zero here because all regions
1674 1798 // were claimed with value 1 in the FinalCount task.
1675 1799 _g1->reset_gc_time_stamps(hr);
1676 1800 double start = os::elapsedTime();
1677 1801 _regions_claimed++;
1678 1802 hr->note_end_of_marking();
1679 1803 _max_live_bytes += hr->max_live_bytes();
1680 1804 _g1->free_region_if_empty(hr,
1681 1805 &_freed_bytes,
1682 1806 _local_cleanup_list,
1683 1807 _old_proxy_set,
1684 1808 _humongous_proxy_set,
1685 1809 _hrrs_cleanup_task,
1686 1810 true /* par */);
1687 1811 double region_time = (os::elapsedTime() - start);
1688 1812 _claimed_region_time += region_time;
1689 1813 if (region_time > _max_region_time) {
1690 1814 _max_region_time = region_time;
1691 1815 }
1692 1816 return false;
1693 1817 }
1694 1818
1695 1819 size_t max_live_bytes() { return _max_live_bytes; }
1696 1820 uint regions_claimed() { return _regions_claimed; }
1697 1821 double claimed_region_time_sec() { return _claimed_region_time; }
1698 1822 double max_region_time_sec() { return _max_region_time; }
1699 1823 };
1700 1824
1701 1825 class G1ParNoteEndTask: public AbstractGangTask {
1702 1826 friend class G1NoteEndOfConcMarkClosure;
1703 1827
1704 1828 protected:
1705 1829 G1CollectedHeap* _g1h;
1706 1830 size_t _max_live_bytes;
1707 1831 size_t _freed_bytes;
1708 1832 FreeRegionList* _cleanup_list;
1709 1833
1710 1834 public:
1711 1835 G1ParNoteEndTask(G1CollectedHeap* g1h,
1712 1836 FreeRegionList* cleanup_list) :
1713 1837 AbstractGangTask("G1 note end"), _g1h(g1h),
1714 1838 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1715 1839
1716 1840 void work(uint worker_id) {
1717 1841 double start = os::elapsedTime();
1718 1842 FreeRegionList local_cleanup_list("Local Cleanup List");
1719 1843 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1720 1844 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1721 1845 HRRSCleanupTask hrrs_cleanup_task;
1722 1846 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1723 1847 &old_proxy_set,
1724 1848 &humongous_proxy_set,
1725 1849 &hrrs_cleanup_task);
1726 1850 if (G1CollectedHeap::use_parallel_gc_threads()) {
1727 1851 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1728 1852 _g1h->workers()->active_workers(),
1729 1853 HeapRegion::NoteEndClaimValue);
1730 1854 } else {
1731 1855 _g1h->heap_region_iterate(&g1_note_end);
1732 1856 }
1733 1857 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1734 1858
1735 1859 // Now update the lists
1736 1860 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1737 1861 NULL /* free_list */,
1738 1862 &old_proxy_set,
1739 1863 &humongous_proxy_set,
1740 1864 true /* par */);
1741 1865 {
1742 1866 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1743 1867 _max_live_bytes += g1_note_end.max_live_bytes();
1744 1868 _freed_bytes += g1_note_end.freed_bytes();
1745 1869
1746 1870 // If we iterate over the global cleanup list at the end of
1747 1871 // cleanup to do this printing we will not guarantee to only
1748 1872 // generate output for the newly-reclaimed regions (the list
1749 1873 // might not be empty at the beginning of cleanup; we might
1750 1874 // still be working on its previous contents). So we do the
1751 1875 // printing here, before we append the new regions to the global
1752 1876 // cleanup list.
1753 1877
1754 1878 G1HRPrinter* hr_printer = _g1h->hr_printer();
1755 1879 if (hr_printer->is_active()) {
1756 1880 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1757 1881 while (iter.more_available()) {
1758 1882 HeapRegion* hr = iter.get_next();
1759 1883 hr_printer->cleanup(hr);
1760 1884 }
1761 1885 }
1762 1886
1763 1887 _cleanup_list->add_as_tail(&local_cleanup_list);
1764 1888 assert(local_cleanup_list.is_empty(), "post-condition");
1765 1889
1766 1890 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1767 1891 }
1768 1892 }
1769 1893 size_t max_live_bytes() { return _max_live_bytes; }
1770 1894 size_t freed_bytes() { return _freed_bytes; }
1771 1895 };
1772 1896
1773 1897 class G1ParScrubRemSetTask: public AbstractGangTask {
1774 1898 protected:
1775 1899 G1RemSet* _g1rs;
1776 1900 BitMap* _region_bm;
1777 1901 BitMap* _card_bm;
1778 1902 public:
1779 1903 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1780 1904 BitMap* region_bm, BitMap* card_bm) :
1781 1905 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1782 1906 _region_bm(region_bm), _card_bm(card_bm) { }
1783 1907
1784 1908 void work(uint worker_id) {
1785 1909 if (G1CollectedHeap::use_parallel_gc_threads()) {
1786 1910 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1787 1911 HeapRegion::ScrubRemSetClaimValue);
1788 1912 } else {
1789 1913 _g1rs->scrub(_region_bm, _card_bm);
1790 1914 }
1791 1915 }
1792 1916
1793 1917 };
1794 1918
1795 1919 void ConcurrentMark::cleanup() {
1796 1920 // world is stopped at this checkpoint
1797 1921 assert(SafepointSynchronize::is_at_safepoint(),
1798 1922 "world should be stopped");
1799 1923 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1800 1924
1801 1925 // If a full collection has happened, we shouldn't do this.
1802 1926 if (has_aborted()) {
1803 1927 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1804 1928 return;
1805 1929 }
1806 1930
1807 1931 HRSPhaseSetter x(HRSPhaseCleanup);
1808 1932 g1h->verify_region_sets_optional();
1809 1933
1810 1934 if (VerifyDuringGC) {
1811 1935 HandleMark hm; // handle scope
1812 1936 gclog_or_tty->print(" VerifyDuringGC:(before)");
1813 1937 Universe::heap()->prepare_for_verify();
1814 1938 Universe::verify(/* silent */ false,
1815 1939 /* option */ VerifyOption_G1UsePrevMarking);
1816 1940 }
1817 1941
1818 1942 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1819 1943 g1p->record_concurrent_mark_cleanup_start();
1820 1944
1821 1945 double start = os::elapsedTime();
1822 1946
1823 1947 HeapRegionRemSet::reset_for_cleanup_tasks();
1824 1948
1825 1949 uint n_workers;
1826 1950
1827 1951 // Do counting once more with the world stopped for good measure.
1828 1952 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1829 1953
1830 1954 if (G1CollectedHeap::use_parallel_gc_threads()) {
1831 1955 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1832 1956 "sanity check");
1833 1957
1834 1958 g1h->set_par_threads();
1835 1959 n_workers = g1h->n_par_threads();
1836 1960 assert(g1h->n_par_threads() == n_workers,
1837 1961 "Should not have been reset");
1838 1962 g1h->workers()->run_task(&g1_par_count_task);
1839 1963 // Done with the parallel phase so reset to 0.
1840 1964 g1h->set_par_threads(0);
1841 1965
1842 1966 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1843 1967 "sanity check");
1844 1968 } else {
1845 1969 n_workers = 1;
1846 1970 g1_par_count_task.work(0);
1847 1971 }
1848 1972
1849 1973 if (VerifyDuringGC) {
1850 1974 // Verify that the counting data accumulated during marking matches
1851 1975 // that calculated by walking the marking bitmap.
1852 1976
1853 1977 // Bitmaps to hold expected values
1854 1978 BitMap expected_region_bm(_region_bm.size(), false);
1855 1979 BitMap expected_card_bm(_card_bm.size(), false);
1856 1980
1857 1981 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1858 1982 &_region_bm,
1859 1983 &_card_bm,
1860 1984 &expected_region_bm,
1861 1985 &expected_card_bm);
1862 1986
1863 1987 if (G1CollectedHeap::use_parallel_gc_threads()) {
1864 1988 g1h->set_par_threads((int)n_workers);
1865 1989 g1h->workers()->run_task(&g1_par_verify_task);
1866 1990 // Done with the parallel phase so reset to 0.
1867 1991 g1h->set_par_threads(0);
1868 1992
1869 1993 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1870 1994 "sanity check");
1871 1995 } else {
1872 1996 g1_par_verify_task.work(0);
1873 1997 }
1874 1998
1875 1999 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1876 2000 }
1877 2001
1878 2002 size_t start_used_bytes = g1h->used();
1879 2003 g1h->set_marking_complete();
1880 2004
1881 2005 double count_end = os::elapsedTime();
1882 2006 double this_final_counting_time = (count_end - start);
1883 2007 _total_counting_time += this_final_counting_time;
1884 2008
1885 2009 if (G1PrintRegionLivenessInfo) {
1886 2010 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1887 2011 _g1h->heap_region_iterate(&cl);
1888 2012 }
1889 2013
1890 2014 // Install newly created mark bitMap as "prev".
1891 2015 swapMarkBitMaps();
1892 2016
1893 2017 g1h->reset_gc_time_stamp();
1894 2018
1895 2019 // Note end of marking in all heap regions.
1896 2020 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1897 2021 if (G1CollectedHeap::use_parallel_gc_threads()) {
1898 2022 g1h->set_par_threads((int)n_workers);
1899 2023 g1h->workers()->run_task(&g1_par_note_end_task);
1900 2024 g1h->set_par_threads(0);
1901 2025
1902 2026 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1903 2027 "sanity check");
1904 2028 } else {
1905 2029 g1_par_note_end_task.work(0);
1906 2030 }
1907 2031 g1h->check_gc_time_stamps();
1908 2032
1909 2033 if (!cleanup_list_is_empty()) {
1910 2034 // The cleanup list is not empty, so we'll have to process it
1911 2035 // concurrently. Notify anyone else that might be wanting free
1912 2036 // regions that there will be more free regions coming soon.
1913 2037 g1h->set_free_regions_coming();
1914 2038 }
1915 2039
1916 2040   // Scrub the rem sets before the record_concurrent_mark_cleanup_end() call
1917 2041   // below, since scrubbing affects the metric by which we sort the heap regions.
1918 2042 if (G1ScrubRemSets) {
1919 2043 double rs_scrub_start = os::elapsedTime();
1920 2044 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1921 2045 if (G1CollectedHeap::use_parallel_gc_threads()) {
1922 2046 g1h->set_par_threads((int)n_workers);
1923 2047 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1924 2048 g1h->set_par_threads(0);
1925 2049
1926 2050 assert(g1h->check_heap_region_claim_values(
1927 2051 HeapRegion::ScrubRemSetClaimValue),
1928 2052 "sanity check");
1929 2053 } else {
1930 2054 g1_par_scrub_rs_task.work(0);
1931 2055 }
1932 2056
1933 2057 double rs_scrub_end = os::elapsedTime();
1934 2058 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1935 2059 _total_rs_scrub_time += this_rs_scrub_time;
1936 2060 }
1937 2061
1938 2062 // this will also free any regions totally full of garbage objects,
1939 2063 // and sort the regions.
1940 2064 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1941 2065
1942 2066 // Statistics.
1943 2067 double end = os::elapsedTime();
1944 2068 _cleanup_times.add((end - start) * 1000.0);
1945 2069
1946 2070 if (G1Log::fine()) {
1947 2071 g1h->print_size_transition(gclog_or_tty,
1948 2072 start_used_bytes,
1949 2073 g1h->used(),
1950 2074 g1h->capacity());
1951 2075 }
1952 2076
1953 2077 // Clean up will have freed any regions completely full of garbage.
1954 2078 // Update the soft reference policy with the new heap occupancy.
1955 2079 Universe::update_heap_info_at_gc();
1956 2080
1957 2081 // We need to make this be a "collection" so any collection pause that
1958 2082 // races with it goes around and waits for completeCleanup to finish.
1959 2083 g1h->increment_total_collections();
1960 2084
1961 2085 // We reclaimed old regions so we should calculate the sizes to make
1962 2086 // sure we update the old gen/space data.
1963 2087 g1h->g1mm()->update_sizes();
1964 2088
1965 2089 if (VerifyDuringGC) {
1966 2090 HandleMark hm; // handle scope
1967 2091 gclog_or_tty->print(" VerifyDuringGC:(after)");
1968 2092 Universe::heap()->prepare_for_verify();
1969 2093 Universe::verify(/* silent */ false,
1970 2094 /* option */ VerifyOption_G1UsePrevMarking);
1971 2095 }
1972 2096
1973 2097 g1h->verify_region_sets_optional();
1974 2098 }
1975 2099
1976 2100 void ConcurrentMark::completeCleanup() {
1977 2101 if (has_aborted()) return;
1978 2102
1979 2103 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1980 2104
1981 2105 _cleanup_list.verify_optional();
1982 2106 FreeRegionList tmp_free_list("Tmp Free List");
1983 2107
1984 2108 if (G1ConcRegionFreeingVerbose) {
1985 2109 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1986 2110 "cleanup list has %u entries",
1987 2111 _cleanup_list.length());
1988 2112 }
1989 2113
1990 2114   // No one else should be accessing the _cleanup_list at this point,
1991 2115   // so it's not necessary to take any locks.
1992 2116 while (!_cleanup_list.is_empty()) {
1993 2117 HeapRegion* hr = _cleanup_list.remove_head();
1994 2118 assert(hr != NULL, "the list was not empty");
1995 2119 hr->par_clear();
1996 2120 tmp_free_list.add_as_tail(hr);
1997 2121
1998 2122 // Instead of adding one region at a time to the secondary_free_list,
1999 2123 // we accumulate them in the local list and move them a few at a
2000 2124 // time. This also cuts down on the number of notify_all() calls
2001 2125 // we do during this process. We'll also append the local list when
2002 2126 // _cleanup_list is empty (which means we just removed the last
2003 2127 // region from the _cleanup_list).
2004 2128 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2005 2129 _cleanup_list.is_empty()) {
2006 2130 if (G1ConcRegionFreeingVerbose) {
2007 2131 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2008 2132 "appending %u entries to the secondary_free_list, "
2009 2133 "cleanup list still has %u entries",
2010 2134 tmp_free_list.length(),
2011 2135 _cleanup_list.length());
2012 2136 }
2013 2137
2014 2138 {
2015 2139 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2016 2140 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2017 2141 SecondaryFreeList_lock->notify_all();
2018 2142 }
2019 2143
2020 2144 if (G1StressConcRegionFreeing) {
2021 2145 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2022 2146 os::sleep(Thread::current(), (jlong) 1, false);
2023 2147 }
2024 2148 }
2025 2149 }
2026 2150 }
2027 2151 assert(tmp_free_list.is_empty(), "post-condition");
2028 2152 }
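// Editor's note: the modulo test in completeCleanup() above bounds both
// the SecondaryFreeList_lock hold time and the number of notify_all()
// calls. With a hypothetical G1SecondaryFreeListAppendLength of 5, a
// 12-entry cleanup list is published in three batches (after regions 5,
// 10 and, since the cleanup list is then empty, 12) instead of twelve
// per-region lock acquisitions.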
2029 2153
2030 2154 // Support closures for reference processing in G1
2031 2155
2032 2156 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2033 2157 HeapWord* addr = (HeapWord*)obj;
2034 2158 return addr != NULL &&
2035 2159 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2036 2160 }
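// Editor's note (reading the predicate above): an oop is considered live
// when it is non-NULL and either lies outside the G1-reserved heap, where
// this marking cycle never reclaims it, or lies inside the heap and is
// not "ill", i.e. not dead according to the current marking information.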
2037 2161
2038 2162 class G1CMKeepAliveClosure: public ExtendedOopClosure {
2039 2163 G1CollectedHeap* _g1;
2040 2164 ConcurrentMark* _cm;
2041 2165 public:
2042 2166 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2043 2167 _g1(g1), _cm(cm) {
2044 2168 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2045 2169 }
2046 2170
2047 2171 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2048 2172 virtual void do_oop( oop* p) { do_oop_work(p); }
2049 2173
2050 2174 template <class T> void do_oop_work(T* p) {
2051 2175 oop obj = oopDesc::load_decode_heap_oop(p);
2052 2176 HeapWord* addr = (HeapWord*)obj;
2053 2177
2054 2178 if (_cm->verbose_high()) {
2055 2179 gclog_or_tty->print_cr("\t[0] we're looking at location "
2056 2180 "*"PTR_FORMAT" = "PTR_FORMAT,
2057 2181 p, (void*) obj);
2058 2182 }
2059 2183
2060 2184 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2061 2185 _cm->mark_and_count(obj);
2062 2186 _cm->mark_stack_push(obj);
2063 2187 }
2064 2188 }
2065 2189 };
2066 2190
2067 2191 class G1CMDrainMarkingStackClosure: public VoidClosure {
2068 2192 ConcurrentMark* _cm;
2069 2193 CMMarkStack* _markStack;
2070 2194 G1CMKeepAliveClosure* _oopClosure;
2071 2195 public:
2072 2196 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2073 2197 G1CMKeepAliveClosure* oopClosure) :
2074 2198 _cm(cm),
2075 2199 _markStack(markStack),
2076 2200 _oopClosure(oopClosure) { }
2077 2201
2078 2202 void do_void() {
2079 2203 _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false);
2080 2204 }
2081 2205 };
2082 2206
2083 2207 // 'Keep Alive' closure used by parallel reference processing.
2084 2208 // An instance of this closure is used in the parallel reference processing
2085 2209 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2086 2210 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2087 2211 // placed on to discovered ref lists once so we can mark and push with no
2088 2212 // need to check whether the object has already been marked. Using the
2089 2213 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2090 2214 // operating on the global mark stack. This means that an individual
2091 2215 // worker would be doing lock-free pushes while it processes its own
2092 2216 // discovered ref list followed by drain call. If the discovered ref lists
2093 2217 // are unbalanced then this could cause interference with the other
2094 2218 // workers. Using a CMTask (and its embedded local data structures)
2095 2219 // avoids that potential interference.
2096 2220 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2097 2221 ConcurrentMark* _cm;
2098 2222 CMTask* _task;
2099 2223 int _ref_counter_limit;
2100 2224 int _ref_counter;
2101 2225 public:
2102 2226 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2103 2227 _cm(cm), _task(task),
2104 2228 _ref_counter_limit(G1RefProcDrainInterval) {
2105 2229 assert(_ref_counter_limit > 0, "sanity");
2106 2230 _ref_counter = _ref_counter_limit;
2107 2231 }
2108 2232
2109 2233 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2110 2234 virtual void do_oop( oop* p) { do_oop_work(p); }
2111 2235
2112 2236 template <class T> void do_oop_work(T* p) {
2113 2237 if (!_cm->has_overflown()) {
2114 2238 oop obj = oopDesc::load_decode_heap_oop(p);
2115 2239 if (_cm->verbose_high()) {
2116 2240 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2117 2241 "*"PTR_FORMAT" = "PTR_FORMAT,
2118 2242 _task->worker_id(), p, (void*) obj);
2119 2243 }
2120 2244
2121 2245 _task->deal_with_reference(obj);
2122 2246 _ref_counter--;
2123 2247
2124 2248 if (_ref_counter == 0) {
2125 2249 // We have dealt with _ref_counter_limit references, pushing them and objects
2126 2250 // reachable from them on to the local stack (and possibly the global stack).
2127 2251 // Call do_marking_step() to process these entries. We call the routine in a
2128 2252 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2129 2253 // with the entries that we've pushed as a result of the deal_with_reference
2130 2254 // calls above) or we overflow.
2131 2255 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2132 2256 // while there may still be some work to do. (See the comment at the
2133 2257 // beginning of CMTask::do_marking_step() for those conditions - one of which
2134 2258 // is reaching the specified time target.) It is only when
2135 2259 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2136 2260 // that the marking has completed.
2137 2261 do {
2138 2262 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2139 2263 _task->do_marking_step(mark_step_duration_ms,
2140 2264 false /* do_stealing */,
2141 2265 false /* do_termination */);
2142 2266 } while (_task->has_aborted() && !_cm->has_overflown());
2143 2267 _ref_counter = _ref_counter_limit;
2144 2268 }
2145 2269 } else {
2146 2270 if (_cm->verbose_high()) {
2147 2271 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2148 2272 }
2149 2273 }
2150 2274 }
2151 2275 };
2152 2276
2153 2277 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2154 2278 ConcurrentMark* _cm;
2155 2279 CMTask* _task;
2156 2280 public:
2157 2281 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2158 2282 _cm(cm), _task(task) { }
2159 2283
2160 2284 void do_void() {
2161 2285 do {
2162 2286 if (_cm->verbose_high()) {
2163 2287 gclog_or_tty->print_cr("\t[%u] Drain: Calling do marking_step",
2164 2288 _task->worker_id());
2165 2289 }
2166 2290
2167 2291 // We call CMTask::do_marking_step() to completely drain the local and
2168 2292 // global marking stacks. The routine is called in a loop, which we'll
2169 2293     // exit if there's nothing more to do (i.e. we've completely drained the
2170 2294 // entries that were pushed as a result of applying the
2171 2295 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2172 2296 // lists above) or we overflow the global marking stack.
2173 2297 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2174 2298 // while there may still be some work to do. (See the comment at the
2175 2299 // beginning of CMTask::do_marking_step() for those conditions - one of which
2176 2300 // is reaching the specified time target.) It is only when
2177 2301 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2178 2302 // that the marking has completed.
2179 2303
2180 2304 _task->do_marking_step(1000000000.0 /* something very large */,
2181 2305 true /* do_stealing */,
2182 2306 true /* do_termination */);
2183 2307 } while (_task->has_aborted() && !_cm->has_overflown());
2184 2308 }
2185 2309 };
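// Editor's note: the two closures above, and CMRemarkTask below, all use
// the same retry contract with CMTask::do_marking_step(). In sketch form,
// with an illustrative time target:
//
//   do {
//     task->do_marking_step(target_ms, do_stealing, do_termination);
//   } while (task->has_aborted() && !cm->has_overflown());
//
// has_aborted() means "not finished, call me again"; an overflow of the
// global mark stack exits the loop so that remark can be abandoned and
// concurrent marking restarted (possibly after expanding the stack).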
2186 2310
2187 2311 // Implementation of AbstractRefProcTaskExecutor for parallel
2188 2312 // reference processing at the end of G1 concurrent marking
2189 2313
2190 2314 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2191 2315 private:
2192 2316 G1CollectedHeap* _g1h;
2193 2317 ConcurrentMark* _cm;
2194 2318 WorkGang* _workers;
2195 2319 int _active_workers;
2196 2320
2197 2321 public:
2198 2322 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2199 2323 ConcurrentMark* cm,
2200 2324 WorkGang* workers,
2201 2325 int n_workers) :
2202 2326 _g1h(g1h), _cm(cm),
2203 2327 _workers(workers), _active_workers(n_workers) { }
2204 2328
2205 2329 // Executes the given task using concurrent marking worker threads.
2206 2330 virtual void execute(ProcessTask& task);
2207 2331 virtual void execute(EnqueueTask& task);
2208 2332 };
2209 2333
2210 2334 class G1CMRefProcTaskProxy: public AbstractGangTask {
2211 2335 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2212 2336 ProcessTask& _proc_task;
2213 2337 G1CollectedHeap* _g1h;
2214 2338 ConcurrentMark* _cm;
2215 2339
2216 2340 public:
2217 2341 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2218 2342 G1CollectedHeap* g1h,
2219 2343 ConcurrentMark* cm) :
2220 2344 AbstractGangTask("Process reference objects in parallel"),
2221 2345 _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2222 2346
2223 2347 virtual void work(uint worker_id) {
2224 2348 CMTask* marking_task = _cm->task(worker_id);
2225 2349 G1CMIsAliveClosure g1_is_alive(_g1h);
2226 2350 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2227 2351 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2228 2352
2229 2353 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2230 2354 }
2231 2355 };
2232 2356
2233 2357 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2234 2358 assert(_workers != NULL, "Need parallel worker threads.");
2235 2359
2236 2360 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2237 2361
2238 2362 // We need to reset the phase for each task execution so that
2239 2363 // the termination protocol of CMTask::do_marking_step works.
2240 2364 _cm->set_phase(_active_workers, false /* concurrent */);
2241 2365 _g1h->set_par_threads(_active_workers);
2242 2366 _workers->run_task(&proc_task_proxy);
2243 2367 _g1h->set_par_threads(0);
2244 2368 }
2245 2369
2246 2370 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2247 2371 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2248 2372 EnqueueTask& _enq_task;
2249 2373
2250 2374 public:
2251 2375 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2252 2376 AbstractGangTask("Enqueue reference objects in parallel"),
2253 2377 _enq_task(enq_task) { }
2254 2378
2255 2379 virtual void work(uint worker_id) {
2256 2380 _enq_task.work(worker_id);
2257 2381 }
2258 2382 };
2259 2383
2260 2384 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2261 2385 assert(_workers != NULL, "Need parallel worker threads.");
2262 2386
2263 2387 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2264 2388
2265 2389 _g1h->set_par_threads(_active_workers);
2266 2390 _workers->run_task(&enq_task_proxy);
2267 2391 _g1h->set_par_threads(0);
2268 2392 }
2269 2393
2270 2394 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2271 2395 ResourceMark rm;
2272 2396 HandleMark hm;
2273 2397
2274 2398 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2275 2399
2276 2400 // Is alive closure.
2277 2401 G1CMIsAliveClosure g1_is_alive(g1h);
2278 2402
2279 2403 // Inner scope to exclude the cleaning of the string and symbol
2280 2404 // tables from the displayed time.
2281 2405 {
2282 2406 if (G1Log::finer()) {
2283 2407 gclog_or_tty->put(' ');
2284 2408 }
2285 2409 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2286 2410
2287 2411 ReferenceProcessor* rp = g1h->ref_processor_cm();
2288 2412
2289 2413 // See the comment in G1CollectedHeap::ref_processing_init()
2290 2414 // about how reference processing currently works in G1.
2291 2415
2292 2416 // Process weak references.
2293 2417 rp->setup_policy(clear_all_soft_refs);
2294 2418 assert(_markStack.isEmpty(), "mark stack should be empty");
2295 2419
2296 2420 G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2297 2421 G1CMDrainMarkingStackClosure
2298 2422 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2299 2423
2300 2424 // We use the work gang from the G1CollectedHeap and we utilize all
2301 2425 // the worker threads.
2302 2426 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2303 2427 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2304 2428
2305 2429 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2306 2430 g1h->workers(), active_workers);
2307 2431
2308 2432 if (rp->processing_is_mt()) {
2309 2433 // Set the degree of MT here. If the discovery is done MT, there
2310 2434 // may have been a different number of threads doing the discovery
2311 2435 // and a different number of discovered lists may have Ref objects.
2312 2436 // That is OK as long as the Reference lists are balanced (see
2313 2437 // balance_all_queues() and balance_queues()).
2314 2438 rp->set_active_mt_degree(active_workers);
2315 2439
2316 2440 rp->process_discovered_references(&g1_is_alive,
2317 2441 &g1_keep_alive,
2318 2442 &g1_drain_mark_stack,
2319 2443 &par_task_executor);
2320 2444
2321 2445 // The work routines of the parallel keep_alive and drain_marking_stack
2322 2446 // will set the has_overflown flag if we overflow the global marking
2323 2447 // stack.
2324 2448 } else {
2325 2449 rp->process_discovered_references(&g1_is_alive,
2326 2450 &g1_keep_alive,
2327 2451 &g1_drain_mark_stack,
2328 2452 NULL);
2329 2453 }
2330 2454
2331 2455 assert(_markStack.overflow() || _markStack.isEmpty(),
2332 2456 "mark stack should be empty (unless it overflowed)");
2333 2457 if (_markStack.overflow()) {
2334 2458 // Should have been done already when we tried to push an
2335 2459 // entry on to the global mark stack. But let's do it again.
2336 2460 set_has_overflown();
2337 2461 }
2338 2462
2339 2463 if (rp->processing_is_mt()) {
2340 2464 assert(rp->num_q() == active_workers, "why not");
2341 2465 rp->enqueue_discovered_references(&par_task_executor);
2342 2466 } else {
2343 2467 rp->enqueue_discovered_references();
2344 2468 }
2345 2469
2346 2470 rp->verify_no_references_recorded();
2347 2471 assert(!rp->discovery_enabled(), "Post condition");
2348 2472 }
2349 2473
2350 2474 // Now clean up stale oops in StringTable
2351 2475 StringTable::unlink(&g1_is_alive);
2352 2476 // Clean up unreferenced symbols in symbol table.
2353 2477 SymbolTable::unlink();
2354 2478 }
2355 2479
2356 2480 void ConcurrentMark::swapMarkBitMaps() {
2357 2481 CMBitMapRO* temp = _prevMarkBitMap;
2358 2482 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2359 2483 _nextMarkBitMap = (CMBitMap*) temp;
2360 2484 }
2361 2485
2362 2486 class CMRemarkTask: public AbstractGangTask {
2363 2487 private:
2364 2488 ConcurrentMark *_cm;
2365 2489
2366 2490 public:
2367 2491 void work(uint worker_id) {
2368 2492 // Since all available tasks are actually started, we should
2369 2493     // only proceed if we're supposed to be active.
2370 2494 if (worker_id < _cm->active_tasks()) {
2371 2495 CMTask* task = _cm->task(worker_id);
2372 2496 task->record_start_time();
2373 2497 do {
2374 2498 task->do_marking_step(1000000000.0 /* something very large */,
2375 2499 true /* do_stealing */,
2376 2500 true /* do_termination */);
2377 2501 } while (task->has_aborted() && !_cm->has_overflown());
2378 2502 // If we overflow, then we do not want to restart. We instead
2379 2503 // want to abort remark and do concurrent marking again.
2380 2504 task->record_end_time();
2381 2505 }
2382 2506 }
2383 2507
2384 2508 CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2385 2509 AbstractGangTask("Par Remark"), _cm(cm) {
2386 2510 _cm->terminator()->reset_for_reuse(active_workers);
2387 2511 }
2388 2512 };
2389 2513
2390 2514 void ConcurrentMark::checkpointRootsFinalWork() {
2391 2515 ResourceMark rm;
2392 2516 HandleMark hm;
2393 2517 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2394 2518
2395 2519 g1h->ensure_parsability(false);
2396 2520
2397 2521 if (G1CollectedHeap::use_parallel_gc_threads()) {
2398 2522 G1CollectedHeap::StrongRootsScope srs(g1h);
2399 2523 // this is remark, so we'll use up all active threads
2400 2524 uint active_workers = g1h->workers()->active_workers();
2401 2525 if (active_workers == 0) {
2402 2526 assert(active_workers > 0, "Should have been set earlier");
2403 2527 active_workers = (uint) ParallelGCThreads;
2404 2528 g1h->workers()->set_active_workers(active_workers);
2405 2529 }
2406 2530 set_phase(active_workers, false /* concurrent */);
2407 2531     // Leave _parallel_marking_threads at its
2408 2532 // value originally calculated in the ConcurrentMark
2409 2533 // constructor and pass values of the active workers
2410 2534 // through the gang in the task.
2411 2535
2412 2536 CMRemarkTask remarkTask(this, active_workers);
2413 2537 g1h->set_par_threads(active_workers);
2414 2538 g1h->workers()->run_task(&remarkTask);
2415 2539 g1h->set_par_threads(0);
2416 2540 } else {
2417 2541 G1CollectedHeap::StrongRootsScope srs(g1h);
2418 2542 // this is remark, so we'll use up all available threads
2419 2543 uint active_workers = 1;
2420 2544 set_phase(active_workers, false /* concurrent */);
2421 2545
2422 2546 CMRemarkTask remarkTask(this, active_workers);
2423 2547 // We will start all available threads, even if we decide that the
2424 2548 // active_workers will be fewer. The extra ones will just bail out
2425 2549 // immediately.
2426 2550 remarkTask.work(0);
2427 2551 }
2428 2552 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2429 2553 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2430 2554
2431 2555 print_stats();
2432 2556
2433 2557 #if VERIFY_OBJS_PROCESSED
2434 2558 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2435 2559 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2436 2560 _scan_obj_cl.objs_processed,
2437 2561 ThreadLocalObjQueue::objs_enqueued);
2438 2562 guarantee(_scan_obj_cl.objs_processed ==
2439 2563 ThreadLocalObjQueue::objs_enqueued,
2440 2564 "Different number of objs processed and enqueued.");
2441 2565 }
2442 2566 #endif
2443 2567 }
2444 2568
2445 2569 #ifndef PRODUCT
2446 2570
2447 2571 class PrintReachableOopClosure: public OopClosure {
2448 2572 private:
2449 2573 G1CollectedHeap* _g1h;
2450 2574 outputStream* _out;
2451 2575 VerifyOption _vo;
2452 2576 bool _all;
2453 2577
2454 2578 public:
2455 2579 PrintReachableOopClosure(outputStream* out,
2456 2580 VerifyOption vo,
2457 2581 bool all) :
2458 2582 _g1h(G1CollectedHeap::heap()),
2459 2583 _out(out), _vo(vo), _all(all) { }
2460 2584
2461 2585 void do_oop(narrowOop* p) { do_oop_work(p); }
2462 2586 void do_oop( oop* p) { do_oop_work(p); }
2463 2587
2464 2588 template <class T> void do_oop_work(T* p) {
2465 2589 oop obj = oopDesc::load_decode_heap_oop(p);
2466 2590 const char* str = NULL;
2467 2591 const char* str2 = "";
2468 2592
2469 2593 if (obj == NULL) {
2470 2594 str = "";
2471 2595 } else if (!_g1h->is_in_g1_reserved(obj)) {
2472 2596 str = " O";
2473 2597 } else {
2474 2598 HeapRegion* hr = _g1h->heap_region_containing(obj);
2475 2599 guarantee(hr != NULL, "invariant");
2476 2600 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2477 2601 bool marked = _g1h->is_marked(obj, _vo);
2478 2602
2479 2603 if (over_tams) {
2480 2604 str = " >";
2481 2605 if (marked) {
2482 2606 str2 = " AND MARKED";
2483 2607 }
2484 2608 } else if (marked) {
2485 2609 str = " M";
2486 2610 } else {
2487 2611 str = " NOT";
2488 2612 }
2489 2613 }
2490 2614
2491 2615 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2492 2616 p, (void*) obj, str, str2);
2493 2617 }
2494 2618 };
2495 2619
2496 2620 class PrintReachableObjectClosure : public ObjectClosure {
2497 2621 private:
2498 2622 G1CollectedHeap* _g1h;
2499 2623 outputStream* _out;
2500 2624 VerifyOption _vo;
2501 2625 bool _all;
2502 2626 HeapRegion* _hr;
2503 2627
2504 2628 public:
2505 2629 PrintReachableObjectClosure(outputStream* out,
2506 2630 VerifyOption vo,
2507 2631 bool all,
2508 2632 HeapRegion* hr) :
2509 2633 _g1h(G1CollectedHeap::heap()),
2510 2634 _out(out), _vo(vo), _all(all), _hr(hr) { }
2511 2635
2512 2636 void do_object(oop o) {
2513 2637 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2514 2638 bool marked = _g1h->is_marked(o, _vo);
2515 2639 bool print_it = _all || over_tams || marked;
2516 2640
2517 2641 if (print_it) {
2518 2642 _out->print_cr(" "PTR_FORMAT"%s",
2519 2643 o, (over_tams) ? " >" : (marked) ? " M" : "");
2520 2644 PrintReachableOopClosure oopCl(_out, _vo, _all);
2521 2645 o->oop_iterate_no_header(&oopCl);
2522 2646 }
2523 2647 }
2524 2648 };
2525 2649
2526 2650 class PrintReachableRegionClosure : public HeapRegionClosure {
2527 2651 private:
2528 2652 G1CollectedHeap* _g1h;
2529 2653 outputStream* _out;
2530 2654 VerifyOption _vo;
2531 2655 bool _all;
2532 2656
2533 2657 public:
2534 2658 bool doHeapRegion(HeapRegion* hr) {
2535 2659 HeapWord* b = hr->bottom();
2536 2660 HeapWord* e = hr->end();
2537 2661 HeapWord* t = hr->top();
2538 2662 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2539 2663 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2540 2664 "TAMS: "PTR_FORMAT, b, e, t, p);
2541 2665 _out->cr();
2542 2666
2543 2667 HeapWord* from = b;
2544 2668 HeapWord* to = t;
2545 2669
2546 2670 if (to > from) {
2547 2671 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2548 2672 _out->cr();
2549 2673 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2550 2674 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2551 2675 _out->cr();
2552 2676 }
2553 2677
2554 2678 return false;
2555 2679 }
2556 2680
2557 2681 PrintReachableRegionClosure(outputStream* out,
2558 2682 VerifyOption vo,
2559 2683 bool all) :
2560 2684 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2561 2685 };
2562 2686
2563 2687 void ConcurrentMark::print_reachable(const char* str,
2564 2688 VerifyOption vo,
2565 2689 bool all) {
2566 2690 gclog_or_tty->cr();
2567 2691 gclog_or_tty->print_cr("== Doing heap dump... ");
2568 2692
2569 2693 if (G1PrintReachableBaseFile == NULL) {
2570 2694 gclog_or_tty->print_cr(" #### error: no base file defined");
2571 2695 return;
2572 2696 }
2573 2697
2574 2698 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2575 2699 (JVM_MAXPATHLEN - 1)) {
2576 2700 gclog_or_tty->print_cr(" #### error: file name too long");
2577 2701 return;
2578 2702 }
2579 2703
2580 2704 char file_name[JVM_MAXPATHLEN];
2581 2705 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2582 2706 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2583 2707
2584 2708 fileStream fout(file_name);
2585 2709 if (!fout.is_open()) {
2586 2710 gclog_or_tty->print_cr(" #### error: could not open file");
2587 2711 return;
2588 2712 }
2589 2713
2590 2714 outputStream* out = &fout;
2591 2715 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2592 2716 out->cr();
2593 2717
2594 2718 out->print_cr("--- ITERATING OVER REGIONS");
2595 2719 out->cr();
2596 2720 PrintReachableRegionClosure rcl(out, vo, all);
2597 2721 _g1h->heap_region_iterate(&rcl);
2598 2722 out->cr();
2599 2723
2600 2724 gclog_or_tty->print_cr(" done");
2601 2725 gclog_or_tty->flush();
2602 2726 }
2603 2727
2604 2728 #endif // PRODUCT
2605 2729
2606 2730 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2607 2731 // Note we are overriding the read-only view of the prev map here, via
2608 2732 // the cast.
2609 2733 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2610 2734 }
2611 2735
2612 2736 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2613 2737 _nextMarkBitMap->clearRange(mr);
2614 2738 }
2615 2739
2616 2740 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2617 2741 clearRangePrevBitmap(mr);
2618 2742 clearRangeNextBitmap(mr);
2619 2743 }
2620 2744
2621 2745 HeapRegion*
2622 2746 ConcurrentMark::claim_region(uint worker_id) {
2623 2747 // "checkpoint" the finger
2624 2748 HeapWord* finger = _finger;
2625 2749
2626 2750 // _heap_end will not change underneath our feet; it only changes at
2627 2751 // yield points.
2628 2752 while (finger < _heap_end) {
2629 2753 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2630 2754
2631 2755 // Note on how this code handles humongous regions. In the
2632 2756 // normal case the finger will reach the start of a "starts
2633 2757 // humongous" (SH) region. Its end will either be the end of the
2634 2758 // last "continues humongous" (CH) region in the sequence, or the
2635 2759 // standard end of the SH region (if the SH is the only region in
2636 2760 // the sequence). That way claim_region() will skip over the CH
2637 2761 // regions. However, there is a subtle race between a CM thread
2638 2762 // executing this method and a mutator thread doing a humongous
2639 2763 // object allocation. The two are not mutually exclusive as the CM
2640 2764 // thread does not need to hold the Heap_lock when it gets
2641 2765 // here. So there is a chance that claim_region() will come across
2642 2766 // a free region that's in the progress of becoming a SH or a CH
2643 2767 // region. In the former case, it will either
2644 2768 // a) Miss the update to the region's end, in which case it will
2645 2769 // visit every subsequent CH region, will find their bitmaps
2646 2770 // empty, and do nothing, or
2647 2771 // b) Will observe the update of the region's end (in which case
2648 2772 // it will skip the subsequent CH regions).
2649 2773 // If it comes across a region that suddenly becomes CH, the
2650 2774 // scenario will be similar to b). So, the race between
2651 2775 // claim_region() and a humongous object allocation might force us
2652 2776 // to do a bit of unnecessary work (due to some unnecessary bitmap
2653 2777     // iterations) but it should not introduce any correctness issues.
2654 2778 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2655 2779 HeapWord* bottom = curr_region->bottom();
2656 2780 HeapWord* end = curr_region->end();
2657 2781 HeapWord* limit = curr_region->next_top_at_mark_start();
2658 2782
2659 2783 if (verbose_low()) {
2660 2784 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2661 2785 "["PTR_FORMAT", "PTR_FORMAT"), "
2662 2786 "limit = "PTR_FORMAT,
2663 2787 worker_id, curr_region, bottom, end, limit);
2664 2788 }
2665 2789
2666 2790 // Is the gap between reading the finger and doing the CAS too long?
2667 2791 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2668 2792 if (res == finger) {
2669 2793 // we succeeded
2670 2794
2671 2795       // notice that _finger == end cannot be guaranteed here since
2672 2796 // someone else might have moved the finger even further
2673 2797 assert(_finger >= end, "the finger should have moved forward");
2674 2798
2675 2799 if (verbose_low()) {
2676 2800 gclog_or_tty->print_cr("[%u] we were successful with region = "
2677 2801 PTR_FORMAT, worker_id, curr_region);
2678 2802 }
2679 2803
2680 2804 if (limit > bottom) {
2681 2805 if (verbose_low()) {
2682 2806 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2683 2807 "returning it ", worker_id, curr_region);
2684 2808 }
2685 2809 return curr_region;
2686 2810 } else {
2687 2811 assert(limit == bottom,
2688 2812 "the region limit should be at bottom");
2689 2813 if (verbose_low()) {
2690 2814 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2691 2815 "returning NULL", worker_id, curr_region);
2692 2816 }
2693 2817 // we return NULL and the caller should try calling
2694 2818 // claim_region() again.
2695 2819 return NULL;
2696 2820 }
2697 2821 } else {
2698 2822 assert(_finger > finger, "the finger should have moved forward");
2699 2823 if (verbose_low()) {
2700 2824 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2701 2825 "global finger = "PTR_FORMAT", "
2702 2826 "our finger = "PTR_FORMAT,
2703 2827 worker_id, _finger, finger);
2704 2828 }
2705 2829
2706 2830 // read it again
2707 2831 finger = _finger;
2708 2832 }
2709 2833 }
2710 2834
2711 2835 return NULL;
2712 2836 }
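
The claim above is a lock-free compare-and-swap loop on the global finger. As a rough standalone sketch of the same pattern, using std::atomic in place of HotSpot's Atomic::cmpxchg_ptr (the names, the heap bound, and the region size below are illustrative, not HotSpot types):

    #include <atomic>
    #include <cstdint>

    // Illustrative stand-ins; kHeapEnd would be set at heap initialization.
    static std::atomic<uintptr_t> g_finger{0};
    static const uintptr_t kHeapEnd = 1u << 20;

    // Claim [finger, finger + region_size) by CASing the global finger
    // forward, the same pattern claim_region() uses.
    // Returns the claimed start, or 0 once we run out of heap.
    uintptr_t try_claim(uintptr_t region_size) {
      uintptr_t finger = g_finger.load();          // "checkpoint" the finger
      while (finger < kHeapEnd) {
        uintptr_t end = finger + region_size;      // curr_region->end()
        if (g_finger.compare_exchange_strong(finger, end)) {
          return finger;                           // we own this region now
        }
        // CAS failed: 'finger' now holds the value another worker installed;
        // loop and retry from there, as claim_region() re-reads _finger.
      }
      return 0;                                    // out of regions
    }

The key property, as in the real code, is that a failed CAS is cheap: the loser simply restarts from the winner's new finger value, so no lock is ever held while claiming.
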
2713 2837
2714 2838 #ifndef PRODUCT
2715 2839 enum VerifyNoCSetOopsPhase {
2716 2840 VerifyNoCSetOopsStack,
2717 2841 VerifyNoCSetOopsQueues,
2718 2842 VerifyNoCSetOopsSATBCompleted,
2719 2843 VerifyNoCSetOopsSATBThread
2720 2844 };
2721 2845
2722 2846 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
2723 2847 private:
2724 2848 G1CollectedHeap* _g1h;
2725 2849 VerifyNoCSetOopsPhase _phase;
2726 2850 int _info;
2727 2851
2728 2852 const char* phase_str() {
2729 2853 switch (_phase) {
2730 2854 case VerifyNoCSetOopsStack: return "Stack";
2731 2855 case VerifyNoCSetOopsQueues: return "Queue";
2732 2856 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2733 2857 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
2734 2858 default: ShouldNotReachHere();
2735 2859 }
2736 2860 return NULL;
2737 2861 }
2738 2862
2739 2863 void do_object_work(oop obj) {
2740 2864 guarantee(!_g1h->obj_in_cs(obj),
2741 2865 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2742 2866 (void*) obj, phase_str(), _info));
2743 2867 }
2744 2868
2745 2869 public:
2746 2870 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2747 2871
2748 2872 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2749 2873 _phase = phase;
2750 2874 _info = info;
2751 2875 }
2752 2876
2753 2877 virtual void do_oop(oop* p) {
2754 2878 oop obj = oopDesc::load_decode_heap_oop(p);
2755 2879 do_object_work(obj);
2756 2880 }
2757 2881
2758 2882 virtual void do_oop(narrowOop* p) {
2759 2883 // We should not come across narrow oops while scanning marking
2760 2884 // stacks and SATB buffers.
2761 2885 ShouldNotReachHere();
2762 2886 }
2763 2887
2764 2888 virtual void do_object(oop obj) {
2765 2889 do_object_work(obj);
2766 2890 }
2767 2891 };
2768 2892
2769 2893 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2770 2894 bool verify_enqueued_buffers,
2771 2895 bool verify_thread_buffers,
2772 2896 bool verify_fingers) {
2773 2897 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2774 2898 if (!G1CollectedHeap::heap()->mark_in_progress()) {
2775 2899 return;
2776 2900 }
2777 2901
2778 2902 VerifyNoCSetOopsClosure cl;
2779 2903
2780 2904 if (verify_stacks) {
2781 2905 // Verify entries on the global mark stack
2782 2906 cl.set_phase(VerifyNoCSetOopsStack);
2783 2907 _markStack.oops_do(&cl);
2784 2908
2785 2909 // Verify entries on the task queues
2786 2910 for (uint i = 0; i < _max_worker_id; i += 1) {
2787 2911 cl.set_phase(VerifyNoCSetOopsQueues, i);
2788 - OopTaskQueue* queue = _task_queues->queue(i);
2912 + CMTaskQueue* queue = _task_queues->queue(i);
2789 2913 queue->oops_do(&cl);
2790 2914 }
2791 2915 }
2792 2916
2793 2917 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2794 2918
2795 2919 // Verify entries on the enqueued SATB buffers
2796 2920 if (verify_enqueued_buffers) {
2797 2921 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2798 2922 satb_qs.iterate_completed_buffers_read_only(&cl);
2799 2923 }
2800 2924
2801 2925 // Verify entries on the per-thread SATB buffers
2802 2926 if (verify_thread_buffers) {
2803 2927 cl.set_phase(VerifyNoCSetOopsSATBThread);
2804 2928 satb_qs.iterate_thread_buffers_read_only(&cl);
2805 2929 }
2806 2930
2807 2931 if (verify_fingers) {
2808 2932 // Verify the global finger
2809 2933 HeapWord* global_finger = finger();
2810 2934 if (global_finger != NULL && global_finger < _heap_end) {
2811 2935 // The global finger always points to a heap region boundary. We
2812 2936 // use heap_region_containing_raw() to get the containing region
2813 2937 // given that the global finger could be pointing to a free region
2814 2938 // which subsequently becomes continues humongous. If that
2815 2939 // happens, heap_region_containing() will return the bottom of the
2816 2940 // corresponding starts humongous region and the check below will
2817 2941 // not hold any more.
2818 2942 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2819 2943 guarantee(global_finger == global_hr->bottom(),
2820 2944 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2821 2945 global_finger, HR_FORMAT_PARAMS(global_hr)));
2822 2946 }
2823 2947
2824 2948 // Verify the task fingers
2825 2949 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2826 2950 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2827 2951 CMTask* task = _tasks[i];
2828 2952 HeapWord* task_finger = task->finger();
2829 2953 if (task_finger != NULL && task_finger < _heap_end) {
2830 2954 // See above note on the global finger verification.
2831 2955 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2832 2956 guarantee(task_finger == task_hr->bottom() ||
2833 2957 !task_hr->in_collection_set(),
2834 2958 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2835 2959 task_finger, HR_FORMAT_PARAMS(task_hr)));
2836 2960 }
2837 2961 }
2838 2962 }
2839 2963 }
2840 2964 #endif // PRODUCT
2841 2965
2842 2966 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2843 - _markStack.setEmpty();
2844 - _markStack.clear_overflow();
2967 + _markStack.set_should_expand();
2968 + _markStack.setEmpty(); // Also clears the _markStack overflow flag
2845 2969 if (clear_overflow) {
2846 2970 clear_has_overflown();
2847 2971 } else {
2848 2972 assert(has_overflown(), "pre-condition");
2849 2973 }
2850 2974 _finger = _heap_start;
2851 2975
2852 2976 for (uint i = 0; i < _max_worker_id; ++i) {
2853 - OopTaskQueue* queue = _task_queues->queue(i);
2977 + CMTaskQueue* queue = _task_queues->queue(i);
2854 2978 queue->set_empty();
2855 2979 }
2856 2980 }
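
Note how the changed lines above only record that the mark stack should grow; the actual expansion happens later, single-threaded, before the next marking round. A hedged sketch of that record-now, expand-later pattern (SketchMarkStack is invented for illustration; the real CMMarkStack backs its storage with virtual memory):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Illustrative stand-in for a growable mark stack.
    class SketchMarkStack {
      std::vector<void*> _elems;
      bool _should_expand = false;
    public:
      void set_should_expand() { _should_expand = true; }
      void set_empty()         { _elems.clear(); }  // also drops overflow state
      // Run single-threaded, between parallel phases, before the next round.
      void expand_if_requested(size_t max_capacity) {
        if (_should_expand) {
          size_t wanted = std::min(_elems.capacity() * 2 + 64, max_capacity);
          _elems.reserve(wanted);  // grow up to a capped maximum
          _should_expand = false;
        }
      }
    };

Deferring the growth keeps the hot parallel path free of allocation and of any locking around the resize.
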
2857 2981
2858 2982 // Aggregate the counting data that was constructed concurrently
2859 2983 // with marking.
2860 2984 class AggregateCountDataHRClosure: public HeapRegionClosure {
2861 2985 G1CollectedHeap* _g1h;
2862 2986 ConcurrentMark* _cm;
2863 2987 CardTableModRefBS* _ct_bs;
2864 2988 BitMap* _cm_card_bm;
2865 2989 uint _max_worker_id;
2866 2990
2867 2991 public:
2868 2992 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2869 2993 BitMap* cm_card_bm,
2870 2994 uint max_worker_id) :
2871 2995 _g1h(g1h), _cm(g1h->concurrent_mark()),
2872 2996 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
2873 2997 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2874 2998
2875 2999 bool doHeapRegion(HeapRegion* hr) {
2876 3000 if (hr->continuesHumongous()) {
2877 3001 // We will ignore these here and process them when their
2878 3002 // associated "starts humongous" region is processed.
2879 3003 // Note that we cannot rely on their associated
2880 3004 // "starts humongous" region to have their bit set to 1
2881 3005 // since, due to the region chunking in the parallel region
2882 3006 // iteration, a "continues humongous" region might be visited
2883 3007 // before its associated "starts humongous".
2884 3008 return false;
2885 3009 }
2886 3010
2887 3011 HeapWord* start = hr->bottom();
2888 3012 HeapWord* limit = hr->next_top_at_mark_start();
2889 3013 HeapWord* end = hr->end();
2890 3014
2891 3015 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2892 3016 err_msg("Preconditions not met - "
2893 3017 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2894 3018 "top: "PTR_FORMAT", end: "PTR_FORMAT,
2895 3019 start, limit, hr->top(), hr->end()));
2896 3020
2897 3021 assert(hr->next_marked_bytes() == 0, "Precondition");
2898 3022
2899 3023 if (start == limit) {
2900 3024 // NTAMS of this region has not been set so nothing to do.
2901 3025 return false;
2902 3026 }
2903 3027
2904 3028 // 'start' should be in the heap.
2905 3029 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2906 3030     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2907 3031 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2908 3032
2909 3033 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2910 3034 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2911 3035 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2912 3036
2913 3037 // If ntams is not card aligned then we bump card bitmap index
2914 3038     // for limit so that we get all the cards spanned by
2915 3039 // the object ending at ntams.
2916 3040 // Note: if this is the last region in the heap then ntams
2917 3041     // could actually be just beyond the end of the heap;
2918 3042 // limit_idx will then correspond to a (non-existent) card
2919 3043 // that is also outside the heap.
2920 3044 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2921 3045 limit_idx += 1;
2922 3046 }
2923 3047
2924 3048 assert(limit_idx <= end_idx, "or else use atomics");
2925 3049
2926 3050 // Aggregate the "stripe" in the count data associated with hr.
2927 3051 uint hrs_index = hr->hrs_index();
2928 3052 size_t marked_bytes = 0;
2929 3053
2930 3054 for (uint i = 0; i < _max_worker_id; i += 1) {
2931 3055 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2932 3056 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2933 3057
2934 3058 // Fetch the marked_bytes in this region for task i and
2935 3059 // add it to the running total for this region.
2936 3060 marked_bytes += marked_bytes_array[hrs_index];
2937 3061
2938 3062 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2939 3063 // into the global card bitmap.
2940 3064 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2941 3065
2942 3066 while (scan_idx < limit_idx) {
2943 3067 assert(task_card_bm->at(scan_idx) == true, "should be");
2944 3068 _cm_card_bm->set_bit(scan_idx);
2945 3069 assert(_cm_card_bm->at(scan_idx) == true, "should be");
2946 3070
2947 3071 // BitMap::get_next_one_offset() can handle the case when
2948 3072 // its left_offset parameter is greater than its right_offset
2949 3073 // parameter. It does, however, have an early exit if
2950 3074 // left_offset == right_offset. So let's limit the value
2951 3075 // passed in for left offset here.
2952 3076 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2953 3077 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2954 3078 }
2955 3079 }
2956 3080
2957 3081 // Update the marked bytes for this region.
2958 3082 hr->add_to_marked_bytes(marked_bytes);
2959 3083
2960 3084 // Next heap region
2961 3085 return false;
2962 3086 }
2963 3087 };
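
The limit_idx bump in doHeapRegion() is easiest to see with concrete numbers. Assuming 512-byte cards (a card shift of 9), an NTAMS that lands mid-card means the object ending at NTAMS spills into that card, so the exclusive card index must be raised by one. A small sketch of the index math (plain C++, not the BitMap/CardTableModRefBS API):

    #include <cassert>
    #include <cstdint>

    const uintptr_t kCardShift = 9;   // 512-byte cards (assumed)

    uintptr_t card_index(uintptr_t addr, uintptr_t heap_base) {
      return (addr - heap_base) >> kCardShift;
    }

    int main() {
      uintptr_t base  = 0x100000;
      uintptr_t ntams = base + 10 * 512 + 40;         // NTAMS lands mid-card 10
      uintptr_t limit_idx = card_index(ntams, base);  // == 10, exclusive bound
      if ((ntams & 511) != 0) {
        limit_idx += 1;  // include the card the last object spills into
      }
      assert(limit_idx == 11);
      return 0;
    }
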
2964 3088
2965 3089 class G1AggregateCountDataTask: public AbstractGangTask {
2966 3090 protected:
2967 3091 G1CollectedHeap* _g1h;
2968 3092 ConcurrentMark* _cm;
2969 3093 BitMap* _cm_card_bm;
2970 3094 uint _max_worker_id;
2971 3095 int _active_workers;
2972 3096
2973 3097 public:
2974 3098 G1AggregateCountDataTask(G1CollectedHeap* g1h,
2975 3099 ConcurrentMark* cm,
2976 3100 BitMap* cm_card_bm,
2977 3101 uint max_worker_id,
2978 3102 int n_workers) :
2979 3103 AbstractGangTask("Count Aggregation"),
2980 3104 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2981 3105 _max_worker_id(max_worker_id),
2982 3106 _active_workers(n_workers) { }
2983 3107
2984 3108 void work(uint worker_id) {
2985 3109 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2986 3110
2987 3111 if (G1CollectedHeap::use_parallel_gc_threads()) {
2988 3112 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
2989 3113 _active_workers,
2990 3114 HeapRegion::AggregateCountClaimValue);
2991 3115 } else {
2992 3116 _g1h->heap_region_iterate(&cl);
2993 3117 }
2994 3118 }
2995 3119 };
2996 3120
2997 3121
2998 3122 void ConcurrentMark::aggregate_count_data() {
2999 3123 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3000 3124 _g1h->workers()->active_workers() :
3001 3125 1);
3002 3126
3003 3127 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3004 3128 _max_worker_id, n_workers);
3005 3129
3006 3130 if (G1CollectedHeap::use_parallel_gc_threads()) {
3007 3131 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3008 3132 "sanity check");
3009 3133 _g1h->set_par_threads(n_workers);
3010 3134 _g1h->workers()->run_task(&g1_par_agg_task);
3011 3135 _g1h->set_par_threads(0);
3012 3136
3013 3137 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3014 3138 "sanity check");
3015 3139 _g1h->reset_heap_region_claim_values();
3016 3140 } else {
3017 3141 g1_par_agg_task.work(0);
3018 3142 }
3019 3143 }
3020 3144
3021 3145 // Clear the per-worker arrays used to store the per-region counting data
3022 3146 void ConcurrentMark::clear_all_count_data() {
3023 3147 // Clear the global card bitmap - it will be filled during
3024 3148 // liveness count aggregation (during remark) and the
3025 3149 // final counting task.
3026 3150 _card_bm.clear();
3027 3151
3028 3152 // Clear the global region bitmap - it will be filled as part
3029 3153 // of the final counting task.
3030 3154 _region_bm.clear();
3031 3155
3032 3156 uint max_regions = _g1h->max_regions();
3033 3157 assert(_max_worker_id > 0, "uninitialized");
3034 3158
3035 3159 for (uint i = 0; i < _max_worker_id; i += 1) {
3036 3160 BitMap* task_card_bm = count_card_bitmap_for(i);
3037 3161 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3038 3162
3039 3163 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3040 3164 assert(marked_bytes_array != NULL, "uninitialized");
3041 3165
3042 3166 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3043 3167 task_card_bm->clear();
3044 3168 }
3045 3169 }
3046 3170
3047 3171 void ConcurrentMark::print_stats() {
3048 3172 if (verbose_stats()) {
3049 3173 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3050 3174 for (size_t i = 0; i < _active_tasks; ++i) {
3051 3175 _tasks[i]->print_stats();
3052 3176 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3053 3177 }
3054 3178 }
3055 3179 }
3056 3180
3057 3181 // abandon current marking iteration due to a Full GC
3058 3182 void ConcurrentMark::abort() {
3059 3183 // Clear all marks to force marking thread to do nothing
3060 3184 _nextMarkBitMap->clearAll();
3061 3185 // Clear the liveness counting data
3062 3186 clear_all_count_data();
3063 3187 // Empty mark stack
3064 3188 clear_marking_state();
3065 3189 for (uint i = 0; i < _max_worker_id; ++i) {
3066 3190 _tasks[i]->clear_region_fields();
3067 3191 }
3068 3192 _has_aborted = true;
3069 3193
3070 3194 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3071 3195 satb_mq_set.abandon_partial_marking();
3072 3196 // This can be called either during or outside marking, we'll read
3073 3197 // the expected_active value from the SATB queue set.
3074 3198 satb_mq_set.set_active_all_threads(
3075 3199 false, /* new active value */
3076 3200 satb_mq_set.is_active() /* expected_active */);
3077 3201 }
3078 3202
3079 3203 static void print_ms_time_info(const char* prefix, const char* name,
3080 3204 NumberSeq& ns) {
3081 3205 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3082 3206 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3083 3207 if (ns.num() > 0) {
3084 3208 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3085 3209 prefix, ns.sd(), ns.maximum());
3086 3210 }
3087 3211 }
3088 3212
3089 3213 void ConcurrentMark::print_summary_info() {
3090 3214 gclog_or_tty->print_cr(" Concurrent marking:");
3091 3215 print_ms_time_info(" ", "init marks", _init_times);
3092 3216 print_ms_time_info(" ", "remarks", _remark_times);
3093 3217 {
3094 3218 print_ms_time_info(" ", "final marks", _remark_mark_times);
3095 3219 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3096 3220
3097 3221 }
3098 3222 print_ms_time_info(" ", "cleanups", _cleanup_times);
3099 3223 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3100 3224 _total_counting_time,
3101 3225 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3102 3226 (double)_cleanup_times.num()
3103 3227 : 0.0));
3104 3228 if (G1ScrubRemSets) {
3105 3229 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3106 3230 _total_rs_scrub_time,
3107 3231 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3108 3232 (double)_cleanup_times.num()
3109 3233 : 0.0));
3110 3234 }
3111 3235 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3112 3236 (_init_times.sum() + _remark_times.sum() +
3113 3237 _cleanup_times.sum())/1000.0);
3114 3238 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3115 3239 "(%8.2f s marking).",
3116 3240 cmThread()->vtime_accum(),
3117 3241 cmThread()->vtime_mark_accum());
3118 3242 }
3119 3243
3120 3244 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3121 3245 _parallel_workers->print_worker_threads_on(st);
3122 3246 }
3123 3247
3124 3248 // We take a break if someone is trying to stop the world.
3125 3249 bool ConcurrentMark::do_yield_check(uint worker_id) {
3126 3250 if (should_yield()) {
3127 3251 if (worker_id == 0) {
3128 3252 _g1h->g1_policy()->record_concurrent_pause();
3129 3253 }
3130 3254 cmThread()->yield();
3131 3255 return true;
3132 3256 } else {
3133 3257 return false;
3134 3258 }
3135 3259 }
3136 3260
3137 3261 bool ConcurrentMark::should_yield() {
3138 3262 return cmThread()->should_yield();
3139 3263 }
3140 3264
3141 3265 bool ConcurrentMark::containing_card_is_marked(void* p) {
3142 3266 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3143 3267 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3144 3268 }
3145 3269
3146 3270 bool ConcurrentMark::containing_cards_are_marked(void* start,
3147 3271 void* last) {
3148 3272 return containing_card_is_marked(start) &&
3149 3273 containing_card_is_marked(last);
3150 3274 }
3151 3275
3152 3276 #ifndef PRODUCT
3153 3277 // for debugging purposes
3154 3278 void ConcurrentMark::print_finger() {
3155 3279 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3156 3280 _heap_start, _heap_end, _finger);
3157 3281 for (uint i = 0; i < _max_worker_id; ++i) {
3158 3282 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger());
3159 3283 }
3160 3284 gclog_or_tty->print_cr("");
3161 3285 }
3162 3286 #endif
3163 3287
3164 3288 void CMTask::scan_object(oop obj) {
3165 3289 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3166 3290
3167 3291 if (_cm->verbose_high()) {
3168 3292 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3169 3293 _worker_id, (void*) obj);
3170 3294 }
3171 3295
3172 3296 size_t obj_size = obj->size();
3173 3297 _words_scanned += obj_size;
3174 3298
3175 3299 obj->oop_iterate(_cm_oop_closure);
3176 3300 statsOnly( ++_objs_scanned );
3177 3301 check_limits();
3178 3302 }
3179 3303
3180 3304 // Closure for iteration over bitmaps
3181 3305 class CMBitMapClosure : public BitMapClosure {
3182 3306 private:
3183 3307 // the bitmap that is being iterated over
3184 3308 CMBitMap* _nextMarkBitMap;
3185 3309 ConcurrentMark* _cm;
3186 3310 CMTask* _task;
3187 3311
3188 3312 public:
3189 3313 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3190 3314 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3191 3315
3192 3316 bool do_bit(size_t offset) {
3193 3317 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3194 3318 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3195 3319 assert( addr < _cm->finger(), "invariant");
3196 3320
3197 3321 statsOnly( _task->increase_objs_found_on_bitmap() );
3198 3322 assert(addr >= _task->finger(), "invariant");
3199 3323
3200 3324 // We move that task's local finger along.
3201 3325 _task->move_finger_to(addr);
3202 3326
3203 3327 _task->scan_object(oop(addr));
3204 3328 // we only partially drain the local queue and global stack
3205 3329 _task->drain_local_queue(true);
3206 3330 _task->drain_global_stack(true);
3207 3331
3208 3332 // if the has_aborted flag has been raised, we need to bail out of
3209 3333 // the iteration
3210 3334 return !_task->has_aborted();
3211 3335 }
3212 3336 };
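
do_bit() above moves the task's local finger before scanning each object, so an abort leaves the finger on the last object visited and the task can resume from there. A compact sketch of that iterate-and-advance pattern over a plain bitset (word-granularity assumed; not the CMBitMap API):

    #include <bitset>
    #include <cstddef>

    // Iterate set bits from a local finger, advancing the finger before each
    // visit; a false return from visit() aborts, mirroring do_bit(). The
    // returned finger lets the caller resume (or bump past the last object).
    template <size_t N, typename Visit>
    size_t scan_marked(const std::bitset<N>& bm, size_t finger, Visit visit) {
      for (size_t i = finger; i < N; ++i) {
        if (bm.test(i)) {
          finger = i;                 // move_finger_to(addr)
          if (!visit(i)) {
            return finger;            // aborted mid-region
          }
        }
      }
      return N;                       // region fully scanned
    }
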
3213 3337
3214 3338 // Closure for iterating over objects, currently only used for
3215 3339 // processing SATB buffers.
3216 3340 class CMObjectClosure : public ObjectClosure {
3217 3341 private:
3218 3342 CMTask* _task;
3219 3343
3220 3344 public:
3221 3345 void do_object(oop obj) {
3222 3346 _task->deal_with_reference(obj);
3223 3347 }
3224 3348
3225 3349 CMObjectClosure(CMTask* task) : _task(task) { }
3226 3350 };
3227 3351
3228 3352 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3229 3353 ConcurrentMark* cm,
3230 3354 CMTask* task)
3231 3355 : _g1h(g1h), _cm(cm), _task(task) {
3232 3356 assert(_ref_processor == NULL, "should be initialized to NULL");
3233 3357
3234 3358 if (G1UseConcMarkReferenceProcessing) {
3235 3359 _ref_processor = g1h->ref_processor_cm();
3236 3360 assert(_ref_processor != NULL, "should not be NULL");
3237 3361 }
3238 3362 }
3239 3363
3240 3364 void CMTask::setup_for_region(HeapRegion* hr) {
3241 3365 // Separated the asserts so that we know which one fires.
3242 3366 assert(hr != NULL,
3243 3367 "claim_region() should have filtered out continues humongous regions");
3244 3368 assert(!hr->continuesHumongous(),
3245 3369 "claim_region() should have filtered out continues humongous regions");
3246 3370
3247 3371 if (_cm->verbose_low()) {
3248 3372 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3249 3373 _worker_id, hr);
3250 3374 }
3251 3375
3252 3376 _curr_region = hr;
3253 3377 _finger = hr->bottom();
3254 3378 update_region_limit();
3255 3379 }
3256 3380
3257 3381 void CMTask::update_region_limit() {
3258 3382 HeapRegion* hr = _curr_region;
3259 3383 HeapWord* bottom = hr->bottom();
3260 3384 HeapWord* limit = hr->next_top_at_mark_start();
3261 3385
3262 3386 if (limit == bottom) {
3263 3387 if (_cm->verbose_low()) {
3264 3388 gclog_or_tty->print_cr("[%u] found an empty region "
3265 3389 "["PTR_FORMAT", "PTR_FORMAT")",
3266 3390 _worker_id, bottom, limit);
3267 3391 }
3268 3392 // The region was collected underneath our feet.
3269 3393 // We set the finger to bottom to ensure that the bitmap
3270 3394 // iteration that will follow this will not do anything.
3271 3395 // (this is not a condition that holds when we set the region up,
3272 3396 // as the region is not supposed to be empty in the first place)
3273 3397 _finger = bottom;
3274 3398 } else if (limit >= _region_limit) {
3275 3399 assert(limit >= _finger, "peace of mind");
3276 3400 } else {
3277 3401 assert(limit < _region_limit, "only way to get here");
3278 3402 // This can happen under some pretty unusual circumstances. An
3279 3403 // evacuation pause empties the region underneath our feet (NTAMS
3280 3404 // at bottom). We then do some allocation in the region (NTAMS
3281 3405 // stays at bottom), followed by the region being used as a GC
3282 3406 // alloc region (NTAMS will move to top() and the objects
3283 3407 // originally below it will be grayed). All objects now marked in
3284 3408 // the region are explicitly grayed, if below the global finger,
3285 3409 // and we do not need in fact to scan anything else. So, we simply
3286 3410 // set _finger to be limit to ensure that the bitmap iteration
3287 3411 // doesn't do anything.
3288 3412 _finger = limit;
3289 3413 }
3290 3414
3291 3415 _region_limit = limit;
3292 3416 }
3293 3417
3294 3418 void CMTask::giveup_current_region() {
3295 3419 assert(_curr_region != NULL, "invariant");
3296 3420 if (_cm->verbose_low()) {
3297 3421 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3298 3422 _worker_id, _curr_region);
3299 3423 }
3300 3424 clear_region_fields();
3301 3425 }
3302 3426
3303 3427 void CMTask::clear_region_fields() {
3304 3428 // Values for these three fields that indicate that we're not
3305 3429 // holding on to a region.
3306 3430 _curr_region = NULL;
3307 3431 _finger = NULL;
3308 3432 _region_limit = NULL;
3309 3433 }
3310 3434
3311 3435 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3312 3436 if (cm_oop_closure == NULL) {
3313 3437 assert(_cm_oop_closure != NULL, "invariant");
3314 3438 } else {
3315 3439 assert(_cm_oop_closure == NULL, "invariant");
3316 3440 }
3317 3441 _cm_oop_closure = cm_oop_closure;
3318 3442 }
3319 3443
3320 3444 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3321 3445 guarantee(nextMarkBitMap != NULL, "invariant");
3322 3446
3323 3447 if (_cm->verbose_low()) {
3324 3448 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3325 3449 }
3326 3450
3327 3451 _nextMarkBitMap = nextMarkBitMap;
3328 3452 clear_region_fields();
3329 3453
3330 3454 _calls = 0;
3331 3455 _elapsed_time_ms = 0.0;
3332 3456 _termination_time_ms = 0.0;
3333 3457 _termination_start_time_ms = 0.0;
3334 3458
3335 3459 #if _MARKING_STATS_
3336 3460 _local_pushes = 0;
3337 3461 _local_pops = 0;
3338 3462 _local_max_size = 0;
3339 3463 _objs_scanned = 0;
3340 3464 _global_pushes = 0;
3341 3465 _global_pops = 0;
3342 3466 _global_max_size = 0;
3343 3467 _global_transfers_to = 0;
3344 3468 _global_transfers_from = 0;
3345 3469 _regions_claimed = 0;
3346 3470 _objs_found_on_bitmap = 0;
3347 3471 _satb_buffers_processed = 0;
3348 3472 _steal_attempts = 0;
3349 3473 _steals = 0;
3350 3474 _aborted = 0;
3351 3475 _aborted_overflow = 0;
3352 3476 _aborted_cm_aborted = 0;
3353 3477 _aborted_yield = 0;
3354 3478 _aborted_timed_out = 0;
3355 3479 _aborted_satb = 0;
3356 3480 _aborted_termination = 0;
3357 3481 #endif // _MARKING_STATS_
3358 3482 }
3359 3483
3360 3484 bool CMTask::should_exit_termination() {
3361 3485 regular_clock_call();
3362 3486 // This is called when we are in the termination protocol. We should
3363 3487 // quit if, for some reason, this task wants to abort or the global
3364 3488 // stack is not empty (this means that we can get work from it).
3365 3489 return !_cm->mark_stack_empty() || has_aborted();
3366 3490 }
3367 3491
3368 3492 void CMTask::reached_limit() {
3369 3493 assert(_words_scanned >= _words_scanned_limit ||
3370 3494 _refs_reached >= _refs_reached_limit ,
3371 3495 "shouldn't have been called otherwise");
3372 3496 regular_clock_call();
3373 3497 }
3374 3498
3375 3499 void CMTask::regular_clock_call() {
3376 3500 if (has_aborted()) return;
3377 3501
3378 3502 // First, we need to recalculate the words scanned and refs reached
3379 3503 // limits for the next clock call.
3380 3504 recalculate_limits();
3381 3505
3382 3506 // During the regular clock call we do the following
3383 3507
3384 3508 // (1) If an overflow has been flagged, then we abort.
3385 3509 if (_cm->has_overflown()) {
3386 3510 set_has_aborted();
3387 3511 return;
3388 3512 }
3389 3513
3390 3514 // If we are not concurrent (i.e. we're doing remark) we don't need
3391 3515 // to check anything else. The other steps are only needed during
3392 3516 // the concurrent marking phase.
3393 3517 if (!concurrent()) return;
3394 3518
3395 3519 // (2) If marking has been aborted for Full GC, then we also abort.
3396 3520 if (_cm->has_aborted()) {
3397 3521 set_has_aborted();
3398 3522 statsOnly( ++_aborted_cm_aborted );
3399 3523 return;
3400 3524 }
3401 3525
3402 3526 double curr_time_ms = os::elapsedVTime() * 1000.0;
3403 3527
3404 3528 // (3) If marking stats are enabled, then we update the step history.
3405 3529 #if _MARKING_STATS_
3406 3530 if (_words_scanned >= _words_scanned_limit) {
3407 3531 ++_clock_due_to_scanning;
3408 3532 }
3409 3533 if (_refs_reached >= _refs_reached_limit) {
3410 3534 ++_clock_due_to_marking;
3411 3535 }
3412 3536
3413 3537 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3414 3538 _interval_start_time_ms = curr_time_ms;
3415 3539 _all_clock_intervals_ms.add(last_interval_ms);
3416 3540
3417 3541 if (_cm->verbose_medium()) {
3418 3542 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3419 3543 "scanned = %d%s, refs reached = %d%s",
3420 3544 _worker_id, last_interval_ms,
3421 3545 _words_scanned,
3422 3546 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3423 3547 _refs_reached,
3424 3548 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3425 3549 }
3426 3550 #endif // _MARKING_STATS_
3427 3551
3428 3552 // (4) We check whether we should yield. If we have to, then we abort.
3429 3553 if (_cm->should_yield()) {
3430 3554 // We should yield. To do this we abort the task. The caller is
3431 3555 // responsible for yielding.
3432 3556 set_has_aborted();
3433 3557 statsOnly( ++_aborted_yield );
3434 3558 return;
3435 3559 }
3436 3560
3437 3561 // (5) We check whether we've reached our time quota. If we have,
3438 3562 // then we abort.
3439 3563 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3440 3564 if (elapsed_time_ms > _time_target_ms) {
3441 3565 set_has_aborted();
3442 3566 _has_timed_out = true;
3443 3567 statsOnly( ++_aborted_timed_out );
3444 3568 return;
3445 3569 }
3446 3570
3447 3571   // (6) Finally, we check whether there are enough completed SATB
3448 3572 // buffers available for processing. If there are, we abort.
3449 3573 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3450 3574 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3451 3575 if (_cm->verbose_low()) {
3452 3576 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3453 3577 _worker_id);
3454 3578 }
3455 3579 // we do need to process SATB buffers, we'll abort and restart
3456 3580 // the marking task to do so
3457 3581 set_has_aborted();
3458 3582 statsOnly( ++_aborted_satb );
3459 3583 return;
3460 3584 }
3461 3585 }
3462 3586
3463 3587 void CMTask::recalculate_limits() {
3464 3588 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3465 3589 _words_scanned_limit = _real_words_scanned_limit;
3466 3590
3467 3591 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3468 3592 _refs_reached_limit = _real_refs_reached_limit;
3469 3593 }
3470 3594
3471 3595 void CMTask::decrease_limits() {
3472 3596 // This is called when we believe that we're going to do an infrequent
3473 3597 // operation which will increase the per byte scanned cost (i.e. move
3474 3598 // entries to/from the global stack). It basically tries to decrease the
3475 3599 // scanning limit so that the clock is called earlier.
3476 3600
3477 3601 if (_cm->verbose_medium()) {
3478 3602 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3479 3603 }
3480 3604
3481 3605 _words_scanned_limit = _real_words_scanned_limit -
3482 3606 3 * words_scanned_period / 4;
3483 3607 _refs_reached_limit = _real_refs_reached_limit -
3484 3608 3 * refs_reached_period / 4;
3485 3609 }
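
The arithmetic is simple but worth spelling out: recalculate_limits() sets the limit a full period beyond the current count, and decrease_limits() pulls it back by three quarters of a period, so the clock fires after only a quarter period of further work. A tiny worked example (the period value here is illustrative, not the real constant):

    #include <cassert>
    #include <cstddef>

    int main() {
      const size_t period = 12 * 1024;            // illustrative, in words
      size_t words_scanned = 100 * 1024;
      size_t real_limit = words_scanned + period; // recalculate_limits()
      size_t limit = real_limit - 3 * period / 4; // decrease_limits()
      // The next clock call now comes after only a quarter period of scanning.
      assert(limit - words_scanned == period / 4);
      return 0;
    }
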
3486 3610
3487 3611 void CMTask::move_entries_to_global_stack() {
3488 3612 // local array where we'll store the entries that will be popped
3489 3613 // from the local queue
3490 3614 oop buffer[global_stack_transfer_size];
3491 3615
3492 3616 int n = 0;
3493 3617 oop obj;
3494 3618 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3495 3619 buffer[n] = obj;
3496 3620 ++n;
3497 3621 }
3498 3622
3499 3623 if (n > 0) {
3500 3624 // we popped at least one entry from the local queue
3501 3625
3502 3626 statsOnly( ++_global_transfers_to; _local_pops += n );
3503 3627
3504 3628 if (!_cm->mark_stack_push(buffer, n)) {
3505 3629 if (_cm->verbose_low()) {
3506 3630 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3507 3631 _worker_id);
3508 3632 }
3509 3633 set_has_aborted();
3510 3634 } else {
3511 3635 // the transfer was successful
3512 3636
3513 3637 if (_cm->verbose_medium()) {
3514 3638 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3515 3639 _worker_id, n);
3516 3640 }
3517 3641 statsOnly( int tmp_size = _cm->mark_stack_size();
3518 3642 if (tmp_size > _global_max_size) {
3519 3643 _global_max_size = tmp_size;
3520 3644 }
3521 3645 _global_pushes += n );
3522 3646 }
3523 3647 }
3524 3648
3525 3649 // this operation was quite expensive, so decrease the limits
3526 3650 decrease_limits();
3527 3651 }
3528 3652
3529 3653 void CMTask::get_entries_from_global_stack() {
3530 3654 // local array where we'll store the entries that will be popped
3531 3655 // from the global stack.
3532 3656 oop buffer[global_stack_transfer_size];
3533 3657 int n;
3534 3658 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3535 3659 assert(n <= global_stack_transfer_size,
3536 3660 "we should not pop more than the given limit");
3537 3661 if (n > 0) {
3538 3662 // yes, we did actually pop at least one entry
3539 3663
3540 3664 statsOnly( ++_global_transfers_from; _global_pops += n );
3541 3665 if (_cm->verbose_medium()) {
3542 3666 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3543 3667 _worker_id, n);
3544 3668 }
3545 3669 for (int i = 0; i < n; ++i) {
3546 3670 bool success = _task_queue->push(buffer[i]);
3547 3671 // We only call this when the local queue is empty or under a
3548 3672 // given target limit. So, we do not expect this push to fail.
3549 3673 assert(success, "invariant");
3550 3674 }
3551 3675
3552 3676 statsOnly( int tmp_size = _task_queue->size();
3553 3677 if (tmp_size > _local_max_size) {
3554 3678 _local_max_size = tmp_size;
3555 3679 }
3556 3680 _local_pushes += n );
3557 3681 }
3558 3682
3559 3683 // this operation was quite expensive, so decrease the limits
3560 3684 decrease_limits();
3561 3685 }
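
Both transfer routines move a fixed-size batch between a task's local queue and the shared global stack, amortizing the synchronization cost over several entries. A minimal sketch of the push direction under stated assumptions (the container types, the mutex, and the batch size are invented for illustration; the real global_stack_transfer_size may differ):

    #include <cstddef>
    #include <deque>
    #include <mutex>
    #include <vector>

    static std::vector<void*> g_stack;       // stand-in for the global stack
    static std::mutex         g_stack_lock;
    const int kTransfer = 16;                // illustrative batch size

    // Pop up to kTransfer local entries and push them to the shared stack as
    // one batch; false means the global stack overflowed, which is where the
    // real code sets has_aborted() and enters the overflow protocol.
    bool move_to_global(std::deque<void*>& local, size_t global_capacity) {
      void* buf[kTransfer];
      int n = 0;
      while (n < kTransfer && !local.empty()) {
        buf[n++] = local.front();
        local.pop_front();
      }
      if (n == 0) return true;               // nothing to transfer
      std::lock_guard<std::mutex> guard(g_stack_lock);
      if (g_stack.size() + n > global_capacity) return false;  // overflow
      g_stack.insert(g_stack.end(), buf, buf + n);
      return true;
    }
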
3562 3686
3563 3687 void CMTask::drain_local_queue(bool partially) {
3564 3688 if (has_aborted()) return;
3565 3689
3566 3690 // Decide what the target size is, depending whether we're going to
3567 3691 // drain it partially (so that other tasks can steal if they run out
3568 3692 // of things to do) or totally (at the very end).
3569 3693 size_t target_size;
3570 3694 if (partially) {
3571 3695 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3572 3696 } else {
3573 3697 target_size = 0;
3574 3698 }
3575 3699
3576 3700 if (_task_queue->size() > target_size) {
3577 3701 if (_cm->verbose_high()) {
3578 3702 gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
3579 3703 _worker_id, target_size);
3580 3704 }
3581 3705
3582 3706 oop obj;
3583 3707 bool ret = _task_queue->pop_local(obj);
3584 3708 while (ret) {
3585 3709 statsOnly( ++_local_pops );
3586 3710
3587 3711 if (_cm->verbose_high()) {
3588 3712 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3589 3713 (void*) obj);
3590 3714 }
3591 3715
3592 3716 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3593 3717 assert(!_g1h->is_on_master_free_list(
3594 3718 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3595 3719
3596 3720 scan_object(obj);
3597 3721
3598 3722 if (_task_queue->size() <= target_size || has_aborted()) {
3599 3723 ret = false;
3600 3724 } else {
3601 3725 ret = _task_queue->pop_local(obj);
3602 3726 }
3603 3727 }
3604 3728
3605 3729 if (_cm->verbose_high()) {
3606 3730 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3607 3731 _worker_id, _task_queue->size());
3608 3732 }
3609 3733 }
3610 3734 }
3611 3735
3612 3736 void CMTask::drain_global_stack(bool partially) {
3613 3737 if (has_aborted()) return;
3614 3738
3615 3739 // We have a policy to drain the local queue before we attempt to
3616 3740 // drain the global stack.
3617 3741 assert(partially || _task_queue->size() == 0, "invariant");
3618 3742
3619 3743 // Decide what the target size is, depending whether we're going to
3620 3744 // drain it partially (so that other tasks can steal if they run out
3621 3745 // of things to do) or totally (at the very end). Notice that,
3622 3746 // because we move entries from the global stack in chunks or
3623 3747 // because another task might be doing the same, we might in fact
3624 3748   // drop below the target. But this is not a problem.
3625 3749 size_t target_size;
3626 3750 if (partially) {
3627 3751 target_size = _cm->partial_mark_stack_size_target();
3628 3752 } else {
3629 3753 target_size = 0;
3630 3754 }
3631 3755
3632 3756 if (_cm->mark_stack_size() > target_size) {
3633 3757 if (_cm->verbose_low()) {
3634 3758 gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
3635 3759 _worker_id, target_size);
3636 3760 }
3637 3761
3638 3762 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3639 3763 get_entries_from_global_stack();
3640 3764 drain_local_queue(partially);
3641 3765 }
3642 3766
3643 3767 if (_cm->verbose_low()) {
3644 3768 gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3645 3769 _worker_id, _cm->mark_stack_size());
3646 3770 }
3647 3771 }
3648 3772 }
3649 3773
3650 3774 // SATB Queue has several assumptions on whether to call the par or
3651 3775 // non-par versions of the methods. this is why some of the code is
3652 3776 // non-par versions of the methods. This is why some of the code is
3653 3777 // of the code to simplify things.
3654 3778 void CMTask::drain_satb_buffers() {
3655 3779 if (has_aborted()) return;
3656 3780
3657 3781 // We set this so that the regular clock knows that we're in the
3658 3782 // middle of draining buffers and doesn't set the abort flag when it
3659 3783 // notices that SATB buffers are available for draining. It'd be
3660 3784   // very counterproductive if it did that. :-)
3661 3785 _draining_satb_buffers = true;
3662 3786
3663 3787 CMObjectClosure oc(this);
3664 3788 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3665 3789 if (G1CollectedHeap::use_parallel_gc_threads()) {
3666 3790 satb_mq_set.set_par_closure(_worker_id, &oc);
3667 3791 } else {
3668 3792 satb_mq_set.set_closure(&oc);
3669 3793 }
3670 3794
3671 3795 // This keeps claiming and applying the closure to completed buffers
3672 3796 // until we run out of buffers or we need to abort.
3673 3797 if (G1CollectedHeap::use_parallel_gc_threads()) {
3674 3798 while (!has_aborted() &&
3675 3799 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3676 3800 if (_cm->verbose_medium()) {
3677 3801 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3678 3802 }
3679 3803 statsOnly( ++_satb_buffers_processed );
3680 3804 regular_clock_call();
3681 3805 }
3682 3806 } else {
3683 3807 while (!has_aborted() &&
3684 3808 satb_mq_set.apply_closure_to_completed_buffer()) {
3685 3809 if (_cm->verbose_medium()) {
3686 3810 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3687 3811 }
3688 3812 statsOnly( ++_satb_buffers_processed );
3689 3813 regular_clock_call();
3690 3814 }
3691 3815 }
3692 3816
3693 3817 if (!concurrent() && !has_aborted()) {
3694 3818 // We should only do this during remark.
3695 3819 if (G1CollectedHeap::use_parallel_gc_threads()) {
3696 3820 satb_mq_set.par_iterate_closure_all_threads(_worker_id);
3697 3821 } else {
3698 3822 satb_mq_set.iterate_closure_all_threads();
3699 3823 }
3700 3824 }
3701 3825
3702 3826 _draining_satb_buffers = false;
3703 3827
3704 3828 assert(has_aborted() ||
3705 3829 concurrent() ||
3706 3830 satb_mq_set.completed_buffers_num() == 0, "invariant");
3707 3831
3708 3832 if (G1CollectedHeap::use_parallel_gc_threads()) {
3709 3833 satb_mq_set.set_par_closure(_worker_id, NULL);
3710 3834 } else {
3711 3835 satb_mq_set.set_closure(NULL);
3712 3836 }
3713 3837
3714 3838 // again, this was a potentially expensive operation, decrease the
3715 3839 // limits to get the regular clock call early
3716 3840 decrease_limits();
3717 3841 }
3718 3842
3719 3843 void CMTask::print_stats() {
3720 3844 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3721 3845 _worker_id, _calls);
3722 3846 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3723 3847 _elapsed_time_ms, _termination_time_ms);
3724 3848 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3725 3849 _step_times_ms.num(), _step_times_ms.avg(),
3726 3850 _step_times_ms.sd());
3727 3851 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3728 3852 _step_times_ms.maximum(), _step_times_ms.sum());
3729 3853
3730 3854 #if _MARKING_STATS_
3731 3855 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3732 3856 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3733 3857 _all_clock_intervals_ms.sd());
3734 3858 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3735 3859 _all_clock_intervals_ms.maximum(),
3736 3860 _all_clock_intervals_ms.sum());
3737 3861 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3738 3862 _clock_due_to_scanning, _clock_due_to_marking);
3739 3863 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3740 3864 _objs_scanned, _objs_found_on_bitmap);
3741 3865 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3742 3866 _local_pushes, _local_pops, _local_max_size);
3743 3867 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3744 3868 _global_pushes, _global_pops, _global_max_size);
3745 3869 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3746 3870 _global_transfers_to,_global_transfers_from);
3747 3871 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
3748 3872 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3749 3873 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3750 3874 _steal_attempts, _steals);
3751 3875 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3752 3876 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3753 3877 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3754 3878 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3755 3879 _aborted_timed_out, _aborted_satb, _aborted_termination);
3756 3880 #endif // _MARKING_STATS_
3757 3881 }
3758 3882
3759 3883 /*****************************************************************************
3760 3884
3761 3885 The do_marking_step(time_target_ms) method is the building block
3762 3886 of the parallel marking framework. It can be called in parallel
3763 3887 with other invocations of do_marking_step() on different tasks
3764 3888 (but only one per task, obviously) and concurrently with the
3765 3889 mutator threads, or during remark, hence it eliminates the need
3766 3890 for two versions of the code. When called during remark, it will
3767 3891 pick up from where the task left off during the concurrent marking
3768 3892 phase. Interestingly, tasks are also claimable during evacuation
3769 3893    pauses, since do_marking_step() ensures that it aborts before
3770 3894 it needs to yield.
3771 3895
3772 3896    The data structures that it uses to do marking work are the
3773 3897 following:
3774 3898
3775 3899 (1) Marking Bitmap. If there are gray objects that appear only
3776 3900 on the bitmap (this happens either when dealing with an overflow
3777 3901 or when the initial marking phase has simply marked the roots
3778 3902 and didn't push them on the stack), then tasks claim heap
3779 3903 regions whose bitmap they then scan to find gray objects. A
3780 3904 global finger indicates where the end of the last claimed region
3781 3905 is. A local finger indicates how far into the region a task has
3782 3906 scanned. The two fingers are used to determine how to gray an
3783 3907 object (i.e. whether simply marking it is OK, as it will be
3784 3908    visited by a task in the future, or whether it also needs to be
3785 3909 pushed on a stack).
3786 3910
3787 3911 (2) Local Queue. The local queue of the task which is accessed
3788 3912 reasonably efficiently by the task. Other tasks can steal from
3789 3913 it when they run out of work. Throughout the marking phase, a
3790 3914 task attempts to keep its local queue short but not totally
3791 3915 empty, so that entries are available for stealing by other
3792 3916    tasks. Only when there is no more work will a task totally
3793 3917 drain its local queue.
3794 3918
3795 3919 (3) Global Mark Stack. This handles local queue overflow. During
3796 3920 marking only sets of entries are moved between it and the local
3797 3921    queues, as access to it requires a mutex and more fine-grained
3798 3922    interaction with it might cause contention. If it
3799 3923 overflows, then the marking phase should restart and iterate
3800 3924 over the bitmap to identify gray objects. Throughout the marking
3801 3925 phase, tasks attempt to keep the global mark stack at a small
3802 3926 length but not totally empty, so that entries are available for
3803 3927    popping by other tasks. Only when there is no more work will
3804 3928    tasks totally drain the global mark stack.
3805 3929
3806 3930 (4) SATB Buffer Queue. This is where completed SATB buffers are
3807 3931 made available. Buffers are regularly removed from this queue
3808 3932 and scanned for roots, so that the queue doesn't get too
3809 3933 long. During remark, all completed buffers are processed, as
3810 3934    well as the filled-in parts of any uncompleted buffers.
3811 3935
3812 3936 The do_marking_step() method tries to abort when the time target
3813 3937 has been reached. There are a few other cases when the
3814 3938 do_marking_step() method also aborts:
3815 3939
3816 3940 (1) When the marking phase has been aborted (after a Full GC).
3817 3941
3818 3942 (2) When a global overflow (on the global stack) has been
3819 3943 triggered. Before the task aborts, it will actually sync up with
3820 3944 the other tasks to ensure that all the marking data structures
3821 3945 (local queues, stacks, fingers etc.) are re-initialised so that
3822 3946 when do_marking_step() completes, the marking phase can
3823 3947 immediately restart.
3824 3948
3825 3949 (3) When enough completed SATB buffers are available. The
3826 3950 do_marking_step() method only tries to drain SATB buffers right
3827 3951 at the beginning. So, if enough buffers are available, the
3828 3952 marking step aborts and the SATB buffers are processed at
3829 3953 the beginning of the next invocation.
3830 3954
3831 3955    (4) To yield. When we have to yield, we abort and yield
3832 3956 right at the end of do_marking_step(). This saves us from a lot
3833 3957 of hassle as, by yielding we might allow a Full GC. If this
3834 3958 happens then objects will be compacted underneath our feet, the
3835 3959 heap might shrink, etc. We save checking for this by just
3836 3960 aborting and doing the yield right at the end.
3837 3961
3838 3962 From the above it follows that the do_marking_step() method should
3839 3963 be called in a loop (or, otherwise, regularly) until it completes.
3840 3964
3841 3965 If a marking step completes without its has_aborted() flag being
3842 3966 true, it means it has completed the current marking phase (and
3843 3967 also all other marking tasks have done so and have all synced up).
3844 3968
3845 3969 A method called regular_clock_call() is invoked "regularly" (in
3846 3970 sub ms intervals) throughout marking. It is this clock method that
3847 3971 checks all the abort conditions which were mentioned above and
3848 3972 decides when the task should abort. A work-based scheme is used to
3849 3973 trigger this clock method: when the number of object words the
3850 3974 marking phase has scanned or the number of references the marking
3851 3975 phase has visited reach a given limit. Additional invocations to
3852 3976 the method clock have been planted in a few other strategic places
3853 3977 too. The initial reason for the clock method was to avoid calling
3854 3978 vtime too regularly, as it is quite expensive. So, once it was in
3855 3979 place, it was natural to piggy-back all the other conditions on it
3856 3980 too and not constantly check them throughout the code.
3857 3981
3858 3982 *****************************************************************************/
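
A minimal sketch of the driver loop the comment above prescribes, under the assumption of a CMTask-like interface (the names below are illustrative; the real callers live elsewhere in the G1 code):

    // Illustrative interface; the real CMTask is declared in concurrentMark.hpp.
    struct CMTaskLike {
      virtual void do_marking_step(double target_ms,
                                   bool do_stealing,
                                   bool do_termination) = 0;
      virtual bool has_aborted() const = 0;
      virtual ~CMTaskLike() {}
    };

    // Keep stepping until a step completes without aborting (yield, time-out,
    // overflow and SATB aborts all come back through has_aborted()).
    // Safepoint/yield handling between iterations is omitted in this sketch.
    void drive_marking(CMTaskLike& task, double target_ms) {
      do {
        task.do_marking_step(target_ms,
                             true  /* do_stealing */,
                             true  /* do_termination */);
      } while (task.has_aborted());
    }
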
3859 3983
3860 3984 void CMTask::do_marking_step(double time_target_ms,
3861 3985 bool do_stealing,
3862 3986 bool do_termination) {
3863 3987 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3864 3988 assert(concurrent() == _cm->concurrent(), "they should be the same");
3865 3989
3866 3990 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3867 3991 assert(_task_queues != NULL, "invariant");
3868 3992 assert(_task_queue != NULL, "invariant");
3869 3993 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3870 3994
3871 3995 assert(!_claimed,
3872 3996 "only one thread should claim this task at any one time");
3873 3997
3874 3998   // OK, this doesn't safeguard against all possible scenarios, as it is
3875 3999 // possible for two threads to set the _claimed flag at the same
3876 4000 // time. But it is only for debugging purposes anyway and it will
3877 4001 // catch most problems.
3878 4002 _claimed = true;
3879 4003
3880 4004 _start_time_ms = os::elapsedVTime() * 1000.0;
3881 4005 statsOnly( _interval_start_time_ms = _start_time_ms );
3882 4006
3883 4007 double diff_prediction_ms =
3884 4008 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3885 4009 _time_target_ms = time_target_ms - diff_prediction_ms;
3886 4010
3887 4011 // set up the variables that are used in the work-based scheme to
3888 4012 // call the regular clock method
3889 4013 _words_scanned = 0;
3890 4014 _refs_reached = 0;
3891 4015 recalculate_limits();
3892 4016
3893 4017 // clear all flags
3894 4018 clear_has_aborted();
3895 4019 _has_timed_out = false;
3896 4020 _draining_satb_buffers = false;
3897 4021
3898 4022 ++_calls;
3899 4023
3900 4024 if (_cm->verbose_low()) {
3901 4025 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3902 4026 "target = %1.2lfms >>>>>>>>>>",
3903 4027 _worker_id, _calls, _time_target_ms);
3904 4028 }
3905 4029
3906 4030 // Set up the bitmap and oop closures. Anything that uses them is
3907 4031 // eventually called from this method, so it is OK to allocate these
3908 4032 // statically.
3909 4033 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3910 4034 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
3911 4035 set_cm_oop_closure(&cm_oop_closure);
3912 4036
3913 4037 if (_cm->has_overflown()) {
3914 4038 // This can happen if the mark stack overflows during a GC pause
3915 4039 // and this task, after a yield point, restarts. We have to abort
3916 4040 // as we need to get into the overflow protocol which happens
3917 4041 // right at the end of this task.
3918 4042 set_has_aborted();
3919 4043 }
3920 4044
3921 4045 // First drain any available SATB buffers. After this, we will not
3922 4046 // look at SATB buffers before the next invocation of this method.
3923 4047 // If enough completed SATB buffers are queued up, the regular clock
3924 4048 // will abort this task so that it restarts.
3925 4049 drain_satb_buffers();
3926 4050 // ...then partially drain the local queue and the global stack
3927 4051 drain_local_queue(true);
3928 4052 drain_global_stack(true);
3929 4053
3930 4054 do {
3931 4055 if (!has_aborted() && _curr_region != NULL) {
3932 4056 // This means that we're already holding on to a region.
3933 4057 assert(_finger != NULL, "if region is not NULL, then the finger "
3934 4058 "should not be NULL either");
3935 4059
3936 4060 // We might have restarted this task after an evacuation pause
3937 4061 // which might have evacuated the region we're holding on to
3938 4062 // underneath our feet. Let's read its limit again to make sure
3939 4063 // that we do not iterate over a region of the heap that
3940 4064 // contains garbage (update_region_limit() will also move
3941 4065 // _finger to the start of the region if it is found empty).
3942 4066 update_region_limit();
3943 4067 // We will start from _finger not from the start of the region,
3944 4068 // as we might be restarting this task after aborting half-way
3945 4069 // through scanning this region. In this case, _finger points to
3946 4070 // the address where we last found a marked object. If this is a
3947 4071 // fresh region, _finger points to start().
3948 4072 MemRegion mr = MemRegion(_finger, _region_limit);
3949 4073
3950 4074 if (_cm->verbose_low()) {
3951 4075 gclog_or_tty->print_cr("[%u] we're scanning part "
3952 4076 "["PTR_FORMAT", "PTR_FORMAT") "
3953 4077 "of region "PTR_FORMAT,
3954 4078 _worker_id, _finger, _region_limit, _curr_region);
3955 4079 }
3956 4080
3957 4081 // Let's iterate over the bitmap of the part of the
3958 4082 // region that is left.
3959 4083 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3960 4084 // We successfully completed iterating over the region. Now,
3961 4085 // let's give up the region.
3962 4086 giveup_current_region();
3963 4087 regular_clock_call();
3964 4088 } else {
3965 4089 assert(has_aborted(), "currently the only way to do so");
3966 4090 // The only way to abort the bitmap iteration is to return
3967 4091 // false from the do_bit() method. However, inside the
3968 4092 // do_bit() method we move the _finger to point to the
3969 4093 // object currently being looked at. So, if we bail out, we
3970 4094 // have definitely set _finger to something non-null.
3971 4095 assert(_finger != NULL, "invariant");
3972 4096
3973 4097 // Region iteration was actually aborted. So now _finger
3974 4098 // points to the address of the object we last scanned. If we
3975 4099 // leave it there, when we restart this task, we will rescan
3976 4100 // the object. It is easy to avoid this. We move the finger by
3977 4101 // enough to point to the next possible object header (the
3978 4102 // bitmap knows by how much we need to move it as it knows its
3979 4103 // granularity).
3980 4104 assert(_finger < _region_limit, "invariant");
3981 4105 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3982 4106 // Check if bitmap iteration was aborted while scanning the last object
3983 4107 if (new_finger >= _region_limit) {
3984 4108 giveup_current_region();
3985 4109 } else {
3986 4110 move_finger_to(new_finger);
3987 4111 }
3988 4112 }
3989 4113 }
3990 4114 // At this point we have either completed iterating over the
3991 4115 // region we were holding on to, or we have aborted.
3992 4116
3993 4117 // We then partially drain the local queue and the global stack.
3994 4118 // (Do we really need this?)
3995 4119 drain_local_queue(true);
3996 4120 drain_global_stack(true);
3997 4121
3998 4122 // Read the note on the claim_region() method on why it might
3999 4123 // return NULL with potentially more regions available for
4000 4124 // claiming and why we have to check out_of_regions() to determine
4001 4125 // whether we're done or not.
4002 4126 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4003 4127 // We are going to try to claim a new region. We should have
4004 4128 // given up on the previous one.
4005 4129 // Separated the asserts so that we know which one fires.
4006 4130 assert(_curr_region == NULL, "invariant");
4007 4131 assert(_finger == NULL, "invariant");
4008 4132 assert(_region_limit == NULL, "invariant");
4009 4133 if (_cm->verbose_low()) {
4010 4134 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4011 4135 }
4012 4136 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4013 4137 if (claimed_region != NULL) {
4014 4138 // Yes, we managed to claim one
4015 4139 statsOnly( ++_regions_claimed );
4016 4140
4017 4141 if (_cm->verbose_low()) {
4018 4142 gclog_or_tty->print_cr("[%u] we successfully claimed "
4019 4143 "region "PTR_FORMAT,
4020 4144 _worker_id, claimed_region);
4021 4145 }
4022 4146
4023 4147 setup_for_region(claimed_region);
4024 4148 assert(_curr_region == claimed_region, "invariant");
4025 4149 }
4026 4150 // It is important to call the regular clock here. It might take
4027 4151 // a while to claim a region if, for example, we hit a large
4028 4152 // block of empty regions. So we need to call the regular clock
4029 4153 // method once round the loop to make sure it's called
4030 4154 // frequently enough.
4031 4155 regular_clock_call();
4032 4156 }
4033 4157
4034 4158 if (!has_aborted() && _curr_region == NULL) {
4035 4159 assert(_cm->out_of_regions(),
4036 4160 "at this point we should be out of regions");
4037 4161 }
4038 4162   } while (_curr_region != NULL && !has_aborted());
4039 4163
4040 4164 if (!has_aborted()) {
4041 4165 // We cannot check whether the global stack is empty, since other
4042 4166 // tasks might be pushing objects to it concurrently.
4043 4167 assert(_cm->out_of_regions(),
4044 4168 "at this point we should be out of regions");
4045 4169
4046 4170 if (_cm->verbose_low()) {
4047 4171 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4048 4172 }
4049 4173
4050 4174 // Try to reduce the number of available SATB buffers so that
4051 4175 // remark has less work to do.
4052 4176 drain_satb_buffers();
4053 4177 }
4054 4178
4055 4179 // Since we've done everything else, we can now totally drain the
4056 4180 // local queue and global stack.
4057 4181 drain_local_queue(false);
4058 4182 drain_global_stack(false);
4059 4183
4060 4184   // Attempt to steal work from other tasks' queues.
4061 4185 if (do_stealing && !has_aborted()) {
4062 4186 // We have not aborted. This means that we have finished all that
4063 4187 // we could. Let's try to do some stealing...
4064 4188
4065 4189 // We cannot check whether the global stack is empty, since other
4066 4190 // tasks might be pushing objects to it concurrently.
4067 4191 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4068 4192 "only way to reach here");
4069 4193
4070 4194 if (_cm->verbose_low()) {
4071 4195 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4072 4196 }
4073 4197
4074 4198 while (!has_aborted()) {
4075 4199 oop obj;
4076 4200 statsOnly( ++_steal_attempts );
4077 4201
4078 4202 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4079 4203 if (_cm->verbose_medium()) {
4080 4204 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4081 4205 _worker_id, (void*) obj);
4082 4206 }
4083 4207
4084 4208 statsOnly( ++_steals );
4085 4209
4086 4210 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4087 4211 "any stolen object should be marked");
4088 4212 scan_object(obj);
4089 4213
4090 4214 // And since we're towards the end, let's totally drain the
4091 4215 // local queue and global stack.
4092 4216 drain_local_queue(false);
4093 4217 drain_global_stack(false);
4094 4218 } else {
4095 4219 break;
4096 4220 }
4097 4221 }
4098 4222 }
4099 4223
4100 4224 // If we are about to wrap up and go into termination, check if we
4101 4225 // should raise the overflow flag.
4102 4226 if (do_termination && !has_aborted()) {
4103 4227 if (_cm->force_overflow()->should_force()) {
4104 4228 _cm->set_has_overflown();
4105 4229 regular_clock_call();
4106 4230 }
4107 4231 }
4108 4232
4109 4233 // We still haven't aborted. Now, let's try to get into the
4110 4234 // termination protocol.
4111 4235 if (do_termination && !has_aborted()) {
4112 4236 // We cannot check whether the global stack is empty, since other
4113 4237 // tasks might be concurrently pushing objects on it.
4114 4238 // Separated the asserts so that we know which one fires.
4115 4239 assert(_cm->out_of_regions(), "only way to reach here");
4116 4240 assert(_task_queue->size() == 0, "only way to reach here");
4117 4241
4118 4242 if (_cm->verbose_low()) {
4119 4243 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4120 4244 }
4121 4245
4122 4246 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4123 4247 // The CMTask class also extends the TerminatorTerminator class,
4124 4248 // hence its should_exit_termination() method will also decide
4125 4249 // whether to exit the termination protocol or not.
4126 4250 bool finished = _cm->terminator()->offer_termination(this);
4127 4251 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4128 4252 _termination_time_ms +=
4129 4253 termination_end_time_ms - _termination_start_time_ms;
4130 4254
4131 4255 if (finished) {
4132 4256 // We're all done.
4133 4257
4134 4258 if (_worker_id == 0) {
4135 4259 // let's allow task 0 to do this
4136 4260 if (concurrent()) {
4137 4261 assert(_cm->concurrent_marking_in_progress(), "invariant");
4138 4262 // we need to set this to false before the next
4139 4263 // safepoint. This way we ensure that the marking phase
4140 4264 // doesn't observe any more heap expansions.
4141 4265 _cm->clear_concurrent_marking_in_progress();
4142 4266 }
4143 4267 }
4144 4268
4145 4269 // We can now guarantee that the global stack is empty, since
4146 4270 // all other tasks have finished. We separated the guarantees so
4147 4271 // that, if a condition is false, we can immediately find out
4148 4272 // which one.
4149 4273 guarantee(_cm->out_of_regions(), "only way to reach here");
4150 4274 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4151 4275 guarantee(_task_queue->size() == 0, "only way to reach here");
4152 4276 guarantee(!_cm->has_overflown(), "only way to reach here");
4153 4277 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4154 4278
4155 4279 if (_cm->verbose_low()) {
4156 4280 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4157 4281 }
4158 4282 } else {
4159 4283       // Apparently there's more work to do. Let's abort this task; it
4160 4284       // will be restarted and we can hopefully find more things to do.
4161 4285
4162 4286 if (_cm->verbose_low()) {
4163 4287 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4164 4288 _worker_id);
4165 4289 }
4166 4290
4167 4291 set_has_aborted();
4168 4292 statsOnly( ++_aborted_termination );
4169 4293 }
4170 4294 }
4171 4295
4172 4296 // Mainly for debugging purposes to make sure that a pointer to the
4173 4297 // closure which was statically allocated in this frame doesn't
4174 4298 // escape it by accident.
4175 4299 set_cm_oop_closure(NULL);
4176 4300 double end_time_ms = os::elapsedVTime() * 1000.0;
4177 4301 double elapsed_time_ms = end_time_ms - _start_time_ms;
4178 4302 // Update the step history.
4179 4303 _step_times_ms.add(elapsed_time_ms);
4180 4304
4181 4305 if (has_aborted()) {
4182 4306 // The task was aborted for some reason.
4183 4307
4184 4308 statsOnly( ++_aborted );
4185 4309
4186 4310 if (_has_timed_out) {
4187 4311 double diff_ms = elapsed_time_ms - _time_target_ms;
4188 4312 // Keep statistics of how well we did with respect to hitting
4189 4313 // our target only if we actually timed out (if we aborted for
4190 4314 // other reasons, then the results might get skewed).
4191 4315 _marking_step_diffs_ms.add(diff_ms);
4192 4316 }
4193 4317
4194 4318 if (_cm->has_overflown()) {
4195 4319 // This is the interesting one. We aborted because a global
4196 4320 // overflow was raised. This means we have to restart the
4197 4321 // marking phase and start iterating over regions. However, in
4198 4322 // order to do this we have to make sure that all tasks stop
4199 4323 // what they are doing and re-initialise in a safe manner. We
4200 4324 // will achieve this with the use of two barrier sync points.
4201 4325
4202 4326 if (_cm->verbose_low()) {
4203 4327 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4204 4328 }
4205 4329
4206 4330 _cm->enter_first_sync_barrier(_worker_id);
4207 4331 // When we exit this sync barrier we know that all tasks have
4208 4332 // stopped doing marking work. So, it's now safe to
4209 4333 // re-initialise our data structures. At the end of this method,
4210 4334 // task 0 will clear the global data structures.
4211 4335
4212 4336 statsOnly( ++_aborted_overflow );
4213 4337
4214 4338 // We clear the local state of this task...
4215 4339 clear_region_fields();
4216 4340
4217 4341 // ...and enter the second barrier.
4218 4342 _cm->enter_second_sync_barrier(_worker_id);
4219 4343       // At this point everything has been re-initialised and we're
4220 4344 // ready to restart.
4221 4345 }
4222 4346
4223 4347 if (_cm->verbose_low()) {
4224 4348 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4225 4349 "elapsed = %1.2lfms <<<<<<<<<<",
4226 4350 _worker_id, _time_target_ms, elapsed_time_ms);
4227 4351 if (_cm->has_aborted()) {
4228 4352 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4229 4353 _worker_id);
4230 4354 }
4231 4355 }
4232 4356 } else {
4233 4357 if (_cm->verbose_low()) {
4234 4358 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4235 4359 "elapsed = %1.2lfms <<<<<<<<<<",
4236 4360 _worker_id, _time_target_ms, elapsed_time_ms);
4237 4361 }
4238 4362 }
4239 4363
4240 4364 _claimed = false;
4241 4365 }
4242 4366
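For orientation, the marking step above runs through a fixed sequence of phases: drain SATB buffers, claim and scan regions, total queue/stack drains, work stealing, and finally the termination protocol, with the two-barrier overflow restart on abort. The compact, self-contained sketch below mirrors only that ordering; every name in it is an illustrative stand-in, not the HotSpot API.

    #include <cstdio>

    // Each stand-in prints its phase and reports whether it forced an abort.
    static bool aborted_during(const char* phase) {
      std::printf("phase: %s\n", phase);
      return false;  // pretend nothing forces an abort in this run
    }

    int main() {
      bool aborted =
          aborted_during("drain SATB buffers + partial queue/stack drains") ||
          aborted_during("claim regions and scan the next-mark bitmap")     ||
          aborted_during("totally drain local queue and global stack")      ||
          aborted_during("steal from other tasks' queues")                  ||
          aborted_during("offer termination");
      if (aborted) {
        // Overflow restart: all tasks meet at two sync barriers; task 0
        // resets the global state in between, then every task restarts.
        std::puts("abort -> barrier #1 -> clear local state -> barrier #2");
      } else {
        std::puts("marking step finished");
      }
      return 0;
    }

The sketch compresses the control flow; the real method interleaves partial drains and regular_clock_call() checks throughout the region-claiming loop.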
4243 4367 CMTask::CMTask(uint worker_id,
4244 4368 ConcurrentMark* cm,
4245 4369 size_t* marked_bytes,
4246 4370 BitMap* card_bm,
4247 4371 CMTaskQueue* task_queue,
4248 4372 CMTaskQueueSet* task_queues)
4249 4373 : _g1h(G1CollectedHeap::heap()),
4250 4374 _worker_id(worker_id), _cm(cm),
4251 4375 _claimed(false),
4252 4376 _nextMarkBitMap(NULL), _hash_seed(17),
4253 4377 _task_queue(task_queue),
4254 4378 _task_queues(task_queues),
4255 4379 _cm_oop_closure(NULL),
4256 4380 _marked_bytes_array(marked_bytes),
4257 4381 _card_bm(card_bm) {
4258 4382 guarantee(task_queue != NULL, "invariant");
4259 4383 guarantee(task_queues != NULL, "invariant");
4260 4384
4261 4385 statsOnly( _clock_due_to_scanning = 0;
4262 4386 _clock_due_to_marking = 0 );
4263 4387
4264 4388 _marking_step_diffs_ms.add(0.5);
4265 4389 }
4266 4390
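The constructor above wires one task to its own queue plus the shared queue set that try_stealing() searches. A minimal stand-in sketch of that per-worker wiring follows; all types here are invented for illustration and are not the HotSpot classes.

    #include <deque>
    #include <vector>

    struct TaskQueue { std::deque<void*> entries; };  // invented stand-in

    struct TaskQueueSet {                             // invented stand-in
      std::vector<TaskQueue*> queues;
      void register_queue(unsigned i, TaskQueue* q) {
        if (queues.size() <= i) queues.resize(i + 1);
        queues[i] = q;                                // worker i owns queue i
      }
    };

    int main() {
      const unsigned num_workers = 4;                 // illustrative value
      std::vector<TaskQueue> queues(num_workers);
      TaskQueueSet set;
      for (unsigned i = 0; i < num_workers; ++i) {
        // Each worker pushes/pops its own queue; stealing walks the set.
        set.register_queue(i, &queues[i]);
      }
      return 0;
    }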
4267 4391 // These are formatting macros that are used below to ensure
4268 4392 // consistent formatting. The *_H_* versions are used to format the
4269 4393 // header for a particular value and they should be kept consistent
4270 4394 // with the corresponding macro. Also note that most of the macros add
4271 4395 // the necessary white space (as a prefix) which makes them a bit
4272 4396 // easier to compose.
4273 4397
4274 4398 // All the output lines are prefixed with this string to be able to
4275 4399 // identify them easily in a large log file.
4276 4400 #define G1PPRL_LINE_PREFIX "###"
4277 4401
4278 4402 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4279 4403 #ifdef _LP64
4280 4404 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4281 4405 #else // _LP64
4282 4406 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4283 4407 #endif // _LP64
4284 4408
4285 4409 // For per-region info
4286 4410 #define G1PPRL_TYPE_FORMAT " %-4s"
4287 4411 #define G1PPRL_TYPE_H_FORMAT " %4s"
4288 4412 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4289 4413 #define G1PPRL_BYTE_H_FORMAT " %9s"
4290 4414 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4291 4415 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4292 4416
4293 4417 // For summary info
4294 4418 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4295 4419 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4296 4420 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4297 4421 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4298 4422
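The G1PPRL_* macros above compose through C++ adjacent string-literal concatenation: consecutive literals merge into a single format string at compile time, which is what lets the print_cr() calls below stack one macro per column. A tiny standalone illustration with made-up macro names:

    #include <cstdio>

    #define EX_PREFIX      "###"
    #define EX_NAME_FORMAT " %-4s"
    #define EX_BYTE_FORMAT " %9zu"

    int main() {
      // "###" " %-4s" " %9zu" becomes the single literal "### %-4s %9zu".
      std::printf(EX_PREFIX EX_NAME_FORMAT EX_BYTE_FORMAT "\n",
                  "EDEN", (size_t)1048576);
      return 0;
    }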
4299 4423 G1PrintRegionLivenessInfoClosure::
4300 4424 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4301 4425 : _out(out),
4302 4426 _total_used_bytes(0), _total_capacity_bytes(0),
4303 4427 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4304 4428 _hum_used_bytes(0), _hum_capacity_bytes(0),
4305 4429 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4306 4430 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4307 4431 MemRegion g1_committed = g1h->g1_committed();
4308 4432 MemRegion g1_reserved = g1h->g1_reserved();
4309 4433 double now = os::elapsedTime();
4310 4434
4311 4435 // Print the header of the output.
4312 4436 _out->cr();
4313 4437 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4314 4438 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4315 4439 G1PPRL_SUM_ADDR_FORMAT("committed")
4316 4440 G1PPRL_SUM_ADDR_FORMAT("reserved")
4317 4441 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4318 4442 g1_committed.start(), g1_committed.end(),
4319 4443 g1_reserved.start(), g1_reserved.end(),
4320 4444 HeapRegion::GrainBytes);
4321 4445 _out->print_cr(G1PPRL_LINE_PREFIX);
4322 4446 _out->print_cr(G1PPRL_LINE_PREFIX
4323 4447 G1PPRL_TYPE_H_FORMAT
4324 4448 G1PPRL_ADDR_BASE_H_FORMAT
4325 4449 G1PPRL_BYTE_H_FORMAT
4326 4450 G1PPRL_BYTE_H_FORMAT
4327 4451 G1PPRL_BYTE_H_FORMAT
4328 4452 G1PPRL_DOUBLE_H_FORMAT,
4329 4453 "type", "address-range",
4330 4454 "used", "prev-live", "next-live", "gc-eff");
4331 4455 _out->print_cr(G1PPRL_LINE_PREFIX
4332 4456 G1PPRL_TYPE_H_FORMAT
4333 4457 G1PPRL_ADDR_BASE_H_FORMAT
4334 4458 G1PPRL_BYTE_H_FORMAT
4335 4459 G1PPRL_BYTE_H_FORMAT
4336 4460 G1PPRL_BYTE_H_FORMAT
4337 4461 G1PPRL_DOUBLE_H_FORMAT,
4338 4462 "", "",
4339 4463 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4340 4464 }
4341 4465
4342 4466 // It takes as a parameter a reference to one of the _hum_* fields,
4343 4467 // deduces the corresponding value for a region in a humongous region
4344 4468 // series (either the region size, or what's left if the _hum_* field
4345 4469 // is < the region size), and updates the _hum_* field accordingly.
4346 4470 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4347 4471 size_t bytes = 0;
4348 4472 // The > 0 check is to deal with the prev and next live bytes which
4349 4473 // could be 0.
4350 4474 if (*hum_bytes > 0) {
4351 4475 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4352 4476 *hum_bytes -= bytes;
4353 4477 }
4354 4478 return bytes;
4355 4479 }
4356 4480
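To see the accounting above in action: for a humongous series whose "starts humongous" region recorded 2.5 regions' worth of used bytes, successive calls hand out one full region's worth at a time and the remainder last. A self-contained restatement of the same logic, with an assumed 1 MB region size:

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    static const size_t kGrainBytes = 1024 * 1024;  // assumed 1 MB regions

    // Same logic as get_hum_bytes() above, restated standalone.
    static size_t take_hum_bytes(size_t* hum_bytes) {
      size_t bytes = 0;
      if (*hum_bytes > 0) {  // prev/next live byte counts may be zero
        bytes = std::min(kGrainBytes, *hum_bytes);
        *hum_bytes -= bytes;
      }
      return bytes;
    }

    int main() {
      size_t hum_used = 5 * kGrainBytes / 2;  // 2.5 regions of used bytes
      while (hum_used > 0) {
        std::printf("region gets %zu bytes\n", take_hum_bytes(&hum_used));
      }
      // Prints 1048576, 1048576, 524288: full regions first, the tail last.
      return 0;
    }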
4357 4481 // It deduces the values for a region in a humongous region series
4358 4482 // from the _hum_* fields and updates those accordingly. It assumes
4359 4483 // that the _hum_* fields have already been set up from the "starts
4360 4484 // humongous" region and we visit the regions in address order.
4361 4485 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4362 4486 size_t* capacity_bytes,
4363 4487 size_t* prev_live_bytes,
4364 4488 size_t* next_live_bytes) {
4365 4489 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4366 4490 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4367 4491 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4368 4492 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4369 4493 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4370 4494 }
4371 4495
4372 4496 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4373 4497 const char* type = "";
4374 4498 HeapWord* bottom = r->bottom();
4375 4499 HeapWord* end = r->end();
4376 4500 size_t capacity_bytes = r->capacity();
4377 4501 size_t used_bytes = r->used();
4378 4502 size_t prev_live_bytes = r->live_bytes();
4379 4503 size_t next_live_bytes = r->next_live_bytes();
4380 4504 double gc_eff = r->gc_efficiency();
4381 4505 if (r->used() == 0) {
4382 4506 type = "FREE";
4383 4507 } else if (r->is_survivor()) {
4384 4508 type = "SURV";
4385 4509 } else if (r->is_young()) {
4386 4510 type = "EDEN";
4387 4511 } else if (r->startsHumongous()) {
4388 4512 type = "HUMS";
4389 4513
4390 4514 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4391 4515 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4392 4516 "they should have been zeroed after the last time we used them");
4393 4517 // Set up the _hum_* fields.
4394 4518 _hum_capacity_bytes = capacity_bytes;
4395 4519 _hum_used_bytes = used_bytes;
4396 4520 _hum_prev_live_bytes = prev_live_bytes;
4397 4521 _hum_next_live_bytes = next_live_bytes;
4398 4522 get_hum_bytes(&used_bytes, &capacity_bytes,
4399 4523 &prev_live_bytes, &next_live_bytes);
4400 4524 end = bottom + HeapRegion::GrainWords;
4401 4525 } else if (r->continuesHumongous()) {
4402 4526 type = "HUMC";
4403 4527 get_hum_bytes(&used_bytes, &capacity_bytes,
4404 4528 &prev_live_bytes, &next_live_bytes);
4405 4529 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4406 4530 } else {
4407 4531 type = "OLD";
4408 4532 }
4409 4533
4410 4534 _total_used_bytes += used_bytes;
4411 4535 _total_capacity_bytes += capacity_bytes;
4412 4536 _total_prev_live_bytes += prev_live_bytes;
4413 4537 _total_next_live_bytes += next_live_bytes;
4414 4538
4415 4539 // Print a line for this particular region.
4416 4540 _out->print_cr(G1PPRL_LINE_PREFIX
4417 4541 G1PPRL_TYPE_FORMAT
4418 4542 G1PPRL_ADDR_BASE_FORMAT
4419 4543 G1PPRL_BYTE_FORMAT
4420 4544 G1PPRL_BYTE_FORMAT
4421 4545 G1PPRL_BYTE_FORMAT
4422 4546 G1PPRL_DOUBLE_FORMAT,
4423 4547 type, bottom, end,
4424 4548 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4425 4549
4426 4550 return false;
4427 4551 }
4428 4552
4429 4553 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4430 4554 // Print the footer of the output.
4431 4555 _out->print_cr(G1PPRL_LINE_PREFIX);
4432 4556 _out->print_cr(G1PPRL_LINE_PREFIX
4433 4557 " SUMMARY"
4434 4558 G1PPRL_SUM_MB_FORMAT("capacity")
4435 4559 G1PPRL_SUM_MB_PERC_FORMAT("used")
4436 4560 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4437 4561 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4438 4562 bytes_to_mb(_total_capacity_bytes),
4439 4563 bytes_to_mb(_total_used_bytes),
4440 4564 perc(_total_used_bytes, _total_capacity_bytes),
4441 4565 bytes_to_mb(_total_prev_live_bytes),
4442 4566 perc(_total_prev_live_bytes, _total_capacity_bytes),
4443 4567 bytes_to_mb(_total_next_live_bytes),
4444 4568 perc(_total_next_live_bytes, _total_capacity_bytes));
4445 4569 _out->cr();
4446 4570 }
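The footer relies on two helpers not visible in this hunk, bytes_to_mb() and perc(). Plausible definitions, shown here only as assumptions about their behavior rather than the actual HotSpot code:

    #include <cstddef>
    #include <cstdio>

    static double bytes_to_mb(size_t bytes) {        // assumed helper
      return (double)bytes / (1024.0 * 1024.0);
    }

    static double perc(size_t part, size_t total) {  // assumed helper
      return total == 0 ? 0.0 : 100.0 * (double)part / (double)total;
    }

    int main() {
      size_t capacity = 256 * 1024 * 1024;  // illustrative totals
      size_t used     =  96 * 1024 * 1024;
      std::printf("used: %1.2f MB / %1.2f %%\n",
                  bytes_to_mb(used), perc(used, capacity));
      return 0;
    }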
(1583 lines elided)