rev 3708 : 8000244: G1: Ergonomically set MarkStackSize and use virtual space for global marking stack
Summary: Set MarkStackSize ergonomically, based on the number of parallel marking threads and subject to a reasonable minimum. If marking has to be restarted due to an overflow, expand the marking stack up to a reasonable maximum. Allocate the underlying space for the marking stack from virtual memory.
Reviewed-by: jmasa
rev 3709 : imported patch reuse-old-marking-stack
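
[Editor's note] For orientation, the ergonomic sizing this patch introduces in the ConcurrentMark constructor (see the diff below) amounts to the following calculation; a minimal stand-alone sketch, where the two constants are illustrative stand-ins for the real MarkStackSizeMax and TASKQUEUE_SIZE values:

#include <algorithm>
#include <cstddef>

// Hypothetical stand-ins for the HotSpot flag/constant values.
static const size_t kMarkStackSizeMax = 512 * 1024;   // assumption
static const size_t kTaskQueueSize    = 128 * 1024;   // assumption

// If MarkStackSize is not set on the command line, scale the stack with
// the number of parallel marking threads, clamped to the permitted range.
size_t ergonomic_mark_stack_size(size_t mark_stack_size,
                                 size_t parallel_marking_threads) {
  return std::min(kMarkStackSizeMax,
                  std::max(mark_stack_size,
                           parallel_marking_threads * kTaskQueueSize));
}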
--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
32 32 #include "gc_implementation/g1/g1Log.hpp"
33 33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
34 34 #include "gc_implementation/g1/g1RemSet.hpp"
35 35 #include "gc_implementation/g1/heapRegion.inline.hpp"
36 36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
37 37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
38 38 #include "gc_implementation/shared/vmGCOperations.hpp"
39 39 #include "memory/genOopClosures.inline.hpp"
40 40 #include "memory/referencePolicy.hpp"
41 41 #include "memory/resourceArea.hpp"
42 42 #include "oops/oop.inline.hpp"
43 43 #include "runtime/handles.inline.hpp"
44 44 #include "runtime/java.hpp"
45 45 #include "services/memTracker.hpp"
46 46
47 47 // Concurrent marking bit map wrapper
48 48
49 49 CMBitMapRO::CMBitMapRO(int shifter) :
50 50 _bm(),
51 51 _shifter(shifter) {
52 52 _bmStartWord = 0;
53 53 _bmWordSize = 0;
54 54 }
55 55
56 56 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
57 57 HeapWord* limit) const {
58 58 // First we must round addr *up* to a possible object boundary.
59 59 addr = (HeapWord*)align_size_up((intptr_t)addr,
60 60 HeapWordSize << _shifter);
61 61 size_t addrOffset = heapWordToOffset(addr);
62 62 if (limit == NULL) {
63 63 limit = _bmStartWord + _bmWordSize;
64 64 }
65 65 size_t limitOffset = heapWordToOffset(limit);
66 66 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
67 67 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
68 68 assert(nextAddr >= addr, "get_next_one postcondition");
69 69 assert(nextAddr == limit || isMarked(nextAddr),
70 70 "get_next_one postcondition");
71 71 return nextAddr;
72 72 }
73 73
74 74 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
75 75 HeapWord* limit) const {
76 76 size_t addrOffset = heapWordToOffset(addr);
77 77 if (limit == NULL) {
78 78 limit = _bmStartWord + _bmWordSize;
79 79 }
80 80 size_t limitOffset = heapWordToOffset(limit);
81 81 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
82 82 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
83 83 assert(nextAddr >= addr, "get_next_one postcondition");
84 84 assert(nextAddr == limit || !isMarked(nextAddr),
85 85 "get_next_one postcondition");
86 86 return nextAddr;
87 87 }
88 88
89 89 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
90 90 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
91 91 return (int) (diff >> _shifter);
92 92 }
93 93
94 94 #ifndef PRODUCT
95 95 bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
96 96 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
97 97 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
98 98 "size inconsistency");
99 99 return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
100 100 _bmWordSize == heap_rs.size()>>LogHeapWordSize;
101 101 }
102 102 #endif
103 103
104 104 bool CMBitMap::allocate(ReservedSpace heap_rs) {
105 105 _bmStartWord = (HeapWord*)(heap_rs.base());
106 106 _bmWordSize = heap_rs.size()/HeapWordSize; // heap_rs.size() is in bytes
107 107 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
108 108 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
109 109 if (!brs.is_reserved()) {
110 110 warning("ConcurrentMark marking bit map allocation failure");
111 111 return false;
112 112 }
113 113 MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
114 114 // For now we'll just commit all of the bit map up front.
115 115 // Later on we'll try to be more parsimonious with swap.
116 116 if (!_virtual_space.initialize(brs, brs.size())) {
117 117 warning("ConcurrentMark marking bit map backing store failure");
118 118 return false;
119 119 }
120 120 assert(_virtual_space.committed_size() == brs.size(),
121 121 "didn't reserve backing store for all of concurrent marking bit map?");
122 122 _bm.set_map((uintptr_t*)_virtual_space.low());
123 123 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
124 124 _bmWordSize, "inconsistency in bit map sizing");
125 125 _bm.set_size(_bmWordSize >> _shifter);
126 126 return true;
127 127 }
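
[Editor's note] A note on cost: with _shifter = MinObjAlignment - 1 (zero on a typical 64-bit build with 8-byte object alignment), the bitmap keeps one mark bit per heap word:

// bitmap_bytes = heap_bytes / HeapWordSize / BitsPerByte
//              = heap_bytes / 64        (64-bit, 8-byte alignment)
// Example: a 4 GB heap needs ~64 MB per bitmap, and G1 maintains two
// of them (the previous and next marking bitmaps).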
128 128
129 129 void CMBitMap::clearAll() {
130 130 _bm.clear();
131 131 return;
132 132 }
133 133
134 134 void CMBitMap::markRange(MemRegion mr) {
135 135 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
136 136 assert(!mr.is_empty(), "unexpected empty region");
137 137 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
138 138 ((HeapWord *) mr.end())),
139 139 "markRange memory region end is not card aligned");
140 140 // convert address range into offset range
141 141 _bm.at_put_range(heapWordToOffset(mr.start()),
142 142 heapWordToOffset(mr.end()), true);
143 143 }
144 144
145 145 void CMBitMap::clearRange(MemRegion mr) {
146 146 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
147 147 assert(!mr.is_empty(), "unexpected empty region");
148 148 // convert address range into offset range
149 149 _bm.at_put_range(heapWordToOffset(mr.start()),
150 150 heapWordToOffset(mr.end()), false);
151 151 }
152 152
153 153 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
154 154 HeapWord* end_addr) {
155 155 HeapWord* start = getNextMarkedWordAddress(addr);
156 156 start = MIN2(start, end_addr);
157 157 HeapWord* end = getNextUnmarkedWordAddress(start);
158 158 end = MIN2(end, end_addr);
159 159 assert(start <= end, "Consistency check");
160 160 MemRegion mr(start, end);
161 161 if (!mr.is_empty()) {
162 162 clearRange(mr);
163 163 }
164 164 return mr;
165 165 }
166 166
167 167 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
168 168 _base(NULL), _cm(cm)
169 169 #ifdef ASSERT
170 170 , _drain_in_progress(false)
171 171 , _drain_in_progress_yields(false)
172 172 #endif
173 173 {}
174 174
175 175 bool CMMarkStack::allocate(size_t capacity) {
176 176 // allocate a stack of the requisite depth
177 177 ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
178 178 if (!rs.is_reserved()) {
179 179 warning("ConcurrentMark MarkStack allocation failure");
180 180 return false;
181 181 }
182 182 MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
183 183 if (!_virtual_space.initialize(rs, rs.size())) {
184 184 warning("ConcurrentMark MarkStack backing store failure");
185 185 // Release the virtual memory reserved for the marking stack
186 186 rs.release();
187 187 return false;
188 188 }
189 189 assert(_virtual_space.committed_size() == rs.size(),
190 190 "Didn't reserve backing store for all of ConcurrentMark stack?");
191 + _rs = rs;
191 192 _base = (oop*) _virtual_space.low();
192 193 setEmpty();
193 194 _capacity = (jint) capacity;
194 195 _saved_index = -1;
195 196 NOT_PRODUCT(_max_depth = 0);
196 197 return true;
197 198 }
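
[Editor's note] allocate() follows the usual reserve-then-commit pattern (ReservedSpace reserves address space, VirtualSpace commits it), and the patch now also saves the reservation in _rs so expand() can later release it or fall back to it. For readers unfamiliar with the pattern, an illustration in terms of POSIX mmap — not the HotSpot API:

#include <cstddef>
#include <sys/mman.h>

// Reserve-then-commit, as CMMarkStack::allocate() does with
// ReservedSpace/VirtualSpace (illustration only).
void* reserve_and_commit(size_t bytes) {
  // Reserve address space with no access and no backing store.
  void* base = mmap(NULL, bytes, PROT_NONE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (base == MAP_FAILED) {
    return NULL;                       // reservation failed
  }
  // Commit: grant access so the pages may be backed on first touch.
  if (mprotect(base, bytes, PROT_READ | PROT_WRITE) != 0) {
    munmap(base, bytes);               // release the reservation
    return NULL;
  }
  return base;
}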
198 199
199 200 void CMMarkStack::expand() {
200 201 // Called, during remark, if we've overflown the marking stack during marking.
 201 202   assert(isEmpty(), "stack should have been emptied while handling overflow");
202 203 assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
203 204 // Clear expansion flag
204 205 _should_expand = false;
205 206 if (_capacity == (jint) MarkStackSizeMax) {
206 207 if (PrintGCDetails && Verbose) {
207 208 gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
208 209 }
209 210 return;
210 211 }
211 212 // Double capacity if possible
212 213 jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
213 214 // Do not give up existing stack until we have managed to
214 215 // get the double capacity that we desired.
215 216 ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
216 217 sizeof(oop)));
217 - if (rs.is_reserved()) {
218 - // Release the backing store associated with old stack
219 - _virtual_space.release();
220 - // Reinitialize virtual space for new stack
221 - if (!_virtual_space.initialize(rs, rs.size())) {
222 - fatal("Not enough swap for expanded marking stack capacity");
223 - }
224 - _base = (oop*)(_virtual_space.low());
225 - _index = 0;
226 - _capacity = new_capacity;
227 - } else {
218 + if (!rs.is_reserved()) {
228 219 if (PrintGCDetails && Verbose) {
229 220 // Failed to double capacity, continue;
230 221 gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
231 - SIZE_FORMAT"K to " SIZE_FORMAT"K",
222 + SIZE_FORMAT "K to " SIZE_FORMAT "K",
232 223 _capacity / K, new_capacity / K);
233 224 }
225 + return;
226 + }
227 +
228 + // Clear the backing store fields associated with the space for the
 229 +   // old marking stack. Note this doesn't actually release the space.
230 + _virtual_space.release();
231 +
232 + // Reinitialize virtual space for the expanded stack.
233 + if (!_virtual_space.initialize(rs, rs.size())) {
 234 +     // We failed to commit the space for the expanded marking stack.
235 + // Release the expanded reserved space...
236 + rs.release();
237 + // ... and reinitialize with the previous un-expanded space.
238 + if (_virtual_space.initialize(_rs, _rs.size())) {
239 + if (PrintGCDetails && Verbose) {
240 + gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
241 + SIZE_FORMAT "K to " SIZE_FORMAT "K",
242 + _capacity / K, new_capacity / K);
243 + }
244 + } else {
245 + // The previous backing store space should have been already
246 + // committed but we failed to initialize the virtual space
247 + // for some reason.
248 + fatal("Error re-initializing marking stack with old capacity");
249 + }
250 + } else {
251 + // We successfully committed the space for the expanded marking stack.
252 + if (PrintGCDetails && Verbose) {
253 + gclog_or_tty->print(" Successfully expanded marking stack capacity from "
254 + SIZE_FORMAT "K to " SIZE_FORMAT "K",
255 + _capacity / K, new_capacity / K);
256 + }
257 + // Release the previous (unexpanded) space.
258 + _rs.release();
259 + // Record the new (expanded) space.
260 + _rs = rs;
261 + // Record the new capacity
262 + _capacity = new_capacity;
234 263 }
264 + assert(_virtual_space.committed_size() == _rs.size(),
265 + "Didn't reserve backing store for all of ConcurrentMark stack?");
266 + _base = (oop*)(_virtual_space.low());
267 + _index = 0;
235 268 }
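
[Editor's note] The rewritten expand() has three outcomes; condensed, using the same names as the patch:

// 1. Reserving space for double the capacity fails:
//      log (if verbose) and keep using the current stack.
// 2. The reservation succeeds but committing it fails:
//      release the new reservation and re-commit the saved old
//      reservation (_rs); if even that fails, fatal().
// 3. Reservation and commit both succeed:
//      release the old reservation, record the new one in _rs, and
//      double _capacity (capped at MarkStackSizeMax).
// In the two surviving cases, _base and _index are then refreshed from
// the (re)initialized virtual space.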
236 269
237 270 void CMMarkStack::set_should_expand() {
 238 271 // If we're resetting the marking state because of a
239 272 // marking stack overflow, record that we should, if
240 273 // possible, expand the stack.
241 274 _should_expand = _cm->has_overflown();
242 275 }
243 276
244 277 CMMarkStack::~CMMarkStack() {
245 278 if (_base != NULL) {
246 279 _base = NULL;
247 280 _virtual_space.release();
248 281 }
249 282 }
250 283
251 284 void CMMarkStack::par_push(oop ptr) {
252 285 while (true) {
253 286 if (isFull()) {
254 287 _overflow = true;
255 288 return;
256 289 }
257 290 // Otherwise...
258 291 jint index = _index;
259 292 jint next_index = index+1;
260 293 jint res = Atomic::cmpxchg(next_index, &_index, index);
261 294 if (res == index) {
262 295 _base[index] = ptr;
263 296 // Note that we don't maintain this atomically. We could, but it
264 297 // doesn't seem necessary.
265 298 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
266 299 return;
267 300 }
268 301 // Otherwise, we need to try again.
269 302 }
270 303 }
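
[Editor's note] par_push() claims a slot with a compare-and-swap on _index: Atomic::cmpxchg returns the value it observed, so the push succeeds only when that equals the index just read. The same loop restated with standard C++11 atomics — a sketch, not HotSpot's Atomic API:

#include <atomic>

std::atomic<int> g_index(0);   // stands in for CMMarkStack::_index

// Returns true and the claimed slot on success; false once full.
bool try_claim_slot(int capacity, int* out_slot) {
  while (true) {
    int index = g_index.load();
    if (index >= capacity) {
      return false;                            // overflow
    }
    if (g_index.compare_exchange_weak(index, index + 1)) {
      *out_slot = index;                       // we own slot 'index'
      return true;
    }
    // Lost the race to another pusher; retry with a fresh index.
  }
}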
271 304
272 305 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
273 306 while (true) {
274 307 if (isFull()) {
275 308 _overflow = true;
276 309 return;
277 310 }
278 311 // Otherwise...
279 312 jint index = _index;
280 313 jint next_index = index + n;
281 314 if (next_index > _capacity) {
282 315 _overflow = true;
283 316 return;
284 317 }
285 318 jint res = Atomic::cmpxchg(next_index, &_index, index);
286 319 if (res == index) {
287 320 for (int i = 0; i < n; i++) {
288 321 int ind = index + i;
289 322 assert(ind < _capacity, "By overflow test above.");
290 323 _base[ind] = ptr_arr[i];
291 324 }
292 325 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
293 326 return;
294 327 }
295 328 // Otherwise, we need to try again.
296 329 }
297 330 }
298 331
299 332 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
300 333 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
301 334 jint start = _index;
302 335 jint next_index = start + n;
303 336 if (next_index > _capacity) {
304 337 _overflow = true;
305 338 return;
306 339 }
307 340 // Otherwise.
308 341 _index = next_index;
309 342 for (int i = 0; i < n; i++) {
310 343 int ind = start + i;
311 344 assert(ind < _capacity, "By overflow test above.");
312 345 _base[ind] = ptr_arr[i];
313 346 }
314 347 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
315 348 }
316 349
317 350 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
318 351 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
319 352 jint index = _index;
320 353 if (index == 0) {
321 354 *n = 0;
322 355 return false;
323 356 } else {
324 357 int k = MIN2(max, index);
325 358 jint new_ind = index - k;
326 359 for (int j = 0; j < k; j++) {
327 360 ptr_arr[j] = _base[new_ind + j];
328 361 }
329 362 _index = new_ind;
330 363 *n = k;
331 364 return true;
332 365 }
333 366 }
334 367
335 368 template<class OopClosureClass>
336 369 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
337 370 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
338 371 || SafepointSynchronize::is_at_safepoint(),
339 372 "Drain recursion must be yield-safe.");
340 373 bool res = true;
341 374 debug_only(_drain_in_progress = true);
342 375 debug_only(_drain_in_progress_yields = yield_after);
343 376 while (!isEmpty()) {
344 377 oop newOop = pop();
345 378 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
346 379 assert(newOop->is_oop(), "Expected an oop");
347 380 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
348 381 "only grey objects on this stack");
349 382 newOop->oop_iterate(cl);
350 383 if (yield_after && _cm->do_yield_check()) {
351 384 res = false;
352 385 break;
353 386 }
354 387 }
355 388 debug_only(_drain_in_progress = false);
356 389 return res;
357 390 }
358 391
359 392 void CMMarkStack::note_start_of_gc() {
360 393 assert(_saved_index == -1,
361 394 "note_start_of_gc()/end_of_gc() bracketed incorrectly");
362 395 _saved_index = _index;
363 396 }
364 397
365 398 void CMMarkStack::note_end_of_gc() {
366 399 // This is intentionally a guarantee, instead of an assert. If we
367 400 // accidentally add something to the mark stack during GC, it
368 401 // will be a correctness issue so it's better if we crash. we'll
 369 402 // will be a correctness issue so it's better if we crash. We'll
370 403 // issue in any way.
371 404 guarantee(_saved_index == _index,
372 405 err_msg("saved index: %d index: %d", _saved_index, _index));
373 406 _saved_index = -1;
374 407 }
375 408
376 409 void CMMarkStack::oops_do(OopClosure* f) {
377 410 assert(_saved_index == _index,
378 411 err_msg("saved index: %d index: %d", _saved_index, _index));
379 412 for (int i = 0; i < _index; i += 1) {
380 413 f->do_oop(&_base[i]);
381 414 }
382 415 }
383 416
384 417 bool ConcurrentMark::not_yet_marked(oop obj) const {
385 418 return _g1h->is_obj_ill(obj);
386 419 }
387 420
388 421 CMRootRegions::CMRootRegions() :
389 422 _young_list(NULL), _cm(NULL), _scan_in_progress(false),
390 423 _should_abort(false), _next_survivor(NULL) { }
391 424
392 425 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
393 426 _young_list = g1h->young_list();
394 427 _cm = cm;
395 428 }
396 429
397 430 void CMRootRegions::prepare_for_scan() {
398 431 assert(!scan_in_progress(), "pre-condition");
399 432
400 433 // Currently, only survivors can be root regions.
401 434 assert(_next_survivor == NULL, "pre-condition");
402 435 _next_survivor = _young_list->first_survivor_region();
403 436 _scan_in_progress = (_next_survivor != NULL);
404 437 _should_abort = false;
405 438 }
406 439
407 440 HeapRegion* CMRootRegions::claim_next() {
408 441 if (_should_abort) {
409 442 // If someone has set the should_abort flag, we return NULL to
410 443 // force the caller to bail out of their loop.
411 444 return NULL;
412 445 }
413 446
414 447 // Currently, only survivors can be root regions.
415 448 HeapRegion* res = _next_survivor;
416 449 if (res != NULL) {
417 450 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
418 451 // Read it again in case it changed while we were waiting for the lock.
419 452 res = _next_survivor;
420 453 if (res != NULL) {
421 454 if (res == _young_list->last_survivor_region()) {
422 455 // We just claimed the last survivor so store NULL to indicate
423 456 // that we're done.
424 457 _next_survivor = NULL;
425 458 } else {
426 459 _next_survivor = res->get_next_young_region();
427 460 }
428 461 } else {
429 462 // Someone else claimed the last survivor while we were trying
430 463 // to take the lock so nothing else to do.
431 464 }
432 465 }
433 466 assert(res == NULL || res->is_survivor(), "post-condition");
434 467
435 468 return res;
436 469 }
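
[Editor's note] claim_next() is a double-checked claim: the racy first read lets workers bail out cheaply once the survivor list is exhausted, while the re-read under the lock makes the actual claim safe. The shape of it in a generic sketch (Region and its next field are illustrative stand-ins):

#include <cstddef>
#include <mutex>

struct Region { Region* next; };

Region* claim_next(Region* volatile& cursor, std::mutex& lock) {
  Region* res = cursor;              // racy read: cheap NULL bail-out
  if (res != NULL) {
    std::lock_guard<std::mutex> x(lock);
    res = cursor;                    // re-read under the lock
    if (res != NULL) {
      cursor = res->next;            // advance; NULL once exhausted
    }
  }
  return res;
}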
437 470
438 471 void CMRootRegions::scan_finished() {
439 472 assert(scan_in_progress(), "pre-condition");
440 473
441 474 // Currently, only survivors can be root regions.
442 475 if (!_should_abort) {
443 476 assert(_next_survivor == NULL, "we should have claimed all survivors");
444 477 }
445 478 _next_survivor = NULL;
446 479
447 480 {
448 481 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
449 482 _scan_in_progress = false;
450 483 RootRegionScan_lock->notify_all();
451 484 }
452 485 }
453 486
454 487 bool CMRootRegions::wait_until_scan_finished() {
455 488 if (!scan_in_progress()) return false;
456 489
457 490 {
458 491 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
459 492 while (scan_in_progress()) {
460 493 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
461 494 }
462 495 }
463 496 return true;
464 497 }
465 498
466 499 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
467 500 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
468 501 #endif // _MSC_VER
469 502
470 503 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
471 504 return MAX2((n_par_threads + 2) / 4, 1U);
472 505 }
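
[Editor's note] scale_parallel_threads() gives roughly one concurrent marking thread per four parallel GC threads, never fewer than one. With integer division:

// MAX2((n_par_threads + 2) / 4, 1U):
//   n =  1 ..  5  ->  1 marking thread
//   n =  6 ..  9  ->  2 marking threads
//   n = 10 .. 13  ->  3 marking threads
//   n = 14 .. 17  ->  4 marking threads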
473 506
474 507 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
475 508 _g1h(g1h),
476 509 _markBitMap1(MinObjAlignment - 1),
477 510 _markBitMap2(MinObjAlignment - 1),
478 511
479 512 _parallel_marking_threads(0),
480 513 _max_parallel_marking_threads(0),
481 514 _sleep_factor(0.0),
482 515 _marking_task_overhead(1.0),
483 516 _cleanup_sleep_factor(0.0),
484 517 _cleanup_task_overhead(1.0),
485 518 _cleanup_list("Cleanup List"),
486 519 _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
487 520 _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
488 521 CardTableModRefBS::card_shift,
489 522 false /* in_resource_area*/),
490 523
491 524 _prevMarkBitMap(&_markBitMap1),
492 525 _nextMarkBitMap(&_markBitMap2),
493 526
494 527 _markStack(this),
495 528 // _finger set in set_non_marking_state
496 529
497 530 _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
498 531 // _active_tasks set in set_non_marking_state
499 532 // _tasks set inside the constructor
500 533 _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
501 534 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
502 535
503 536 _has_overflown(false),
504 537 _concurrent(false),
505 538 _has_aborted(false),
506 539 _restart_for_overflow(false),
507 540 _concurrent_marking_in_progress(false),
508 541
509 542 // _verbose_level set below
510 543
511 544 _init_times(),
512 545 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
513 546 _cleanup_times(),
514 547 _total_counting_time(0.0),
515 548 _total_rs_scrub_time(0.0),
516 549
517 550 _parallel_workers(NULL),
518 551
519 552 _count_card_bitmaps(NULL),
520 553 _count_marked_bytes(NULL),
521 554 _completed_initialization(false) {
522 555 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
523 556 if (verbose_level < no_verbose) {
524 557 verbose_level = no_verbose;
525 558 }
526 559 if (verbose_level > high_verbose) {
527 560 verbose_level = high_verbose;
528 561 }
529 562 _verbose_level = verbose_level;
530 563
531 564 if (verbose_low()) {
532 565 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
533 566 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
534 567 }
535 568
536 569 if (!_markBitMap1.allocate(heap_rs)) {
537 570 warning("Failed to allocate first CM bit map");
538 571 return;
539 572 }
540 573 if (!_markBitMap2.allocate(heap_rs)) {
541 574 warning("Failed to allocate second CM bit map");
542 575 return;
543 576 }
544 577
545 578 // Create & start a ConcurrentMark thread.
546 579 _cmThread = new ConcurrentMarkThread(this);
547 580 assert(cmThread() != NULL, "CM Thread should have been created");
548 581 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
549 582
550 583 assert(CGC_lock != NULL, "Where's the CGC_lock?");
551 584 assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
552 585 assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
553 586
554 587 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
555 588 satb_qs.set_buffer_size(G1SATBBufferSize);
556 589
557 590 _root_regions.init(_g1h, this);
558 591
559 592 if (ConcGCThreads > ParallelGCThreads) {
560 593 warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
561 594 "than ParallelGCThreads (" UINT32_FORMAT ").",
562 595 ConcGCThreads, ParallelGCThreads);
563 596 return;
564 597 }
565 598 if (ParallelGCThreads == 0) {
566 599 // if we are not running with any parallel GC threads we will not
567 600 // spawn any marking threads either
568 601 _parallel_marking_threads = 0;
569 602 _max_parallel_marking_threads = 0;
570 603 _sleep_factor = 0.0;
571 604 _marking_task_overhead = 1.0;
572 605 } else {
573 606 if (ConcGCThreads > 0) {
574 607 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
575 608 // if both are set
576 609
577 610 _parallel_marking_threads = (uint) ConcGCThreads;
578 611 _max_parallel_marking_threads = _parallel_marking_threads;
579 612 _sleep_factor = 0.0;
580 613 _marking_task_overhead = 1.0;
581 614 } else if (G1MarkingOverheadPercent > 0) {
582 615 // we will calculate the number of parallel marking threads
583 616 // based on a target overhead with respect to the soft real-time
584 617 // goal
585 618
586 619 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
587 620 double overall_cm_overhead =
588 621 (double) MaxGCPauseMillis * marking_overhead /
589 622 (double) GCPauseIntervalMillis;
590 623 double cpu_ratio = 1.0 / (double) os::processor_count();
591 624 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
592 625 double marking_task_overhead =
593 626 overall_cm_overhead / marking_thread_num *
594 627 (double) os::processor_count();
595 628 double sleep_factor =
596 629 (1.0 - marking_task_overhead) / marking_task_overhead;
597 630
598 631 _parallel_marking_threads = (uint) marking_thread_num;
599 632 _max_parallel_marking_threads = _parallel_marking_threads;
600 633 _sleep_factor = sleep_factor;
601 634 _marking_task_overhead = marking_task_overhead;
602 635 } else {
603 636 _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
604 637 _max_parallel_marking_threads = _parallel_marking_threads;
605 638 _sleep_factor = 0.0;
606 639 _marking_task_overhead = 1.0;
607 640 }
608 641
609 642 if (parallel_marking_threads() > 1) {
610 643 _cleanup_task_overhead = 1.0;
611 644 } else {
612 645 _cleanup_task_overhead = marking_task_overhead();
613 646 }
614 647 _cleanup_sleep_factor =
615 648 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
616 649
617 650 #if 0
618 651 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
619 652 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
620 653 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
621 654 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
622 655 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
623 656 #endif
624 657
625 658 guarantee(parallel_marking_threads() > 0, "peace of mind");
626 659 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
627 660 _max_parallel_marking_threads, false, true);
628 661 if (_parallel_workers == NULL) {
629 662 vm_exit_during_initialization("Failed necessary allocation.");
630 663 } else {
631 664 _parallel_workers->initialize_workers();
632 665 }
633 666 }
634 667
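
[Editor's note] The G1MarkingOverheadPercent branch above is easiest to follow with numbers; a worked example under illustrative (not default) assumptions:

// Assume 8 CPUs, MaxGCPauseMillis=200, GCPauseIntervalMillis=1000,
// G1MarkingOverheadPercent=20:
//   marking_overhead      = 20 / 100           = 0.20
//   overall_cm_overhead   = 200 * 0.20 / 1000  = 0.04
//   cpu_ratio             = 1 / 8              = 0.125
//   marking_thread_num    = ceil(0.04 / 0.125) = 1
//   marking_task_overhead = 0.04 / 1 * 8       = 0.32
//   sleep_factor          = (1 - 0.32) / 0.32  = 2.125
// i.e. one marking thread that, after each do_marking_step() call,
// sleeps for about 2.1x the vtime it just spent marking (see
// CMConcurrentMarkingTask::work() below).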
635 668 if (FLAG_IS_DEFAULT(MarkStackSize)) {
636 669 uintx mark_stack_size =
637 670 MIN2(MarkStackSizeMax,
638 671 MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
639 672 // Verify that the calculated value for MarkStackSize is in range.
640 673 // It would be nice to use the private utility routine from Arguments.
641 674 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
642 675 warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
643 676 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
644 677 mark_stack_size, 1, MarkStackSizeMax);
645 678 return;
646 679 }
647 680 FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
648 681 } else {
649 682 // Verify MarkStackSize is in range.
650 683 if (FLAG_IS_CMDLINE(MarkStackSize)) {
651 684 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
652 685 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
653 686 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
654 687 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
655 688 MarkStackSize, 1, MarkStackSizeMax);
656 689 return;
657 690 }
658 691 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
659 692 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
660 693 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
661 694 " or for MarkStackSizeMax (" UINTX_FORMAT ")",
662 695 MarkStackSize, MarkStackSizeMax);
663 696 return;
664 697 }
665 698 }
666 699 }
667 700 }
668 701
669 702 if (!_markStack.allocate(MarkStackSize)) {
670 703 warning("Failed to allocate CM marking stack");
671 704 return;
672 705 }
673 706
674 707 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
675 708 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
676 709
677 710 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
678 711 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
679 712
680 713 BitMap::idx_t card_bm_size = _card_bm.size();
681 714
682 715 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
683 716 _active_tasks = _max_worker_id;
684 717
685 718 size_t max_regions = (size_t) _g1h->max_regions();
686 719 for (uint i = 0; i < _max_worker_id; ++i) {
687 720 CMTaskQueue* task_queue = new CMTaskQueue();
688 721 task_queue->initialize();
689 722 _task_queues->register_queue(i, task_queue);
690 723
691 724 _count_card_bitmaps[i] = BitMap(card_bm_size, false);
692 725 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
693 726
694 727 _tasks[i] = new CMTask(i, this,
695 728 _count_marked_bytes[i],
696 729 &_count_card_bitmaps[i],
697 730 task_queue, _task_queues);
698 731
699 732 _accum_task_vtime[i] = 0.0;
700 733 }
701 734
702 735 // Calculate the card number for the bottom of the heap. Used
703 736 // in biasing indexes into the accounting card bitmaps.
704 737 _heap_bottom_card_num =
705 738 intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
706 739 CardTableModRefBS::card_shift);
707 740
708 741 // Clear all the liveness counting data
709 742 clear_all_count_data();
710 743
711 744 // so that the call below can read a sensible value
712 745 _heap_start = (HeapWord*) heap_rs.base();
713 746 set_non_marking_state();
714 747 _completed_initialization = true;
715 748 }
716 749
717 750 void ConcurrentMark::update_g1_committed(bool force) {
718 751 // If concurrent marking is not in progress, then we do not need to
719 752 // update _heap_end.
720 753 if (!concurrent_marking_in_progress() && !force) return;
721 754
722 755 MemRegion committed = _g1h->g1_committed();
723 756 assert(committed.start() == _heap_start, "start shouldn't change");
724 757 HeapWord* new_end = committed.end();
725 758 if (new_end > _heap_end) {
726 759 // The heap has been expanded.
727 760
728 761 _heap_end = new_end;
729 762 }
730 763 // Notice that the heap can also shrink. However, this only happens
731 764 // during a Full GC (at least currently) and the entire marking
732 765 // phase will bail out and the task will not be restarted. So, let's
733 766 // do nothing.
734 767 }
735 768
736 769 void ConcurrentMark::reset() {
737 770 // Starting values for these two. This should be called in a STW
738 771 // phase. CM will be notified of any future g1_committed expansions
739 772 // will be at the end of evacuation pauses, when tasks are
740 773 // inactive.
741 774 MemRegion committed = _g1h->g1_committed();
742 775 _heap_start = committed.start();
743 776 _heap_end = committed.end();
744 777
745 778 // Separated the asserts so that we know which one fires.
746 779 assert(_heap_start != NULL, "heap bounds should look ok");
747 780 assert(_heap_end != NULL, "heap bounds should look ok");
748 781 assert(_heap_start < _heap_end, "heap bounds should look ok");
749 782
750 783 // reset all the marking data structures and any necessary flags
751 784 clear_marking_state();
752 785
753 786 if (verbose_low()) {
754 787 gclog_or_tty->print_cr("[global] resetting");
755 788 }
756 789
757 790 // We do reset all of them, since different phases will use
758 791 // different number of active threads. So, it's easiest to have all
759 792 // of them ready.
760 793 for (uint i = 0; i < _max_worker_id; ++i) {
761 794 _tasks[i]->reset(_nextMarkBitMap);
762 795 }
763 796
764 797 // we need this to make sure that the flag is on during the evac
765 798 // pause with initial mark piggy-backed
766 799 set_concurrent_marking_in_progress();
767 800 }
768 801
769 802 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
770 803 assert(active_tasks <= _max_worker_id, "we should not have more");
771 804
772 805 _active_tasks = active_tasks;
773 806 // Need to update the three data structures below according to the
774 807 // number of active threads for this phase.
775 808 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
776 809 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
777 810 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
778 811
779 812 _concurrent = concurrent;
780 813 // We propagate this to all tasks, not just the active ones.
781 814 for (uint i = 0; i < _max_worker_id; ++i)
782 815 _tasks[i]->set_concurrent(concurrent);
783 816
784 817 if (concurrent) {
785 818 set_concurrent_marking_in_progress();
786 819 } else {
787 820 // We currently assume that the concurrent flag has been set to
788 821 // false before we start remark. At this point we should also be
789 822 // in a STW phase.
790 823 assert(!concurrent_marking_in_progress(), "invariant");
791 824 assert(_finger == _heap_end, "only way to get here");
792 825 update_g1_committed(true);
793 826 }
794 827 }
795 828
796 829 void ConcurrentMark::set_non_marking_state() {
797 830 // We set the global marking state to some default values when we're
798 831 // not doing marking.
799 832 clear_marking_state();
800 833 _active_tasks = 0;
801 834 clear_concurrent_marking_in_progress();
802 835 }
803 836
804 837 ConcurrentMark::~ConcurrentMark() {
805 838 // The ConcurrentMark instance is never freed.
806 839 ShouldNotReachHere();
807 840 }
808 841
809 842 void ConcurrentMark::clearNextBitmap() {
810 843 G1CollectedHeap* g1h = G1CollectedHeap::heap();
811 844 G1CollectorPolicy* g1p = g1h->g1_policy();
812 845
813 846 // Make sure that the concurrent mark thread looks to still be in
814 847 // the current cycle.
815 848 guarantee(cmThread()->during_cycle(), "invariant");
816 849
817 850 // We are finishing up the current cycle by clearing the next
818 851 // marking bitmap and getting it ready for the next cycle. During
819 852 // this time no other cycle can start. So, let's make sure that this
820 853 // is the case.
821 854 guarantee(!g1h->mark_in_progress(), "invariant");
822 855
823 856 // clear the mark bitmap (no grey objects to start with).
824 857 // We need to do this in chunks and offer to yield in between
825 858 // each chunk.
826 859 HeapWord* start = _nextMarkBitMap->startWord();
827 860 HeapWord* end = _nextMarkBitMap->endWord();
828 861 HeapWord* cur = start;
829 862 size_t chunkSize = M;
830 863 while (cur < end) {
831 864 HeapWord* next = cur + chunkSize;
832 865 if (next > end) {
833 866 next = end;
834 867 }
835 868 MemRegion mr(cur,next);
836 869 _nextMarkBitMap->clearRange(mr);
837 870 cur = next;
838 871 do_yield_check();
839 872
840 873 // Repeat the asserts from above. We'll do them as asserts here to
841 874 // minimize their overhead on the product. However, we'll have
842 875 // them as guarantees at the beginning / end of the bitmap
843 876 // clearing to get some checking in the product.
844 877 assert(cmThread()->during_cycle(), "invariant");
845 878 assert(!g1h->mark_in_progress(), "invariant");
846 879 }
847 880
848 881 // Clear the liveness counting data
849 882 clear_all_count_data();
850 883
851 884 // Repeat the asserts from above.
852 885 guarantee(cmThread()->during_cycle(), "invariant");
853 886 guarantee(!g1h->mark_in_progress(), "invariant");
854 887 }
855 888
856 889 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
857 890 public:
858 891 bool doHeapRegion(HeapRegion* r) {
859 892 if (!r->continuesHumongous()) {
860 893 r->note_start_of_marking();
861 894 }
862 895 return false;
863 896 }
864 897 };
865 898
866 899 void ConcurrentMark::checkpointRootsInitialPre() {
867 900 G1CollectedHeap* g1h = G1CollectedHeap::heap();
868 901 G1CollectorPolicy* g1p = g1h->g1_policy();
869 902
870 903 _has_aborted = false;
871 904
872 905 #ifndef PRODUCT
873 906 if (G1PrintReachableAtInitialMark) {
874 907 print_reachable("at-cycle-start",
875 908 VerifyOption_G1UsePrevMarking, true /* all */);
876 909 }
877 910 #endif
878 911
879 912 // Initialise marking structures. This has to be done in a STW phase.
880 913 reset();
881 914
882 915 // For each region note start of marking.
883 916 NoteStartOfMarkHRClosure startcl;
884 917 g1h->heap_region_iterate(&startcl);
885 918 }
886 919
887 920
888 921 void ConcurrentMark::checkpointRootsInitialPost() {
889 922 G1CollectedHeap* g1h = G1CollectedHeap::heap();
890 923
891 924 // If we force an overflow during remark, the remark operation will
892 925 // actually abort and we'll restart concurrent marking. If we always
 893 926   // force an overflow during remark we'll never actually complete the
 894 927   // marking phase. So, we initialize this here, at the start of the
 895 928   // cycle, so that the remaining overflow count will decrease at
896 929 // every remark and we'll eventually not need to cause one.
897 930 force_overflow_stw()->init();
898 931
899 932 // Start Concurrent Marking weak-reference discovery.
900 933 ReferenceProcessor* rp = g1h->ref_processor_cm();
901 934 // enable ("weak") refs discovery
902 935 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
903 936 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
904 937
905 938 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 906 939   // This is the start of the marking cycle. We expect all
907 940 // threads to have SATB queues with active set to false.
908 941 satb_mq_set.set_active_all_threads(true, /* new active value */
909 942 false /* expected_active */);
910 943
911 944 _root_regions.prepare_for_scan();
912 945
913 946 // update_g1_committed() will be called at the end of an evac pause
914 947 // when marking is on. So, it's also called at the end of the
915 948 // initial-mark pause to update the heap end, if the heap expands
916 949 // during it. No need to call it here.
917 950 }
918 951
919 952 /*
920 953 * Notice that in the next two methods, we actually leave the STS
921 954 * during the barrier sync and join it immediately afterwards. If we
922 955 * do not do this, the following deadlock can occur: one thread could
923 956 * be in the barrier sync code, waiting for the other thread to also
924 957 * sync up, whereas another one could be trying to yield, while also
925 958 * waiting for the other threads to sync up too.
926 959 *
927 960 * Note, however, that this code is also used during remark and in
928 961 * this case we should not attempt to leave / enter the STS, otherwise
 929 962  * we'll either hit an assert (debug / fastdebug) or deadlock
930 963 * (product). So we should only leave / enter the STS if we are
931 964 * operating concurrently.
932 965 *
933 966 * Because the thread that does the sync barrier has left the STS, it
 934 967  * is possible for it to be suspended for a Full GC, or an evacuation pause
 935 968  * could occur. This is actually safe, since entering the sync
936 969 * barrier is one of the last things do_marking_step() does, and it
937 970 * doesn't manipulate any data structures afterwards.
938 971 */
939 972
940 973 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
941 974 if (verbose_low()) {
942 975 gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
943 976 }
944 977
945 978 if (concurrent()) {
946 979 ConcurrentGCThread::stsLeave();
947 980 }
948 981 _first_overflow_barrier_sync.enter();
949 982 if (concurrent()) {
950 983 ConcurrentGCThread::stsJoin();
951 984 }
952 985 // at this point everyone should have synced up and not be doing any
953 986 // more work
954 987
955 988 if (verbose_low()) {
956 989 gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
957 990 }
958 991
 959 992   // let the task associated with worker 0 do this
960 993 if (worker_id == 0) {
961 994 // task 0 is responsible for clearing the global data structures
962 995 // We should be here because of an overflow. During STW we should
963 996 // not clear the overflow flag since we rely on it being true when
 964 997     // we exit this method to abort the pause and restart concurrent
965 998 // marking.
966 999 clear_marking_state(concurrent() /* clear_overflow */);
967 1000 force_overflow()->update();
968 1001
969 1002 if (G1Log::fine()) {
970 1003 gclog_or_tty->date_stamp(PrintGCDateStamps);
971 1004 gclog_or_tty->stamp(PrintGCTimeStamps);
972 1005 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
973 1006 }
974 1007 }
975 1008
 976 1009   // after this, each task should reset its own data structures and
977 1010 // then go into the second barrier
978 1011 }
979 1012
980 1013 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
981 1014 if (verbose_low()) {
982 1015 gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
983 1016 }
984 1017
985 1018 if (concurrent()) {
986 1019 ConcurrentGCThread::stsLeave();
987 1020 }
988 1021 _second_overflow_barrier_sync.enter();
989 1022 if (concurrent()) {
990 1023 ConcurrentGCThread::stsJoin();
991 1024 }
992 1025 // at this point everything should be re-initialised and ready to go
993 1026
994 1027 if (verbose_low()) {
995 1028 gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
996 1029 }
997 1030 }
998 1031
999 1032 #ifndef PRODUCT
1000 1033 void ForceOverflowSettings::init() {
1001 1034 _num_remaining = G1ConcMarkForceOverflow;
1002 1035 _force = false;
1003 1036 update();
1004 1037 }
1005 1038
1006 1039 void ForceOverflowSettings::update() {
1007 1040 if (_num_remaining > 0) {
1008 1041 _num_remaining -= 1;
1009 1042 _force = true;
1010 1043 } else {
1011 1044 _force = false;
1012 1045 }
1013 1046 }
1014 1047
1015 1048 bool ForceOverflowSettings::should_force() {
1016 1049 if (_force) {
1017 1050 _force = false;
1018 1051 return true;
1019 1052 } else {
1020 1053 return false;
1021 1054 }
1022 1055 }
1023 1056 #endif // !PRODUCT
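
[Editor's note] These hooks exist purely for testing: in non-product builds, the develop flag G1ConcMarkForceOverflow=N makes the first N marking rounds each force a synthetic overflow, which exercises the restart and stack-expansion paths this patch touches. The countdown, in brief:

// init()         : _num_remaining = G1ConcMarkForceOverflow; update()
// update()       : arms _force while _num_remaining > 0, decrementing it
// should_force() : one-shot; consumes _force, returning true at most
//                  once per update()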
1024 1057
1025 1058 class CMConcurrentMarkingTask: public AbstractGangTask {
1026 1059 private:
1027 1060 ConcurrentMark* _cm;
1028 1061 ConcurrentMarkThread* _cmt;
1029 1062
1030 1063 public:
1031 1064 void work(uint worker_id) {
1032 1065 assert(Thread::current()->is_ConcurrentGC_thread(),
1033 1066 "this should only be done by a conc GC thread");
1034 1067 ResourceMark rm;
1035 1068
1036 1069 double start_vtime = os::elapsedVTime();
1037 1070
1038 1071 ConcurrentGCThread::stsJoin();
1039 1072
1040 1073 assert(worker_id < _cm->active_tasks(), "invariant");
1041 1074 CMTask* the_task = _cm->task(worker_id);
1042 1075 the_task->record_start_time();
1043 1076 if (!_cm->has_aborted()) {
1044 1077 do {
1045 1078 double start_vtime_sec = os::elapsedVTime();
1046 1079 double start_time_sec = os::elapsedTime();
1047 1080 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1048 1081
1049 1082 the_task->do_marking_step(mark_step_duration_ms,
1050 1083 true /* do_stealing */,
1051 1084 true /* do_termination */);
1052 1085
1053 1086 double end_time_sec = os::elapsedTime();
1054 1087 double end_vtime_sec = os::elapsedVTime();
1055 1088 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1056 1089 double elapsed_time_sec = end_time_sec - start_time_sec;
1057 1090 _cm->clear_has_overflown();
1058 1091
1059 1092 bool ret = _cm->do_yield_check(worker_id);
1060 1093
1061 1094 jlong sleep_time_ms;
1062 1095 if (!_cm->has_aborted() && the_task->has_aborted()) {
1063 1096 sleep_time_ms =
1064 1097 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1065 1098 ConcurrentGCThread::stsLeave();
1066 1099 os::sleep(Thread::current(), sleep_time_ms, false);
1067 1100 ConcurrentGCThread::stsJoin();
1068 1101 }
1069 1102 double end_time2_sec = os::elapsedTime();
1070 1103 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1071 1104
1072 1105 #if 0
1073 1106 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1074 1107 "overhead %1.4lf",
1075 1108 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1076 1109 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1077 1110 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1078 1111 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1079 1112 #endif
1080 1113 } while (!_cm->has_aborted() && the_task->has_aborted());
1081 1114 }
1082 1115 the_task->record_end_time();
1083 1116 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1084 1117
1085 1118 ConcurrentGCThread::stsLeave();
1086 1119
1087 1120 double end_vtime = os::elapsedVTime();
1088 1121 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1089 1122 }
1090 1123
1091 1124 CMConcurrentMarkingTask(ConcurrentMark* cm,
1092 1125 ConcurrentMarkThread* cmt) :
1093 1126 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1094 1127
1095 1128 ~CMConcurrentMarkingTask() { }
1096 1129 };
1097 1130
1098 1131 // Calculates the number of active workers for a concurrent
1099 1132 // phase.
1100 1133 uint ConcurrentMark::calc_parallel_marking_threads() {
1101 1134 if (G1CollectedHeap::use_parallel_gc_threads()) {
1102 1135 uint n_conc_workers = 0;
1103 1136 if (!UseDynamicNumberOfGCThreads ||
1104 1137 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1105 1138 !ForceDynamicNumberOfGCThreads)) {
1106 1139 n_conc_workers = max_parallel_marking_threads();
1107 1140 } else {
1108 1141 n_conc_workers =
1109 1142 AdaptiveSizePolicy::calc_default_active_workers(
1110 1143 max_parallel_marking_threads(),
1111 1144 1, /* Minimum workers */
1112 1145 parallel_marking_threads(),
1113 1146 Threads::number_of_non_daemon_threads());
1114 1147 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1115 1148 // that scaling has already gone into "_max_parallel_marking_threads".
1116 1149 }
1117 1150 assert(n_conc_workers > 0, "Always need at least 1");
1118 1151 return n_conc_workers;
1119 1152 }
1120 1153 // If we are not running with any parallel GC threads we will not
1121 1154 // have spawned any marking threads either. Hence the number of
1122 1155 // concurrent workers should be 0.
1123 1156 return 0;
1124 1157 }
1125 1158
1126 1159 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1127 1160 // Currently, only survivors can be root regions.
1128 1161 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1129 1162 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1130 1163
1131 1164 const uintx interval = PrefetchScanIntervalInBytes;
1132 1165 HeapWord* curr = hr->bottom();
1133 1166 const HeapWord* end = hr->top();
1134 1167 while (curr < end) {
1135 1168 Prefetch::read(curr, interval);
1136 1169 oop obj = oop(curr);
1137 1170 int size = obj->oop_iterate(&cl);
1138 1171 assert(size == obj->size(), "sanity");
1139 1172 curr += size;
1140 1173 }
1141 1174 }
1142 1175
1143 1176 class CMRootRegionScanTask : public AbstractGangTask {
1144 1177 private:
1145 1178 ConcurrentMark* _cm;
1146 1179
1147 1180 public:
1148 1181 CMRootRegionScanTask(ConcurrentMark* cm) :
1149 1182 AbstractGangTask("Root Region Scan"), _cm(cm) { }
1150 1183
1151 1184 void work(uint worker_id) {
1152 1185 assert(Thread::current()->is_ConcurrentGC_thread(),
1153 1186 "this should only be done by a conc GC thread");
1154 1187
1155 1188 CMRootRegions* root_regions = _cm->root_regions();
1156 1189 HeapRegion* hr = root_regions->claim_next();
1157 1190 while (hr != NULL) {
1158 1191 _cm->scanRootRegion(hr, worker_id);
1159 1192 hr = root_regions->claim_next();
1160 1193 }
1161 1194 }
1162 1195 };
1163 1196
1164 1197 void ConcurrentMark::scanRootRegions() {
1165 1198 // scan_in_progress() will have been set to true only if there was
1166 1199 // at least one root region to scan. So, if it's false, we
1167 1200 // should not attempt to do any further work.
1168 1201 if (root_regions()->scan_in_progress()) {
1169 1202 _parallel_marking_threads = calc_parallel_marking_threads();
1170 1203 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1171 1204 "Maximum number of marking threads exceeded");
1172 1205 uint active_workers = MAX2(1U, parallel_marking_threads());
1173 1206
1174 1207 CMRootRegionScanTask task(this);
1175 1208 if (parallel_marking_threads() > 0) {
1176 1209 _parallel_workers->set_active_workers((int) active_workers);
1177 1210 _parallel_workers->run_task(&task);
1178 1211 } else {
1179 1212 task.work(0);
1180 1213 }
1181 1214
1182 1215 // It's possible that has_aborted() is true here without actually
1183 1216 // aborting the survivor scan earlier. This is OK as it's
1184 1217 // mainly used for sanity checking.
1185 1218 root_regions()->scan_finished();
1186 1219 }
1187 1220 }
1188 1221
1189 1222 void ConcurrentMark::markFromRoots() {
1190 1223 // we might be tempted to assert that:
1191 1224 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1192 1225 // "inconsistent argument?");
1193 1226 // However that wouldn't be right, because it's possible that
1194 1227 // a safepoint is indeed in progress as a younger generation
1195 1228 // stop-the-world GC happens even as we mark in this generation.
1196 1229
1197 1230 _restart_for_overflow = false;
1198 1231 force_overflow_conc()->init();
1199 1232
1200 1233 // _g1h has _n_par_threads
1201 1234 _parallel_marking_threads = calc_parallel_marking_threads();
1202 1235 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1203 1236 "Maximum number of marking threads exceeded");
1204 1237
1205 1238 uint active_workers = MAX2(1U, parallel_marking_threads());
1206 1239
1207 1240 // Parallel task terminator is set in "set_phase()"
1208 1241 set_phase(active_workers, true /* concurrent */);
1209 1242
1210 1243 CMConcurrentMarkingTask markingTask(this, cmThread());
1211 1244 if (parallel_marking_threads() > 0) {
1212 1245 _parallel_workers->set_active_workers((int)active_workers);
 1213 1246     // Don't set _n_par_threads because it affects MT in process_strong_roots()
 1214 1247     // and the decisions on that MT processing are made elsewhere.
1215 1248 assert(_parallel_workers->active_workers() > 0, "Should have been set");
1216 1249 _parallel_workers->run_task(&markingTask);
1217 1250 } else {
1218 1251 markingTask.work(0);
1219 1252 }
1220 1253 print_stats();
1221 1254 }
1222 1255
1223 1256 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1224 1257 // world is stopped at this checkpoint
1225 1258 assert(SafepointSynchronize::is_at_safepoint(),
1226 1259 "world should be stopped");
1227 1260
1228 1261 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1229 1262
1230 1263 // If a full collection has happened, we shouldn't do this.
1231 1264 if (has_aborted()) {
1232 1265 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1233 1266 return;
1234 1267 }
1235 1268
1236 1269 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1237 1270
1238 1271 if (VerifyDuringGC) {
1239 1272 HandleMark hm; // handle scope
1240 1273 gclog_or_tty->print(" VerifyDuringGC:(before)");
1241 1274 Universe::heap()->prepare_for_verify();
1242 1275 Universe::verify(/* silent */ false,
1243 1276 /* option */ VerifyOption_G1UsePrevMarking);
1244 1277 }
1245 1278
1246 1279 G1CollectorPolicy* g1p = g1h->g1_policy();
1247 1280 g1p->record_concurrent_mark_remark_start();
1248 1281
1249 1282 double start = os::elapsedTime();
1250 1283
1251 1284 checkpointRootsFinalWork();
1252 1285
1253 1286 double mark_work_end = os::elapsedTime();
1254 1287
1255 1288 weakRefsWork(clear_all_soft_refs);
1256 1289
1257 1290 if (has_overflown()) {
1258 1291 // Oops. We overflowed. Restart concurrent marking.
1259 1292 _restart_for_overflow = true;
1260 1293 // Clear the flag. We do not need it any more.
1261 1294 clear_has_overflown();
1262 1295 if (G1TraceMarkStackOverflow) {
1263 1296 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1264 1297 }
1265 1298 } else {
1266 1299 // Aggregate the per-task counting data that we have accumulated
1267 1300 // while marking.
1268 1301 aggregate_count_data();
1269 1302
1270 1303 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1271 1304 // We're done with marking.
1272 1305 // This is the end of the marking cycle, we're expected all
 1273 1306     // This is the end of the marking cycle. We expect all
1274 1307 satb_mq_set.set_active_all_threads(false, /* new active value */
1275 1308 true /* expected_active */);
1276 1309
1277 1310 if (VerifyDuringGC) {
1278 1311 HandleMark hm; // handle scope
1279 1312 gclog_or_tty->print(" VerifyDuringGC:(after)");
1280 1313 Universe::heap()->prepare_for_verify();
1281 1314 Universe::verify(/* silent */ false,
1282 1315 /* option */ VerifyOption_G1UseNextMarking);
1283 1316 }
1284 1317 assert(!restart_for_overflow(), "sanity");
1285 1318 }
1286 1319
1287 1320 // Expand the marking stack, if we have to and if we can.
1288 1321 if (_markStack.should_expand()) {
1289 1322 _markStack.expand();
1290 1323 }
1291 1324
1292 1325 // Reset the marking state if marking completed
1293 1326 if (!restart_for_overflow()) {
1294 1327 set_non_marking_state();
1295 1328 }
1296 1329
1297 1330 #if VERIFY_OBJS_PROCESSED
1298 1331 _scan_obj_cl.objs_processed = 0;
1299 1332 ThreadLocalObjQueue::objs_enqueued = 0;
1300 1333 #endif
1301 1334
1302 1335 // Statistics
1303 1336 double now = os::elapsedTime();
1304 1337 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1305 1338 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1306 1339 _remark_times.add((now - start) * 1000.0);
1307 1340
1308 1341 g1p->record_concurrent_mark_remark_end();
1309 1342 }
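
[Editor's note] To summarize checkpointRootsFinal(): a marking round ends in one of two ways, and the new expansion hook runs in both:

// overflow during remark -> set _restart_for_overflow; marking will be
//                           restarted concurrently (counting data is
//                           not aggregated yet).
// no overflow            -> aggregate the per-task counting data,
//                           deactivate the SATB queues, and reset to
//                           the non-marking state.
// either way             -> if the stack overflowed earlier,
//                           _markStack.expand() doubles its capacity,
//                           up to MarkStackSizeMax.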
1310 1343
1311 1344 // Base class of the closures that finalize and verify the
1312 1345 // liveness counting data.
1313 1346 class CMCountDataClosureBase: public HeapRegionClosure {
1314 1347 protected:
1315 1348 G1CollectedHeap* _g1h;
1316 1349 ConcurrentMark* _cm;
1317 1350 CardTableModRefBS* _ct_bs;
1318 1351
1319 1352 BitMap* _region_bm;
1320 1353 BitMap* _card_bm;
1321 1354
1322 1355 // Takes a region that's not empty (i.e., it has at least one
 1323 1356   // live object in it) and sets its corresponding bit on the region
1324 1357 // bitmap to 1. If the region is "starts humongous" it will also set
1325 1358 // to 1 the bits on the region bitmap that correspond to its
1326 1359 // associated "continues humongous" regions.
1327 1360 void set_bit_for_region(HeapRegion* hr) {
1328 1361 assert(!hr->continuesHumongous(), "should have filtered those out");
1329 1362
1330 1363 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1331 1364 if (!hr->startsHumongous()) {
1332 1365 // Normal (non-humongous) case: just set the bit.
1333 1366 _region_bm->par_at_put(index, true);
1334 1367 } else {
1335 1368 // Starts humongous case: calculate how many regions are part of
1336 1369 // this humongous region and then set the bit range.
1337 1370 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1338 1371 _region_bm->par_at_put_range(index, end_index, true);
1339 1372 }
1340 1373 }
1341 1374
1342 1375 public:
1343 1376 CMCountDataClosureBase(G1CollectedHeap* g1h,
1344 1377 BitMap* region_bm, BitMap* card_bm):
1345 1378 _g1h(g1h), _cm(g1h->concurrent_mark()),
1346 1379 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1347 1380 _region_bm(region_bm), _card_bm(card_bm) { }
1348 1381 };
1349 1382
1350 1383 // Closure that calculates the # live objects per region. Used
1351 1384 // for verification purposes during the cleanup pause.
1352 1385 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1353 1386 CMBitMapRO* _bm;
1354 1387 size_t _region_marked_bytes;
1355 1388
1356 1389 public:
1357 1390 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1358 1391 BitMap* region_bm, BitMap* card_bm) :
1359 1392 CMCountDataClosureBase(g1h, region_bm, card_bm),
1360 1393 _bm(bm), _region_marked_bytes(0) { }
1361 1394
1362 1395 bool doHeapRegion(HeapRegion* hr) {
1363 1396
1364 1397 if (hr->continuesHumongous()) {
1365 1398 // We will ignore these here and process them when their
1366 1399 // associated "starts humongous" region is processed (see
1367 1400 // set_bit_for_heap_region()). Note that we cannot rely on their
1368 1401 // associated "starts humongous" region to have their bit set to
1369 1402 // 1 since, due to the region chunking in the parallel region
1370 1403 // iteration, a "continues humongous" region might be visited
1371 1404 // before its associated "starts humongous".
1372 1405 return false;
1373 1406 }
1374 1407
1375 1408 HeapWord* ntams = hr->next_top_at_mark_start();
1376 1409 HeapWord* start = hr->bottom();
1377 1410
1378 1411 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1379 1412 err_msg("Preconditions not met - "
1380 1413 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1381 1414 start, ntams, hr->end()));
1382 1415
1383 1416 // Find the first marked object at or after "start".
1384 1417 start = _bm->getNextMarkedWordAddress(start, ntams);
1385 1418
1386 1419 size_t marked_bytes = 0;
1387 1420
1388 1421 while (start < ntams) {
1389 1422 oop obj = oop(start);
1390 1423 int obj_sz = obj->size();
1391 1424 HeapWord* obj_end = start + obj_sz;
1392 1425
1393 1426 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1394 1427 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1395 1428
1396 1429 // Note: if we're looking at the last region in the heap, obj_end
1397 1430 // could actually be just beyond the end of the heap; end_idx
1398 1431 // will then correspond to a (non-existent) card that is also
1399 1432 // just beyond the heap.
1400 1433 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1401 1434 // end of object is not card aligned - increment to cover
1402 1435 // all the cards spanned by the object
1403 1436 end_idx += 1;
1404 1437 }
1405 1438
1406 1439 // Set the bits in the card BM for the cards spanned by this object.
1407 1440 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1408 1441
1409 1442 // Add the size of this object to the number of marked bytes.
1410 1443 marked_bytes += (size_t)obj_sz * HeapWordSize;
1411 1444
1412 1445 // Find the next marked object after this one.
1413 1446 start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1414 1447 }
1415 1448
1416 1449 // Mark the allocated-since-marking portion...
1417 1450 HeapWord* top = hr->top();
1418 1451 if (ntams < top) {
1419 1452 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1420 1453 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1421 1454
1422 1455 // Note: if we're looking at the last region in the heap, top
1423 1456 // could actually be just beyond the end of the heap; end_idx
1424 1457 // will then correspond to a (non-existent) card that is also
1425 1458 // just beyond the heap.
1426 1459 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1427 1460 // top is not card aligned - increment to cover
1428 1461 // all the cards spanned by [ntams, top)
1429 1462 end_idx += 1;
1430 1463 }
1431 1464 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1432 1465
1433 1466 // This definitely means the region has live objects.
1434 1467 set_bit_for_region(hr);
1435 1468 }
1436 1469
1437 1470 // Update the live region bitmap.
1438 1471 if (marked_bytes > 0) {
1439 1472 set_bit_for_region(hr);
1440 1473 }
1441 1474
1442 1475 // Set the marked bytes for the current region so that
1443 1476 // it can be queried by a calling verification routine.
1444 1477 _region_marked_bytes = marked_bytes;
1445 1478
1446 1479 return false;
1447 1480 }
1448 1481
1449 1482 size_t region_marked_bytes() const { return _region_marked_bytes; }
1450 1483 };
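// [Editorial sketch - not part of this webrev] The card-index rounding used
// twice in the closure above, in isolation. Assumes G1's default 512-byte
// cards; all names are illustrative, not HotSpot APIs:
#if 0
#include <cstdint>
#include <cstddef>

static const size_t kCardSizeBytes = 512;  // assumption: 2^9-byte cards

static size_t card_index_for(uintptr_t addr) {
  return addr / kCardSizeBytes;
}

// Half-open card range [*start_idx, *end_idx) covering [start, end).
// When `end` is not card aligned the final, partial card must still be
// covered - the same reason as the `end_idx += 1` bumps above.
static void card_range_for(uintptr_t start, uintptr_t end,
                           size_t* start_idx, size_t* end_idx) {
  *start_idx = card_index_for(start);
  *end_idx   = card_index_for(end);
  if (end % kCardSizeBytes != 0) {
    *end_idx += 1;
  }
}
#endif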
1451 1484
1452 1485 // Heap region closure used for verifying the counting data
1453 1486 // that was accumulated concurrently and aggregated during
1454 1487 // the remark pause. This closure is applied to the heap
1455 1488 // regions during the STW cleanup pause.
1456 1489
1457 1490 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1458 1491 G1CollectedHeap* _g1h;
1459 1492 ConcurrentMark* _cm;
1460 1493 CalcLiveObjectsClosure _calc_cl;
1461 1494 BitMap* _region_bm; // Region BM to be verified
1462 1495 BitMap* _card_bm; // Card BM to be verified
1463 1496 bool _verbose; // verbose output?
1464 1497
1465 1498 BitMap* _exp_region_bm; // Expected Region BM values
1466 1499 BitMap* _exp_card_bm; // Expected card BM values
1467 1500
1468 1501 int _failures;
1469 1502
1470 1503 public:
1471 1504 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1472 1505 BitMap* region_bm,
1473 1506 BitMap* card_bm,
1474 1507 BitMap* exp_region_bm,
1475 1508 BitMap* exp_card_bm,
1476 1509 bool verbose) :
1477 1510 _g1h(g1h), _cm(g1h->concurrent_mark()),
1478 1511 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1479 1512 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1480 1513 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1481 1514 _failures(0) { }
1482 1515
1483 1516 int failures() const { return _failures; }
1484 1517
1485 1518 bool doHeapRegion(HeapRegion* hr) {
1486 1519 if (hr->continuesHumongous()) {
1487 1520 // We will ignore these here and process them when their
1488 1521 // associated "starts humongous" region is processed (see
1489 1522 // set_bit_for_region()). Note that we cannot rely on their
1490 1523 // associated "starts humongous" region to have its bit set to
1491 1524 // 1 since, due to the region chunking in the parallel region
1492 1525 // iteration, a "continues humongous" region might be visited
1493 1526 // before its associated "starts humongous".
1494 1527 return false;
1495 1528 }
1496 1529
1497 1530 int failures = 0;
1498 1531
1499 1532 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1500 1533 // this region and set the corresponding bits in the expected region
1501 1534 // and card bitmaps.
1502 1535 bool res = _calc_cl.doHeapRegion(hr);
1503 1536 assert(res == false, "should be continuing");
1504 1537
1505 1538 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1506 1539 Mutex::_no_safepoint_check_flag);
1507 1540
1508 1541 // Verify the marked bytes for this region.
1509 1542 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1510 1543 size_t act_marked_bytes = hr->next_marked_bytes();
1511 1544
1512 1545 // We're not OK if expected marked bytes > actual marked bytes. It means
1513 1546 // we have failed to account for some objects during the actual marking.
1514 1547 if (exp_marked_bytes > act_marked_bytes) {
1515 1548 if (_verbose) {
1516 1549 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1517 1550 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1518 1551 hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1519 1552 }
1520 1553 failures += 1;
1521 1554 }
1522 1555
1523 1556 // Verify the bit, for this region, in the actual and expected
1524 1557 // (which was just calculated) region bit maps.
1525 1558 // We're not OK if the bit in the calculated expected region
1526 1559 // bitmap is set and the bit in the actual region bitmap is not.
1527 1560 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1528 1561
1529 1562 bool expected = _exp_region_bm->at(index);
1530 1563 bool actual = _region_bm->at(index);
1531 1564 if (expected && !actual) {
1532 1565 if (_verbose) {
1533 1566 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1534 1567 "expected: %s, actual: %s",
1535 1568 hr->hrs_index(),
1536 1569 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1537 1570 }
1538 1571 failures += 1;
1539 1572 }
1540 1573
1541 1574 // Verify that the card bit maps for the cards spanned by the current
1542 1575 // region match. We have an error if we have a set bit in the expected
1543 1576 // bit map and the corresponding bit in the actual bitmap is not set.
1544 1577
1545 1578 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1546 1579 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1547 1580
1548 1581 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1549 1582 expected = _exp_card_bm->at(i);
1550 1583 actual = _card_bm->at(i);
1551 1584
1552 1585 if (expected && !actual) {
1553 1586 if (_verbose) {
1554 1587 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1555 1588 "expected: %s, actual: %s",
1556 1589 hr->hrs_index(), i,
1557 1590 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1558 1591 }
1559 1592 failures += 1;
1560 1593 }
1561 1594 }
1562 1595
1563 1596 if (failures > 0 && _verbose) {
1564 1597 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1565 1598 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1566 1599 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1567 1600 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1568 1601 }
1569 1602
1570 1603 _failures += failures;
1571 1604
1572 1605 // We could stop iteration over the heap when we
1573 1606 // find the first violating region by returning true.
1574 1607 return false;
1575 1608 }
1576 1609 };
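// [Editorial note - not part of this webrev] The comparisons above are
// deliberately one-sided: a bit set in the expected (recalculated) bitmaps
// but clear in the actual ones is an error, while the converse is allowed -
// the data accumulated during marking may legitimately be a superset of
// what a fresh walk of the final mark bitmap produces.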
1577 1610
1578 1611
1579 1612 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1580 1613 protected:
1581 1614 G1CollectedHeap* _g1h;
1582 1615 ConcurrentMark* _cm;
1583 1616 BitMap* _actual_region_bm;
1584 1617 BitMap* _actual_card_bm;
1585 1618
1586 1619 uint _n_workers;
1587 1620
1588 1621 BitMap* _expected_region_bm;
1589 1622 BitMap* _expected_card_bm;
1590 1623
1591 1624 int _failures;
1592 1625 bool _verbose;
1593 1626
1594 1627 public:
1595 1628 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1596 1629 BitMap* region_bm, BitMap* card_bm,
1597 1630 BitMap* expected_region_bm, BitMap* expected_card_bm)
1598 1631 : AbstractGangTask("G1 verify final counting"),
1599 1632 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1600 1633 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1601 1634 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1602 1635 _failures(0), _verbose(false),
1603 1636 _n_workers(0) {
1604 1637 assert(VerifyDuringGC, "don't call this otherwise");
1605 1638
1606 1639 // Use the value already set as the number of active threads
1607 1640 // in the call to run_task().
1608 1641 if (G1CollectedHeap::use_parallel_gc_threads()) {
1609 1642 assert(_g1h->workers()->active_workers() > 0,
1610 1643 "Should have been previously set");
1611 1644 _n_workers = _g1h->workers()->active_workers();
1612 1645 } else {
1613 1646 _n_workers = 1;
1614 1647 }
1615 1648
1616 1649 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1617 1650 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1618 1651
1619 1652 _verbose = _cm->verbose_medium();
1620 1653 }
1621 1654
1622 1655 void work(uint worker_id) {
1623 1656 assert(worker_id < _n_workers, "invariant");
1624 1657
1625 1658 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1626 1659 _actual_region_bm, _actual_card_bm,
1627 1660 _expected_region_bm,
1628 1661 _expected_card_bm,
1629 1662 _verbose);
1630 1663
1631 1664 if (G1CollectedHeap::use_parallel_gc_threads()) {
1632 1665 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1633 1666 worker_id,
1634 1667 _n_workers,
1635 1668 HeapRegion::VerifyCountClaimValue);
1636 1669 } else {
1637 1670 _g1h->heap_region_iterate(&verify_cl);
1638 1671 }
1639 1672
1640 1673 Atomic::add(verify_cl.failures(), &_failures);
1641 1674 }
1642 1675
1643 1676 int failures() const { return _failures; }
1644 1677 };
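// [Editorial note - not part of this webrev] Each worker folds its
// closure-local failure count into the shared _failures via Atomic::add,
// so the verify task needs no lock even when run by several GC workers.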
1645 1678
1646 1679 // Closure that finalizes the liveness counting data.
1647 1680 // Used during the cleanup pause.
1648 1681 // Sets the bits corresponding to the interval [NTAMS, top)
1649 1682 // (which contains the implicitly live objects) in the
1650 1683 // card liveness bitmap. Also sets, in the region liveness
1651 1684 // bitmap, the bit for each region that contains live data.
1652 1685
1653 1686 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1654 1687 public:
1655 1688 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1656 1689 BitMap* region_bm,
1657 1690 BitMap* card_bm) :
1658 1691 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1659 1692
1660 1693 bool doHeapRegion(HeapRegion* hr) {
1661 1694
1662 1695 if (hr->continuesHumongous()) {
1663 1696 // We will ignore these here and process them when their
1664 1697 // associated "starts humongous" region is processed (see
1665 1698 // set_bit_for_region()). Note that we cannot rely on their
1666 1699 // associated "starts humongous" region to have its bit set to
1667 1700 // 1 since, due to the region chunking in the parallel region
1668 1701 // iteration, a "continues humongous" region might be visited
1669 1702 // before its associated "starts humongous".
1670 1703 return false;
1671 1704 }
1672 1705
1673 1706 HeapWord* ntams = hr->next_top_at_mark_start();
1674 1707 HeapWord* top = hr->top();
1675 1708
1676 1709 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1677 1710
1678 1711 // Mark the allocated-since-marking portion...
1679 1712 if (ntams < top) {
1680 1713 // This definitely means the region has live objects.
1681 1714 set_bit_for_region(hr);
1682 1715
1683 1716 // Now set the bits in the card bitmap for [ntams, top)
1684 1717 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1685 1718 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1686 1719
1687 1720 // Note: if we're looking at the last region in the heap, top
1688 1721 // could actually be just beyond the end of the heap; end_idx
1689 1722 // will then correspond to a (non-existent) card that is also
1690 1723 // just beyond the heap.
1691 1724 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1692 1725 // top is not card aligned - increment to cover
1693 1726 // all the cards spanned by [ntams, top)
1694 1727 end_idx += 1;
1695 1728 }
1696 1729
1697 1730 assert(end_idx <= _card_bm->size(),
1698 1731 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1699 1732 end_idx, _card_bm->size()));
1700 1733 assert(start_idx < _card_bm->size(),
1701 1734 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1702 1735 start_idx, _card_bm->size()));
1703 1736
1704 1737 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1705 1738 }
1706 1739
1707 1740 // Set the bit for the region if it contains live data
1708 1741 if (hr->next_marked_bytes() > 0) {
1709 1742 set_bit_for_region(hr);
1710 1743 }
1711 1744
1712 1745 return false;
1713 1746 }
1714 1747 };
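// [Editorial note - not part of this webrev] Objects in [NTAMS, top) were
// allocated while marking was in progress and are implicitly live, which is
// why the closure above sets their cards without consulting the mark
// bitmap. In bytes, that implicitly live portion is simply:
//
//   size_t implicit_live_bytes = pointer_delta(top, ntams) * HeapWordSize;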
1715 1748
1716 1749 class G1ParFinalCountTask: public AbstractGangTask {
1717 1750 protected:
1718 1751 G1CollectedHeap* _g1h;
1719 1752 ConcurrentMark* _cm;
1720 1753 BitMap* _actual_region_bm;
1721 1754 BitMap* _actual_card_bm;
1722 1755
1723 1756 uint _n_workers;
1724 1757
1725 1758 public:
1726 1759 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1727 1760 : AbstractGangTask("G1 final counting"),
1728 1761 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1729 1762 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1730 1763 _n_workers(0) {
1731 1764 // Use the value already set as the number of active threads
1732 1765 // in the call to run_task().
1733 1766 if (G1CollectedHeap::use_parallel_gc_threads()) {
1734 1767 assert(_g1h->workers()->active_workers() > 0,
1735 1768 "Should have been previously set");
1736 1769 _n_workers = _g1h->workers()->active_workers();
1737 1770 } else {
1738 1771 _n_workers = 1;
1739 1772 }
1740 1773 }
1741 1774
1742 1775 void work(uint worker_id) {
1743 1776 assert(worker_id < _n_workers, "invariant");
1744 1777
1745 1778 FinalCountDataUpdateClosure final_update_cl(_g1h,
1746 1779 _actual_region_bm,
1747 1780 _actual_card_bm);
1748 1781
1749 1782 if (G1CollectedHeap::use_parallel_gc_threads()) {
1750 1783 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1751 1784 worker_id,
1752 1785 _n_workers,
1753 1786 HeapRegion::FinalCountClaimValue);
1754 1787 } else {
1755 1788 _g1h->heap_region_iterate(&final_update_cl);
1756 1789 }
1757 1790 }
1758 1791 };
1759 1792
1760 1793 class G1ParNoteEndTask;
1761 1794
1762 1795 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1763 1796 G1CollectedHeap* _g1;
1764 1797 int _worker_num;
1765 1798 size_t _max_live_bytes;
1766 1799 uint _regions_claimed;
1767 1800 size_t _freed_bytes;
1768 1801 FreeRegionList* _local_cleanup_list;
1769 1802 OldRegionSet* _old_proxy_set;
1770 1803 HumongousRegionSet* _humongous_proxy_set;
1771 1804 HRRSCleanupTask* _hrrs_cleanup_task;
1772 1805 double _claimed_region_time;
1773 1806 double _max_region_time;
1774 1807
1775 1808 public:
1776 1809 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1777 1810 int worker_num,
1778 1811 FreeRegionList* local_cleanup_list,
1779 1812 OldRegionSet* old_proxy_set,
1780 1813 HumongousRegionSet* humongous_proxy_set,
1781 1814 HRRSCleanupTask* hrrs_cleanup_task) :
1782 1815 _g1(g1), _worker_num(worker_num),
1783 1816 _max_live_bytes(0), _regions_claimed(0),
1784 1817 _freed_bytes(0),
1785 1818 _claimed_region_time(0.0), _max_region_time(0.0),
1786 1819 _local_cleanup_list(local_cleanup_list),
1787 1820 _old_proxy_set(old_proxy_set),
1788 1821 _humongous_proxy_set(humongous_proxy_set),
1789 1822 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1790 1823
1791 1824 size_t freed_bytes() { return _freed_bytes; }
1792 1825
1793 1826 bool doHeapRegion(HeapRegion *hr) {
1794 1827 if (hr->continuesHumongous()) {
1795 1828 return false;
1796 1829 }
1797 1830 // We use a claim value of zero here because all regions
1798 1831 // were claimed with value 1 in the FinalCount task.
1799 1832 _g1->reset_gc_time_stamps(hr);
1800 1833 double start = os::elapsedTime();
1801 1834 _regions_claimed++;
1802 1835 hr->note_end_of_marking();
1803 1836 _max_live_bytes += hr->max_live_bytes();
1804 1837 _g1->free_region_if_empty(hr,
1805 1838 &_freed_bytes,
1806 1839 _local_cleanup_list,
1807 1840 _old_proxy_set,
1808 1841 _humongous_proxy_set,
1809 1842 _hrrs_cleanup_task,
1810 1843 true /* par */);
1811 1844 double region_time = (os::elapsedTime() - start);
1812 1845 _claimed_region_time += region_time;
1813 1846 if (region_time > _max_region_time) {
1814 1847 _max_region_time = region_time;
1815 1848 }
1816 1849 return false;
1817 1850 }
1818 1851
1819 1852 size_t max_live_bytes() { return _max_live_bytes; }
1820 1853 uint regions_claimed() { return _regions_claimed; }
1821 1854 double claimed_region_time_sec() { return _claimed_region_time; }
1822 1855 double max_region_time_sec() { return _max_region_time; }
1823 1856 };
1824 1857
1825 1858 class G1ParNoteEndTask: public AbstractGangTask {
1826 1859 friend class G1NoteEndOfConcMarkClosure;
1827 1860
1828 1861 protected:
1829 1862 G1CollectedHeap* _g1h;
1830 1863 size_t _max_live_bytes;
1831 1864 size_t _freed_bytes;
1832 1865 FreeRegionList* _cleanup_list;
1833 1866
1834 1867 public:
1835 1868 G1ParNoteEndTask(G1CollectedHeap* g1h,
1836 1869 FreeRegionList* cleanup_list) :
1837 1870 AbstractGangTask("G1 note end"), _g1h(g1h),
1838 1871 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1839 1872
1840 1873 void work(uint worker_id) {
1841 1874 double start = os::elapsedTime();
1842 1875 FreeRegionList local_cleanup_list("Local Cleanup List");
1843 1876 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1844 1877 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1845 1878 HRRSCleanupTask hrrs_cleanup_task;
1846 1879 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1847 1880 &old_proxy_set,
1848 1881 &humongous_proxy_set,
1849 1882 &hrrs_cleanup_task);
1850 1883 if (G1CollectedHeap::use_parallel_gc_threads()) {
1851 1884 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1852 1885 _g1h->workers()->active_workers(),
1853 1886 HeapRegion::NoteEndClaimValue);
1854 1887 } else {
1855 1888 _g1h->heap_region_iterate(&g1_note_end);
1856 1889 }
1857 1890 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1858 1891
1859 1892 // Now update the lists
1860 1893 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1861 1894 NULL /* free_list */,
1862 1895 &old_proxy_set,
1863 1896 &humongous_proxy_set,
1864 1897 true /* par */);
1865 1898 {
1866 1899 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1867 1900 _max_live_bytes += g1_note_end.max_live_bytes();
1868 1901 _freed_bytes += g1_note_end.freed_bytes();
1869 1902
1870 1903 // If we iterate over the global cleanup list at the end of
1871 1904 // cleanup to do this printing we will not guarantee to only
1872 1905 // generate output for the newly-reclaimed regions (the list
1873 1906 // might not be empty at the beginning of cleanup; we might
1874 1907 // still be working on its previous contents). So we do the
1875 1908 // printing here, before we append the new regions to the global
1876 1909 // cleanup list.
1877 1910
1878 1911 G1HRPrinter* hr_printer = _g1h->hr_printer();
1879 1912 if (hr_printer->is_active()) {
1880 1913 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1881 1914 while (iter.more_available()) {
1882 1915 HeapRegion* hr = iter.get_next();
1883 1916 hr_printer->cleanup(hr);
1884 1917 }
1885 1918 }
1886 1919
1887 1920 _cleanup_list->add_as_tail(&local_cleanup_list);
1888 1921 assert(local_cleanup_list.is_empty(), "post-condition");
1889 1922
1890 1923 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1891 1924 }
1892 1925 }
1893 1926 size_t max_live_bytes() { return _max_live_bytes; }
1894 1927 size_t freed_bytes() { return _freed_bytes; }
1895 1928 };
1896 1929
1897 1930 class G1ParScrubRemSetTask: public AbstractGangTask {
1898 1931 protected:
1899 1932 G1RemSet* _g1rs;
1900 1933 BitMap* _region_bm;
1901 1934 BitMap* _card_bm;
1902 1935 public:
1903 1936 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1904 1937 BitMap* region_bm, BitMap* card_bm) :
1905 1938 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1906 1939 _region_bm(region_bm), _card_bm(card_bm) { }
1907 1940
1908 1941 void work(uint worker_id) {
1909 1942 if (G1CollectedHeap::use_parallel_gc_threads()) {
1910 1943 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1911 1944 HeapRegion::ScrubRemSetClaimValue);
1912 1945 } else {
1913 1946 _g1rs->scrub(_region_bm, _card_bm);
1914 1947 }
1915 1948 }
1916 1949
1917 1950 };
1918 1951
1919 1952 void ConcurrentMark::cleanup() {
1920 1953 // world is stopped at this checkpoint
1921 1954 assert(SafepointSynchronize::is_at_safepoint(),
1922 1955 "world should be stopped");
1923 1956 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1924 1957
1925 1958 // If a full collection has happened, we shouldn't do this.
1926 1959 if (has_aborted()) {
1927 1960 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1928 1961 return;
1929 1962 }
1930 1963
1931 1964 HRSPhaseSetter x(HRSPhaseCleanup);
1932 1965 g1h->verify_region_sets_optional();
1933 1966
1934 1967 if (VerifyDuringGC) {
1935 1968 HandleMark hm; // handle scope
1936 1969 gclog_or_tty->print(" VerifyDuringGC:(before)");
1937 1970 Universe::heap()->prepare_for_verify();
1938 1971 Universe::verify(/* silent */ false,
1939 1972 /* option */ VerifyOption_G1UsePrevMarking);
1940 1973 }
1941 1974
1942 1975 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1943 1976 g1p->record_concurrent_mark_cleanup_start();
1944 1977
1945 1978 double start = os::elapsedTime();
1946 1979
1947 1980 HeapRegionRemSet::reset_for_cleanup_tasks();
1948 1981
1949 1982 uint n_workers;
1950 1983
1951 1984 // Do counting once more with the world stopped for good measure.
1952 1985 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1953 1986
1954 1987 if (G1CollectedHeap::use_parallel_gc_threads()) {
1955 1988 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1956 1989 "sanity check");
1957 1990
1958 1991 g1h->set_par_threads();
1959 1992 n_workers = g1h->n_par_threads();
1960 1993 assert(g1h->n_par_threads() == n_workers,
1961 1994 "Should not have been reset");
1962 1995 g1h->workers()->run_task(&g1_par_count_task);
1963 1996 // Done with the parallel phase so reset to 0.
1964 1997 g1h->set_par_threads(0);
1965 1998
1966 1999 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1967 2000 "sanity check");
1968 2001 } else {
1969 2002 n_workers = 1;
1970 2003 g1_par_count_task.work(0);
1971 2004 }
1972 2005
1973 2006 if (VerifyDuringGC) {
1974 2007 // Verify that the counting data accumulated during marking matches
1975 2008 // that calculated by walking the marking bitmap.
1976 2009
1977 2010 // Bitmaps to hold expected values
1978 2011 BitMap expected_region_bm(_region_bm.size(), false);
1979 2012 BitMap expected_card_bm(_card_bm.size(), false);
1980 2013
1981 2014 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1982 2015 &_region_bm,
1983 2016 &_card_bm,
1984 2017 &expected_region_bm,
1985 2018 &expected_card_bm);
1986 2019
1987 2020 if (G1CollectedHeap::use_parallel_gc_threads()) {
1988 2021 g1h->set_par_threads((int)n_workers);
1989 2022 g1h->workers()->run_task(&g1_par_verify_task);
1990 2023 // Done with the parallel phase so reset to 0.
1991 2024 g1h->set_par_threads(0);
1992 2025
1993 2026 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1994 2027 "sanity check");
1995 2028 } else {
1996 2029 g1_par_verify_task.work(0);
1997 2030 }
1998 2031
1999 2032 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2000 2033 }
2001 2034
2002 2035 size_t start_used_bytes = g1h->used();
2003 2036 g1h->set_marking_complete();
2004 2037
2005 2038 double count_end = os::elapsedTime();
2006 2039 double this_final_counting_time = (count_end - start);
2007 2040 _total_counting_time += this_final_counting_time;
2008 2041
2009 2042 if (G1PrintRegionLivenessInfo) {
2010 2043 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2011 2044 _g1h->heap_region_iterate(&cl);
2012 2045 }
2013 2046
2014 2047 // Install the newly completed next mark bitmap as "prev".
2015 2048 swapMarkBitMaps();
2016 2049
2017 2050 g1h->reset_gc_time_stamp();
2018 2051
2019 2052 // Note end of marking in all heap regions.
2020 2053 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2021 2054 if (G1CollectedHeap::use_parallel_gc_threads()) {
2022 2055 g1h->set_par_threads((int)n_workers);
2023 2056 g1h->workers()->run_task(&g1_par_note_end_task);
2024 2057 g1h->set_par_threads(0);
2025 2058
2026 2059 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2027 2060 "sanity check");
2028 2061 } else {
2029 2062 g1_par_note_end_task.work(0);
2030 2063 }
2031 2064 g1h->check_gc_time_stamps();
2032 2065
2033 2066 if (!cleanup_list_is_empty()) {
2034 2067 // The cleanup list is not empty, so we'll have to process it
2035 2068 // concurrently. Notify anyone else that might be wanting free
2036 2069 // regions that there will be more free regions coming soon.
2037 2070 g1h->set_free_regions_coming();
2038 2071 }
2039 2072
2040 2073 // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
2041 2074 // call below, since it affects the metric by which we sort the heap regions.
2042 2075 if (G1ScrubRemSets) {
2043 2076 double rs_scrub_start = os::elapsedTime();
2044 2077 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2045 2078 if (G1CollectedHeap::use_parallel_gc_threads()) {
2046 2079 g1h->set_par_threads((int)n_workers);
2047 2080 g1h->workers()->run_task(&g1_par_scrub_rs_task);
2048 2081 g1h->set_par_threads(0);
2049 2082
2050 2083 assert(g1h->check_heap_region_claim_values(
2051 2084 HeapRegion::ScrubRemSetClaimValue),
2052 2085 "sanity check");
2053 2086 } else {
2054 2087 g1_par_scrub_rs_task.work(0);
2055 2088 }
2056 2089
2057 2090 double rs_scrub_end = os::elapsedTime();
2058 2091 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2059 2092 _total_rs_scrub_time += this_rs_scrub_time;
2060 2093 }
2061 2094
2062 2095 // this will also free any regions totally full of garbage objects,
2063 2096 // and sort the regions.
2064 2097 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2065 2098
2066 2099 // Statistics.
2067 2100 double end = os::elapsedTime();
2068 2101 _cleanup_times.add((end - start) * 1000.0);
2069 2102
2070 2103 if (G1Log::fine()) {
2071 2104 g1h->print_size_transition(gclog_or_tty,
2072 2105 start_used_bytes,
2073 2106 g1h->used(),
2074 2107 g1h->capacity());
2075 2108 }
2076 2109
2077 2110 // Clean up will have freed any regions completely full of garbage.
2078 2111 // Update the soft reference policy with the new heap occupancy.
2079 2112 Universe::update_heap_info_at_gc();
2080 2113
2081 2114 // We need to make this be a "collection" so any collection pause that
2082 2115 // races with it goes around and waits for completeCleanup to finish.
2083 2116 g1h->increment_total_collections();
2084 2117
2085 2118 // We reclaimed old regions so we should calculate the sizes to make
2086 2119 // sure we update the old gen/space data.
2087 2120 g1h->g1mm()->update_sizes();
2088 2121
2089 2122 if (VerifyDuringGC) {
2090 2123 HandleMark hm; // handle scope
2091 2124 gclog_or_tty->print(" VerifyDuringGC:(after)");
2092 2125 Universe::heap()->prepare_for_verify();
2093 2126 Universe::verify(/* silent */ false,
2094 2127 /* option */ VerifyOption_G1UsePrevMarking);
2095 2128 }
2096 2129
2097 2130 g1h->verify_region_sets_optional();
2098 2131 }
2099 2132
2100 2133 void ConcurrentMark::completeCleanup() {
2101 2134 if (has_aborted()) return;
2102 2135
2103 2136 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2104 2137
2105 2138 _cleanup_list.verify_optional();
2106 2139 FreeRegionList tmp_free_list("Tmp Free List");
2107 2140
2108 2141 if (G1ConcRegionFreeingVerbose) {
2109 2142 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2110 2143 "cleanup list has %u entries",
2111 2144 _cleanup_list.length());
2112 2145 }
2113 2146
2114 2147 // No one else should be accessing the _cleanup_list at this point,
2115 2148 // so it's not necessary to take any locks
2116 2149 while (!_cleanup_list.is_empty()) {
2117 2150 HeapRegion* hr = _cleanup_list.remove_head();
2118 2151 assert(hr != NULL, "the list was not empty");
2119 2152 hr->par_clear();
2120 2153 tmp_free_list.add_as_tail(hr);
2121 2154
2122 2155 // Instead of adding one region at a time to the secondary_free_list,
2123 2156 // we accumulate them in the local list and move them a few at a
2124 2157 // time. This also cuts down on the number of notify_all() calls
2125 2158 // we do during this process. We'll also append the local list when
2126 2159 // _cleanup_list is empty (which means we just removed the last
2127 2160 // region from the _cleanup_list).
2128 2161 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2129 2162 _cleanup_list.is_empty()) {
2130 2163 if (G1ConcRegionFreeingVerbose) {
2131 2164 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2132 2165 "appending %u entries to the secondary_free_list, "
2133 2166 "cleanup list still has %u entries",
2134 2167 tmp_free_list.length(),
2135 2168 _cleanup_list.length());
2136 2169 }
2137 2170
2138 2171 {
2139 2172 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2140 2173 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2141 2174 SecondaryFreeList_lock->notify_all();
2142 2175 }
2143 2176
2144 2177 if (G1StressConcRegionFreeing) {
2145 2178 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2146 2179 os::sleep(Thread::current(), (jlong) 1, false);
2147 2180 }
2148 2181 }
2149 2182 }
2150 2183 }
2151 2184 assert(tmp_free_list.is_empty(), "post-condition");
2152 2185 }
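// [Editorial sketch - not part of this webrev] The batching pattern used by
// completeCleanup() above - drain a shared list through a local one and
// publish every N entries to limit lock traffic and notify_all() calls -
// reduced to a standalone stand-in. All names are illustrative:
#if 0
#include <list>
#include <mutex>
#include <condition_variable>

static std::list<int> g_secondary;   // stands in for the secondary free list
static std::mutex g_secondary_lock;
static std::condition_variable g_secondary_cv;

static void drain_in_batches(std::list<int>& cleanup, size_t batch_len) {
  std::list<int> local;
  while (!cleanup.empty()) {
    local.splice(local.end(), cleanup, cleanup.begin());
    // Publish when the local batch is full, or when we have just taken the
    // last entry (mirrors the % G1SecondaryFreeListAppendLength test above).
    if (local.size() % batch_len == 0 || cleanup.empty()) {
      std::lock_guard<std::mutex> g(g_secondary_lock);
      g_secondary.splice(g_secondary.end(), local);  // empties `local`
      g_secondary_cv.notify_all();
    }
  }
}
#endif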
2153 2186
2154 2187 // Support closures for reference processing in G1
2155 2188
2156 2189 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2157 2190 HeapWord* addr = (HeapWord*)obj;
2158 2191 return addr != NULL &&
2159 2192 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2160 2193 }
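// [Editorial note - not part of this webrev] An oop passes the is-alive
// test above if it lies outside the G1-reserved space, or if it is inside
// and not "ill" - i.e. it is either marked on the next bitmap or sits at
// or above NTAMS (allocated during marking, hence implicitly live).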
2161 2194
2162 2195 class G1CMKeepAliveClosure: public ExtendedOopClosure {
2163 2196 G1CollectedHeap* _g1;
2164 2197 ConcurrentMark* _cm;
2165 2198 public:
2166 2199 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2167 2200 _g1(g1), _cm(cm) {
2168 2201 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2169 2202 }
2170 2203
2171 2204 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2172 2205 virtual void do_oop( oop* p) { do_oop_work(p); }
2173 2206
2174 2207 template <class T> void do_oop_work(T* p) {
2175 2208 oop obj = oopDesc::load_decode_heap_oop(p);
2176 2209 HeapWord* addr = (HeapWord*)obj;
2177 2210
2178 2211 if (_cm->verbose_high()) {
2179 2212 gclog_or_tty->print_cr("\t[0] we're looking at location "
2180 2213 "*"PTR_FORMAT" = "PTR_FORMAT,
2181 2214 p, (void*) obj);
2182 2215 }
2183 2216
2184 2217 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2185 2218 _cm->mark_and_count(obj);
2186 2219 _cm->mark_stack_push(obj);
2187 2220 }
2188 2221 }
2189 2222 };
2190 2223
2191 2224 class G1CMDrainMarkingStackClosure: public VoidClosure {
2192 2225 ConcurrentMark* _cm;
2193 2226 CMMarkStack* _markStack;
2194 2227 G1CMKeepAliveClosure* _oopClosure;
2195 2228 public:
2196 2229 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2197 2230 G1CMKeepAliveClosure* oopClosure) :
2198 2231 _cm(cm),
2199 2232 _markStack(markStack),
2200 2233 _oopClosure(oopClosure) { }
2201 2234
2202 2235 void do_void() {
2203 2236 _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false);
2204 2237 }
2205 2238 };
2206 2239
2207 2240 // 'Keep Alive' closure used by parallel reference processing.
2208 2241 // An instance of this closure is used in the parallel reference processing
2209 2242 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2210 2243 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2211 2244 // placed onto discovered ref lists only once, so we can mark and push with no
2212 2245 // need to check whether the object has already been marked. Using the
2213 2246 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2214 2247 // operating on the global mark stack. This means that an individual
2215 2248 // worker would be doing lock-free pushes while it processes its own
2216 2249 // discovered ref list, followed by a drain call. If the discovered ref lists
2217 2250 // are unbalanced then this could cause interference with the other
2218 2251 // workers. Using a CMTask (and its embedded local data structures)
2219 2252 // avoids that potential interference.
2220 2253 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2221 2254 ConcurrentMark* _cm;
2222 2255 CMTask* _task;
2223 2256 int _ref_counter_limit;
2224 2257 int _ref_counter;
2225 2258 public:
2226 2259 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2227 2260 _cm(cm), _task(task),
2228 2261 _ref_counter_limit(G1RefProcDrainInterval) {
2229 2262 assert(_ref_counter_limit > 0, "sanity");
2230 2263 _ref_counter = _ref_counter_limit;
2231 2264 }
2232 2265
2233 2266 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2234 2267 virtual void do_oop( oop* p) { do_oop_work(p); }
2235 2268
2236 2269 template <class T> void do_oop_work(T* p) {
2237 2270 if (!_cm->has_overflown()) {
2238 2271 oop obj = oopDesc::load_decode_heap_oop(p);
2239 2272 if (_cm->verbose_high()) {
2240 2273 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2241 2274 "*"PTR_FORMAT" = "PTR_FORMAT,
2242 2275 _task->worker_id(), p, (void*) obj);
2243 2276 }
2244 2277
2245 2278 _task->deal_with_reference(obj);
2246 2279 _ref_counter--;
2247 2280
2248 2281 if (_ref_counter == 0) {
2249 2282 // We have dealt with _ref_counter_limit references, pushing them and objects
2250 2283 // reachable from them onto the local stack (and possibly the global stack).
2251 2284 // Call do_marking_step() to process these entries. We call the routine in a
2252 2285 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2253 2286 // with the entries that we've pushed as a result of the deal_with_reference
2254 2287 // calls above) or we overflow.
2255 2288 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2256 2289 // while there may still be some work to do. (See the comment at the
2257 2290 // beginning of CMTask::do_marking_step() for those conditions - one of which
2258 2291 // is reaching the specified time target.) It is only when
2259 2292 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2260 2293 // that the marking has completed.
2261 2294 do {
2262 2295 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2263 2296 _task->do_marking_step(mark_step_duration_ms,
2264 2297 false /* do_stealing */,
2265 2298 false /* do_termination */);
2266 2299 } while (_task->has_aborted() && !_cm->has_overflown());
2267 2300 _ref_counter = _ref_counter_limit;
2268 2301 }
2269 2302 } else {
2270 2303 if (_cm->verbose_high()) {
2271 2304 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2272 2305 }
2273 2306 }
2274 2307 }
2275 2308 };
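// [Editorial sketch - not part of this webrev] The drain-every-N cadence of
// the closure above, in isolation: process references, and after every N of
// them stop to drain what was pushed before accepting more. The helpers
// below are hypothetical stand-ins, not CMTask's real interface:
#if 0
// Hypothetical stand-ins for the CMTask operations used above:
static void push_reference(void* ref);   // push ref + newly reachable objs
static bool marking_step_aborted();      // run one bounded marking step
static bool overflowed();                // did the global stack overflow?

static const int kDrainInterval = 1000;  // stands in for G1RefProcDrainInterval
static int ref_counter = kDrainInterval;

static void keep_alive_one(void* ref) {
  push_reference(ref);
  if (--ref_counter == 0) {
    // Drain what we just pushed before taking more references; retry while
    // the step aborts for a reason other than stack overflow.
    while (marking_step_aborted() && !overflowed()) { /* retry */ }
    ref_counter = kDrainInterval;
  }
}
#endif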
2276 2309
2277 2310 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2278 2311 ConcurrentMark* _cm;
2279 2312 CMTask* _task;
2280 2313 public:
2281 2314 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2282 2315 _cm(cm), _task(task) { }
2283 2316
2284 2317 void do_void() {
2285 2318 do {
2286 2319 if (_cm->verbose_high()) {
2287 2320 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step",
2288 2321 _task->worker_id());
2289 2322 }
2290 2323
2291 2324 // We call CMTask::do_marking_step() to completely drain the local and
2292 2325 // global marking stacks. The routine is called in a loop, which we'll
2293 2326 // exit if there's nothing more to do (i.e. we've completely drained the
2294 2327 // entries that were pushed as a result of applying the
2295 2328 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2296 2329 // lists above) or we overflow the global marking stack.
2297 2330 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2298 2331 // while there may still be some work to do. (See the comment at the
2299 2332 // beginning of CMTask::do_marking_step() for those conditions - one of which
2300 2333 // is reaching the specified time target.) It is only when
2301 2334 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2302 2335 // that the marking has completed.
2303 2336
2304 2337 _task->do_marking_step(1000000000.0 /* something very large */,
2305 2338 true /* do_stealing */,
2306 2339 true /* do_termination */);
2307 2340 } while (_task->has_aborted() && !_cm->has_overflown());
2308 2341 }
2309 2342 };
2310 2343
2311 2344 // Implementation of AbstractRefProcTaskExecutor for parallel
2312 2345 // reference processing at the end of G1 concurrent marking
2313 2346
2314 2347 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2315 2348 private:
2316 2349 G1CollectedHeap* _g1h;
2317 2350 ConcurrentMark* _cm;
2318 2351 WorkGang* _workers;
2319 2352 int _active_workers;
2320 2353
2321 2354 public:
2322 2355 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2323 2356 ConcurrentMark* cm,
2324 2357 WorkGang* workers,
2325 2358 int n_workers) :
2326 2359 _g1h(g1h), _cm(cm),
2327 2360 _workers(workers), _active_workers(n_workers) { }
2328 2361
2329 2362 // Executes the given task using concurrent marking worker threads.
2330 2363 virtual void execute(ProcessTask& task);
2331 2364 virtual void execute(EnqueueTask& task);
2332 2365 };
2333 2366
2334 2367 class G1CMRefProcTaskProxy: public AbstractGangTask {
2335 2368 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2336 2369 ProcessTask& _proc_task;
2337 2370 G1CollectedHeap* _g1h;
2338 2371 ConcurrentMark* _cm;
2339 2372
2340 2373 public:
2341 2374 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2342 2375 G1CollectedHeap* g1h,
2343 2376 ConcurrentMark* cm) :
2344 2377 AbstractGangTask("Process reference objects in parallel"),
2345 2378 _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2346 2379
2347 2380 virtual void work(uint worker_id) {
2348 2381 CMTask* marking_task = _cm->task(worker_id);
2349 2382 G1CMIsAliveClosure g1_is_alive(_g1h);
2350 2383 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2351 2384 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2352 2385
2353 2386 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2354 2387 }
2355 2388 };
2356 2389
2357 2390 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2358 2391 assert(_workers != NULL, "Need parallel worker threads.");
2359 2392
2360 2393 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2361 2394
2362 2395 // We need to reset the phase for each task execution so that
2363 2396 // the termination protocol of CMTask::do_marking_step works.
2364 2397 _cm->set_phase(_active_workers, false /* concurrent */);
2365 2398 _g1h->set_par_threads(_active_workers);
2366 2399 _workers->run_task(&proc_task_proxy);
2367 2400 _g1h->set_par_threads(0);
2368 2401 }
2369 2402
2370 2403 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2371 2404 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2372 2405 EnqueueTask& _enq_task;
2373 2406
2374 2407 public:
2375 2408 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2376 2409 AbstractGangTask("Enqueue reference objects in parallel"),
2377 2410 _enq_task(enq_task) { }
2378 2411
2379 2412 virtual void work(uint worker_id) {
2380 2413 _enq_task.work(worker_id);
2381 2414 }
2382 2415 };
2383 2416
2384 2417 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2385 2418 assert(_workers != NULL, "Need parallel worker threads.");
2386 2419
2387 2420 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2388 2421
2389 2422 _g1h->set_par_threads(_active_workers);
2390 2423 _workers->run_task(&enq_task_proxy);
2391 2424 _g1h->set_par_threads(0);
2392 2425 }
2393 2426
2394 2427 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2395 2428 ResourceMark rm;
2396 2429 HandleMark hm;
2397 2430
2398 2431 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2399 2432
2400 2433 // Is alive closure.
2401 2434 G1CMIsAliveClosure g1_is_alive(g1h);
2402 2435
2403 2436 // Inner scope to exclude the cleaning of the string and symbol
2404 2437 // tables from the displayed time.
2405 2438 {
2406 2439 if (G1Log::finer()) {
2407 2440 gclog_or_tty->put(' ');
2408 2441 }
2409 2442 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2410 2443
2411 2444 ReferenceProcessor* rp = g1h->ref_processor_cm();
2412 2445
2413 2446 // See the comment in G1CollectedHeap::ref_processing_init()
2414 2447 // about how reference processing currently works in G1.
2415 2448
2416 2449 // Process weak references.
2417 2450 rp->setup_policy(clear_all_soft_refs);
2418 2451 assert(_markStack.isEmpty(), "mark stack should be empty");
2419 2452
2420 2453 G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2421 2454 G1CMDrainMarkingStackClosure
2422 2455 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2423 2456
2424 2457 // We use the work gang from the G1CollectedHeap and we utilize all
2425 2458 // the worker threads.
2426 2459 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2427 2460 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2428 2461
2429 2462 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2430 2463 g1h->workers(), active_workers);
2431 2464
2432 2465 if (rp->processing_is_mt()) {
2433 2466 // Set the degree of MT here. If the discovery is done MT, there
2434 2467 // may have been a different number of threads doing the discovery
2435 2468 // and a different number of discovered lists may have Ref objects.
2436 2469 // That is OK as long as the Reference lists are balanced (see
2437 2470 // balance_all_queues() and balance_queues()).
2438 2471 rp->set_active_mt_degree(active_workers);
2439 2472
2440 2473 rp->process_discovered_references(&g1_is_alive,
2441 2474 &g1_keep_alive,
2442 2475 &g1_drain_mark_stack,
2443 2476 &par_task_executor);
2444 2477
2445 2478 // The work routines of the parallel keep_alive and drain_marking_stack
2446 2479 // will set the has_overflown flag if we overflow the global marking
2447 2480 // stack.
2448 2481 } else {
2449 2482 rp->process_discovered_references(&g1_is_alive,
2450 2483 &g1_keep_alive,
2451 2484 &g1_drain_mark_stack,
2452 2485 NULL);
2453 2486 }
2454 2487
2455 2488 assert(_markStack.overflow() || _markStack.isEmpty(),
2456 2489 "mark stack should be empty (unless it overflowed)");
2457 2490 if (_markStack.overflow()) {
2458 2491 // Should have been done already when we tried to push an
2459 2492 // entry onto the global mark stack. But let's do it again.
2460 2493 set_has_overflown();
2461 2494 }
2462 2495
2463 2496 if (rp->processing_is_mt()) {
2464 2497 assert(rp->num_q() == active_workers, "why not");
2465 2498 rp->enqueue_discovered_references(&par_task_executor);
2466 2499 } else {
2467 2500 rp->enqueue_discovered_references();
2468 2501 }
2469 2502
2470 2503 rp->verify_no_references_recorded();
2471 2504 assert(!rp->discovery_enabled(), "Post condition");
2472 2505 }
2473 2506
2474 2507 // Now clean up stale oops in StringTable
2475 2508 StringTable::unlink(&g1_is_alive);
2476 2509 // Clean up unreferenced symbols in symbol table.
2477 2510 SymbolTable::unlink();
2478 2511 }
2479 2512
2480 2513 void ConcurrentMark::swapMarkBitMaps() {
2481 2514 CMBitMapRO* temp = _prevMarkBitMap;
2482 2515 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2483 2516 _nextMarkBitMap = (CMBitMap*) temp;
2484 2517 }
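// [Editorial note - not part of this webrev] Swapping the two pointers is
// how the just-completed "next" marking information becomes the new "prev"
// view at cleanup: an O(1) exchange instead of a copy. The old prev bitmap
// is then recycled as the next bitmap, to be cleared before the next cycle.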
2485 2518
2486 2519 class CMRemarkTask: public AbstractGangTask {
2487 2520 private:
2488 2521 ConcurrentMark *_cm;
2489 2522
2490 2523 public:
2491 2524 void work(uint worker_id) {
2492 2525 // Since all available tasks are actually started, we should
2493 2526 // only proceed if we're supposed to be active.
2494 2527 if (worker_id < _cm->active_tasks()) {
2495 2528 CMTask* task = _cm->task(worker_id);
2496 2529 task->record_start_time();
2497 2530 do {
2498 2531 task->do_marking_step(1000000000.0 /* something very large */,
2499 2532 true /* do_stealing */,
2500 2533 true /* do_termination */);
2501 2534 } while (task->has_aborted() && !_cm->has_overflown());
2502 2535 // If we overflow, then we do not want to restart. We instead
2503 2536 // want to abort remark and do concurrent marking again.
2504 2537 task->record_end_time();
2505 2538 }
2506 2539 }
2507 2540
2508 2541 CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2509 2542 AbstractGangTask("Par Remark"), _cm(cm) {
2510 2543 _cm->terminator()->reset_for_reuse(active_workers);
2511 2544 }
2512 2545 };
2513 2546
2514 2547 void ConcurrentMark::checkpointRootsFinalWork() {
2515 2548 ResourceMark rm;
2516 2549 HandleMark hm;
2517 2550 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2518 2551
2519 2552 g1h->ensure_parsability(false);
2520 2553
2521 2554 if (G1CollectedHeap::use_parallel_gc_threads()) {
2522 2555 G1CollectedHeap::StrongRootsScope srs(g1h);
2523 2556 // this is remark, so we'll use up all active threads
2524 2557 uint active_workers = g1h->workers()->active_workers();
2525 2558 if (active_workers == 0) {
2526 2559 assert(active_workers > 0, "Should have been set earlier");
2527 2560 active_workers = (uint) ParallelGCThreads;
2528 2561 g1h->workers()->set_active_workers(active_workers);
2529 2562 }
2530 2563 set_phase(active_workers, false /* concurrent */);
2531 2564 // Leave _parallel_marking_threads at its
2532 2565 // value originally calculated in the ConcurrentMark
2533 2566 // constructor and pass values of the active workers
2534 2567 // through the gang in the task.
2535 2568
2536 2569 CMRemarkTask remarkTask(this, active_workers);
2537 2570 g1h->set_par_threads(active_workers);
2538 2571 g1h->workers()->run_task(&remarkTask);
2539 2572 g1h->set_par_threads(0);
2540 2573 } else {
2541 2574 G1CollectedHeap::StrongRootsScope srs(g1h);
2542 2575 // this is remark, so we'll use up all available threads
2543 2576 uint active_workers = 1;
2544 2577 set_phase(active_workers, false /* concurrent */);
2545 2578
2546 2579 CMRemarkTask remarkTask(this, active_workers);
2547 2580 // We will start all available threads, even if we decide that the
2548 2581 // active_workers will be fewer. The extra ones will just bail out
2549 2582 // immediately.
2550 2583 remarkTask.work(0);
2551 2584 }
2552 2585 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2553 2586 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2554 2587
2555 2588 print_stats();
2556 2589
2557 2590 #if VERIFY_OBJS_PROCESSED
2558 2591 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2559 2592 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2560 2593 _scan_obj_cl.objs_processed,
2561 2594 ThreadLocalObjQueue::objs_enqueued);
2562 2595 guarantee(_scan_obj_cl.objs_processed ==
2563 2596 ThreadLocalObjQueue::objs_enqueued,
2564 2597 "Different number of objs processed and enqueued.");
2565 2598 }
2566 2599 #endif
2567 2600 }
2568 2601
2569 2602 #ifndef PRODUCT
2570 2603
2571 2604 class PrintReachableOopClosure: public OopClosure {
2572 2605 private:
2573 2606 G1CollectedHeap* _g1h;
2574 2607 outputStream* _out;
2575 2608 VerifyOption _vo;
2576 2609 bool _all;
2577 2610
2578 2611 public:
2579 2612 PrintReachableOopClosure(outputStream* out,
2580 2613 VerifyOption vo,
2581 2614 bool all) :
2582 2615 _g1h(G1CollectedHeap::heap()),
2583 2616 _out(out), _vo(vo), _all(all) { }
2584 2617
2585 2618 void do_oop(narrowOop* p) { do_oop_work(p); }
2586 2619 void do_oop( oop* p) { do_oop_work(p); }
2587 2620
2588 2621 template <class T> void do_oop_work(T* p) {
2589 2622 oop obj = oopDesc::load_decode_heap_oop(p);
2590 2623 const char* str = NULL;
2591 2624 const char* str2 = "";
2592 2625
2593 2626 if (obj == NULL) {
2594 2627 str = "";
2595 2628 } else if (!_g1h->is_in_g1_reserved(obj)) {
2596 2629 str = " O";
2597 2630 } else {
2598 2631 HeapRegion* hr = _g1h->heap_region_containing(obj);
2599 2632 guarantee(hr != NULL, "invariant");
2600 2633 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2601 2634 bool marked = _g1h->is_marked(obj, _vo);
2602 2635
2603 2636 if (over_tams) {
2604 2637 str = " >";
2605 2638 if (marked) {
2606 2639 str2 = " AND MARKED";
2607 2640 }
2608 2641 } else if (marked) {
2609 2642 str = " M";
2610 2643 } else {
2611 2644 str = " NOT";
2612 2645 }
2613 2646 }
2614 2647
2615 2648 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2616 2649 p, (void*) obj, str, str2);
2617 2650 }
2618 2651 };
2619 2652
2620 2653 class PrintReachableObjectClosure : public ObjectClosure {
2621 2654 private:
2622 2655 G1CollectedHeap* _g1h;
2623 2656 outputStream* _out;
2624 2657 VerifyOption _vo;
2625 2658 bool _all;
2626 2659 HeapRegion* _hr;
2627 2660
2628 2661 public:
2629 2662 PrintReachableObjectClosure(outputStream* out,
2630 2663 VerifyOption vo,
2631 2664 bool all,
2632 2665 HeapRegion* hr) :
2633 2666 _g1h(G1CollectedHeap::heap()),
2634 2667 _out(out), _vo(vo), _all(all), _hr(hr) { }
2635 2668
2636 2669 void do_object(oop o) {
2637 2670 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2638 2671 bool marked = _g1h->is_marked(o, _vo);
2639 2672 bool print_it = _all || over_tams || marked;
2640 2673
2641 2674 if (print_it) {
2642 2675 _out->print_cr(" "PTR_FORMAT"%s",
2643 2676 o, (over_tams) ? " >" : (marked) ? " M" : "");
2644 2677 PrintReachableOopClosure oopCl(_out, _vo, _all);
2645 2678 o->oop_iterate_no_header(&oopCl);
2646 2679 }
2647 2680 }
2648 2681 };
2649 2682
2650 2683 class PrintReachableRegionClosure : public HeapRegionClosure {
2651 2684 private:
2652 2685 G1CollectedHeap* _g1h;
2653 2686 outputStream* _out;
2654 2687 VerifyOption _vo;
2655 2688 bool _all;
2656 2689
2657 2690 public:
2658 2691 bool doHeapRegion(HeapRegion* hr) {
2659 2692 HeapWord* b = hr->bottom();
2660 2693 HeapWord* e = hr->end();
2661 2694 HeapWord* t = hr->top();
2662 2695 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2663 2696 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2664 2697 "TAMS: "PTR_FORMAT, b, e, t, p);
2665 2698 _out->cr();
2666 2699
2667 2700 HeapWord* from = b;
2668 2701 HeapWord* to = t;
2669 2702
2670 2703 if (to > from) {
2671 2704 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2672 2705 _out->cr();
2673 2706 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2674 2707 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2675 2708 _out->cr();
2676 2709 }
2677 2710
2678 2711 return false;
2679 2712 }
2680 2713
2681 2714 PrintReachableRegionClosure(outputStream* out,
2682 2715 VerifyOption vo,
2683 2716 bool all) :
2684 2717 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2685 2718 };
2686 2719
2687 2720 void ConcurrentMark::print_reachable(const char* str,
2688 2721 VerifyOption vo,
2689 2722 bool all) {
2690 2723 gclog_or_tty->cr();
2691 2724 gclog_or_tty->print_cr("== Doing heap dump... ");
2692 2725
2693 2726 if (G1PrintReachableBaseFile == NULL) {
2694 2727 gclog_or_tty->print_cr(" #### error: no base file defined");
2695 2728 return;
2696 2729 }
2697 2730
2698 2731 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2699 2732 (JVM_MAXPATHLEN - 1)) {
2700 2733 gclog_or_tty->print_cr(" #### error: file name too long");
2701 2734 return;
2702 2735 }
2703 2736
2704 2737 char file_name[JVM_MAXPATHLEN];
2705 2738 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2706 2739 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2707 2740
2708 2741 fileStream fout(file_name);
2709 2742 if (!fout.is_open()) {
2710 2743 gclog_or_tty->print_cr(" #### error: could not open file");
2711 2744 return;
2712 2745 }
2713 2746
2714 2747 outputStream* out = &fout;
2715 2748 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2716 2749 out->cr();
2717 2750
2718 2751 out->print_cr("--- ITERATING OVER REGIONS");
2719 2752 out->cr();
2720 2753 PrintReachableRegionClosure rcl(out, vo, all);
2721 2754 _g1h->heap_region_iterate(&rcl);
2722 2755 out->cr();
2723 2756
2724 2757 gclog_or_tty->print_cr(" done");
2725 2758 gclog_or_tty->flush();
2726 2759 }
2727 2760
2728 2761 #endif // PRODUCT
2729 2762
2730 2763 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2731 2764 // Note we are overriding the read-only view of the prev map here, via
2732 2765 // the cast.
2733 2766 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2734 2767 }
2735 2768
2736 2769 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2737 2770 _nextMarkBitMap->clearRange(mr);
2738 2771 }
2739 2772
2740 2773 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2741 2774 clearRangePrevBitmap(mr);
2742 2775 clearRangeNextBitmap(mr);
2743 2776 }
2744 2777
2745 2778 HeapRegion*
2746 2779 ConcurrentMark::claim_region(uint worker_id) {
2747 2780 // "checkpoint" the finger
2748 2781 HeapWord* finger = _finger;
2749 2782
2750 2783 // _heap_end will not change underneath our feet; it only changes at
2751 2784 // yield points.
2752 2785 while (finger < _heap_end) {
2753 2786 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2754 2787
2755 2788 // Note on how this code handles humongous regions. In the
2756 2789 // normal case the finger will reach the start of a "starts
2757 2790 // humongous" (SH) region. Its end will either be the end of the
2758 2791 // last "continues humongous" (CH) region in the sequence, or the
2759 2792 // standard end of the SH region (if the SH is the only region in
2760 2793 // the sequence). That way claim_region() will skip over the CH
2761 2794 // regions. However, there is a subtle race between a CM thread
2762 2795 // executing this method and a mutator thread doing a humongous
2763 2796 // object allocation. The two are not mutually exclusive as the CM
2764 2797 // thread does not need to hold the Heap_lock when it gets
2765 2798 // here. So there is a chance that claim_region() will come across
2766 2799 // a free region that's in the process of becoming a SH or a CH
2767 2800 // region. In the former case, it will either
2768 2801 // a) Miss the update to the region's end, in which case it will
2769 2802 // visit every subsequent CH region, will find their bitmaps
2770 2803 // empty, and do nothing, or
2771 2804 // b) Will observe the update of the region's end (in which case
2772 2805 // it will skip the subsequent CH regions).
2773 2806 // If it comes across a region that suddenly becomes CH, the
2774 2807 // scenario will be similar to b). So, the race between
2775 2808 // claim_region() and a humongous object allocation might force us
2776 2809 // to do a bit of unnecessary work (due to some unnecessary bitmap
2777 2810 // iterations) but it should not introduce any correctness issues.
2778 2811 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2779 2812 HeapWord* bottom = curr_region->bottom();
2780 2813 HeapWord* end = curr_region->end();
2781 2814 HeapWord* limit = curr_region->next_top_at_mark_start();
2782 2815
2783 2816 if (verbose_low()) {
2784 2817 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2785 2818 "["PTR_FORMAT", "PTR_FORMAT"), "
2786 2819 "limit = "PTR_FORMAT,
2787 2820 worker_id, curr_region, bottom, end, limit);
2788 2821 }
2789 2822
2790 2823 // Is the gap between reading the finger and doing the CAS too long?
2791 2824 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2792 2825 if (res == finger) {
2793 2826 // we succeeded
2794 2827
2795 2828 // notice that _finger == end cannot be guaranteed here since
2796 2829 // someone else might have moved the finger even further
2797 2830 assert(_finger >= end, "the finger should have moved forward");
2798 2831
2799 2832 if (verbose_low()) {
2800 2833 gclog_or_tty->print_cr("[%u] we were successful with region = "
2801 2834 PTR_FORMAT, worker_id, curr_region);
2802 2835 }
2803 2836
2804 2837 if (limit > bottom) {
2805 2838 if (verbose_low()) {
2806 2839 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2807 2840 "returning it ", worker_id, curr_region);
2808 2841 }
2809 2842 return curr_region;
2810 2843 } else {
2811 2844 assert(limit == bottom,
2812 2845 "the region limit should be at bottom");
2813 2846 if (verbose_low()) {
2814 2847 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2815 2848 "returning NULL", worker_id, curr_region);
2816 2849 }
2817 2850 // we return NULL and the caller should try calling
2818 2851 // claim_region() again.
2819 2852 return NULL;
2820 2853 }
2821 2854 } else {
2822 2855 assert(_finger > finger, "the finger should have moved forward");
2823 2856 if (verbose_low()) {
2824 2857 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2825 2858 "global finger = "PTR_FORMAT", "
2826 2859 "our finger = "PTR_FORMAT,
2827 2860 worker_id, _finger, finger);
2828 2861 }
2829 2862
2830 2863 // read it again
2831 2864 finger = _finger;
2832 2865 }
2833 2866 }
2834 2867
2835 2868 return NULL;
2836 2869 }
2837 2870
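// [Editorial sketch -- not part of this patch] The claiming protocol
// used by claim_region() above (checkpoint the shared finger, compute
// the claim's end, CAS the finger forward, retry if somebody else
// moved it first) can be illustrated with the following minimal,
// self-contained analogue. All names are hypothetical and std::atomic
// stands in for HotSpot's Atomic::cmpxchg_ptr():

#include <atomic>
#include <cstddef>
#include <utility>

struct ClaimCursor {
  std::atomic<char*> finger;   // next unclaimed address
  char*              heap_end; // fixed while claiming is in progress
};

// Returns the claimed [start, end) chunk, or (NULL, NULL) once the
// cursor has reached heap_end. 'chunk' plays the role of the region
// size; in claim_region() the end is the containing region's end().
static std::pair<char*, char*> claim_chunk(ClaimCursor& c, size_t chunk) {
  char* start = c.finger.load();
  while (start < c.heap_end) {
    char* end = start + chunk;
    if (c.finger.compare_exchange_strong(start, end)) {
      return std::make_pair(start, end);  // we own [start, end)
    }
    // CAS failed: 'start' now holds the finger some other thread
    // installed ("read it again" above); loop and retry from there.
  }
  return std::make_pair(static_cast<char*>(NULL), static_cast<char*>(NULL));
}
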
2838 2871 #ifndef PRODUCT
2839 2872 enum VerifyNoCSetOopsPhase {
2840 2873 VerifyNoCSetOopsStack,
2841 2874 VerifyNoCSetOopsQueues,
2842 2875 VerifyNoCSetOopsSATBCompleted,
2843 2876 VerifyNoCSetOopsSATBThread
2844 2877 };
2845 2878
2846 2879 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
2847 2880 private:
2848 2881 G1CollectedHeap* _g1h;
2849 2882 VerifyNoCSetOopsPhase _phase;
2850 2883 int _info;
2851 2884
2852 2885 const char* phase_str() {
2853 2886 switch (_phase) {
2854 2887 case VerifyNoCSetOopsStack: return "Stack";
2855 2888 case VerifyNoCSetOopsQueues: return "Queue";
2856 2889 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2857 2890 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
2858 2891 default: ShouldNotReachHere();
2859 2892 }
2860 2893 return NULL;
2861 2894 }
2862 2895
2863 2896 void do_object_work(oop obj) {
2864 2897 guarantee(!_g1h->obj_in_cs(obj),
2865 2898 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2866 2899 (void*) obj, phase_str(), _info));
2867 2900 }
2868 2901
2869 2902 public:
2870 2903 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2871 2904
2872 2905 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2873 2906 _phase = phase;
2874 2907 _info = info;
2875 2908 }
2876 2909
2877 2910 virtual void do_oop(oop* p) {
2878 2911 oop obj = oopDesc::load_decode_heap_oop(p);
2879 2912 do_object_work(obj);
2880 2913 }
2881 2914
2882 2915 virtual void do_oop(narrowOop* p) {
2883 2916 // We should not come across narrow oops while scanning marking
2884 2917 // stacks and SATB buffers.
2885 2918 ShouldNotReachHere();
2886 2919 }
2887 2920
2888 2921 virtual void do_object(oop obj) {
2889 2922 do_object_work(obj);
2890 2923 }
2891 2924 };
2892 2925
2893 2926 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2894 2927 bool verify_enqueued_buffers,
2895 2928 bool verify_thread_buffers,
2896 2929 bool verify_fingers) {
2897 2930 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2898 2931 if (!G1CollectedHeap::heap()->mark_in_progress()) {
2899 2932 return;
2900 2933 }
2901 2934
2902 2935 VerifyNoCSetOopsClosure cl;
2903 2936
2904 2937 if (verify_stacks) {
2905 2938 // Verify entries on the global mark stack
2906 2939 cl.set_phase(VerifyNoCSetOopsStack);
2907 2940 _markStack.oops_do(&cl);
2908 2941
2909 2942 // Verify entries on the task queues
2910 2943 for (uint i = 0; i < _max_worker_id; i += 1) {
2911 2944 cl.set_phase(VerifyNoCSetOopsQueues, i);
2912 2945 CMTaskQueue* queue = _task_queues->queue(i);
2913 2946 queue->oops_do(&cl);
2914 2947 }
2915 2948 }
2916 2949
2917 2950 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2918 2951
2919 2952 // Verify entries on the enqueued SATB buffers
2920 2953 if (verify_enqueued_buffers) {
2921 2954 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2922 2955 satb_qs.iterate_completed_buffers_read_only(&cl);
2923 2956 }
2924 2957
2925 2958 // Verify entries on the per-thread SATB buffers
2926 2959 if (verify_thread_buffers) {
2927 2960 cl.set_phase(VerifyNoCSetOopsSATBThread);
2928 2961 satb_qs.iterate_thread_buffers_read_only(&cl);
2929 2962 }
2930 2963
2931 2964 if (verify_fingers) {
2932 2965 // Verify the global finger
2933 2966 HeapWord* global_finger = finger();
2934 2967 if (global_finger != NULL && global_finger < _heap_end) {
2935 2968 // The global finger always points to a heap region boundary. We
2936 2969 // use heap_region_containing_raw() to get the containing region
2937 2970 // given that the global finger could be pointing to a free region
2938 2971 // which subsequently becomes a continues humongous region. If that
2939 2972 // happens, heap_region_containing() will return the bottom of the
2940 2973 // corresponding starts humongous region and the check below will
2941 2974 // not hold any more.
2942 2975 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2943 2976 guarantee(global_finger == global_hr->bottom(),
2944 2977 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2945 2978 global_finger, HR_FORMAT_PARAMS(global_hr)));
2946 2979 }
2947 2980
2948 2981 // Verify the task fingers
2949 2982 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2950 2983 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2951 2984 CMTask* task = _tasks[i];
2952 2985 HeapWord* task_finger = task->finger();
2953 2986 if (task_finger != NULL && task_finger < _heap_end) {
2954 2987 // See above note on the global finger verification.
2955 2988 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2956 2989 guarantee(task_finger == task_hr->bottom() ||
2957 2990 !task_hr->in_collection_set(),
2958 2991 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2959 2992 task_finger, HR_FORMAT_PARAMS(task_hr)));
2960 2993 }
2961 2994 }
2962 2995 }
2963 2996 }
2964 2997 #endif // PRODUCT
2965 2998
2966 2999 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2967 3000 _markStack.set_should_expand();
2968 3001 _markStack.setEmpty(); // Also clears the _markStack overflow flag
2969 3002 if (clear_overflow) {
2970 3003 clear_has_overflown();
2971 3004 } else {
2972 3005 assert(has_overflown(), "pre-condition");
2973 3006 }
2974 3007 _finger = _heap_start;
2975 3008
2976 3009 for (uint i = 0; i < _max_worker_id; ++i) {
2977 3010 CMTaskQueue* queue = _task_queues->queue(i);
2978 3011 queue->set_empty();
2979 3012 }
2980 3013 }
2981 3014
2982 3015 // Aggregate the counting data that was constructed concurrently
2983 3016 // with marking.
2984 3017 class AggregateCountDataHRClosure: public HeapRegionClosure {
2985 3018 G1CollectedHeap* _g1h;
2986 3019 ConcurrentMark* _cm;
2987 3020 CardTableModRefBS* _ct_bs;
2988 3021 BitMap* _cm_card_bm;
2989 3022 uint _max_worker_id;
2990 3023
2991 3024 public:
2992 3025 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2993 3026 BitMap* cm_card_bm,
2994 3027 uint max_worker_id) :
2995 3028 _g1h(g1h), _cm(g1h->concurrent_mark()),
2996 3029 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
2997 3030 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2998 3031
2999 3032 bool doHeapRegion(HeapRegion* hr) {
3000 3033 if (hr->continuesHumongous()) {
3001 3034 // We will ignore these here and process them when their
3002 3035 // associated "starts humongous" region is processed.
3003 3036 // Note that we cannot rely on their associated
3004 3037 // "starts humongous" region to have their bit set to 1
3005 3038 // since, due to the region chunking in the parallel region
3006 3039 // iteration, a "continues humongous" region might be visited
3007 3040 // before its associated "starts humongous".
3008 3041 return false;
3009 3042 }
3010 3043
3011 3044 HeapWord* start = hr->bottom();
3012 3045 HeapWord* limit = hr->next_top_at_mark_start();
3013 3046 HeapWord* end = hr->end();
3014 3047
3015 3048 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3016 3049 err_msg("Preconditions not met - "
3017 3050 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3018 3051 "top: "PTR_FORMAT", end: "PTR_FORMAT,
3019 3052 start, limit, hr->top(), hr->end()));
3020 3053
3021 3054 assert(hr->next_marked_bytes() == 0, "Precondition");
3022 3055
3023 3056 if (start == limit) {
3024 3057 // NTAMS of this region has not been set so nothing to do.
3025 3058 return false;
3026 3059 }
3027 3060
3028 3061 // 'start' should be in the heap.
3029 3062 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3030 3063 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3031 3064 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3032 3065
3033 3066 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3034 3067 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3035 3068 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3036 3069
3037 3070 // If ntams is not card aligned then we bump the card bitmap index
3038 3071 // for limit so that we get all the cards spanned by
3039 3072 // the object ending at ntams.
3040 3073 // Note: if this is the last region in the heap then ntams
3041 3074 // could actually be just beyond the end of the heap;
3042 3075 // limit_idx will then correspond to a (non-existent) card
3043 3076 // that is also outside the heap.
3044 3077 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3045 3078 limit_idx += 1;
3046 3079 }
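// Worked example (editorial; assumes the usual 512-byte cards, i.e.
// CardTableModRefBS::card_shift == 9, and, for simplicity, a heap
// based at address 0): with start == 0x10000 and limit (ntams) ==
// 0x10300, start_idx == 0x10000 >> 9 == 0x80 and limit_idx ==
// 0x10300 >> 9 == 0x81. Since 0x10300 is not card aligned, limit_idx
// is bumped to 0x82, so the half-open range [start_idx, limit_idx)
// == [0x80, 0x82) includes card 0x81 -- the card covering
// 0x10200..0x103ff and hence the tail of the object ending at ntams.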
3047 3080
3048 3081 assert(limit_idx <= end_idx, "or else use atomics");
3049 3082
3050 3083 // Aggregate the "stripe" in the count data associated with hr.
3051 3084 uint hrs_index = hr->hrs_index();
3052 3085 size_t marked_bytes = 0;
3053 3086
3054 3087 for (uint i = 0; i < _max_worker_id; i += 1) {
3055 3088 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3056 3089 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3057 3090
3058 3091 // Fetch the marked_bytes in this region for task i and
3059 3092 // add it to the running total for this region.
3060 3093 marked_bytes += marked_bytes_array[hrs_index];
3061 3094
3062 3095 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3063 3096 // into the global card bitmap.
3064 3097 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3065 3098
3066 3099 while (scan_idx < limit_idx) {
3067 3100 assert(task_card_bm->at(scan_idx) == true, "should be");
3068 3101 _cm_card_bm->set_bit(scan_idx);
3069 3102 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3070 3103
3071 3104 // BitMap::get_next_one_offset() can handle the case when
3072 3105 // its left_offset parameter is greater than its right_offset
3073 3106 // parameter. It does, however, have an early exit if
3074 3107 // left_offset == right_offset. So let's limit the value
3075 3108 // passed in for left offset here.
3076 3109 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3077 3110 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3078 3111 }
3079 3112 }
3080 3113
3081 3114 // Update the marked bytes for this region.
3082 3115 hr->add_to_marked_bytes(marked_bytes);
3083 3116
3084 3117 // Next heap region
3085 3118 return false;
3086 3119 }
3087 3120 };
3088 3121
3089 3122 class G1AggregateCountDataTask: public AbstractGangTask {
3090 3123 protected:
3091 3124 G1CollectedHeap* _g1h;
3092 3125 ConcurrentMark* _cm;
3093 3126 BitMap* _cm_card_bm;
3094 3127 uint _max_worker_id;
3095 3128 int _active_workers;
3096 3129
3097 3130 public:
3098 3131 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3099 3132 ConcurrentMark* cm,
3100 3133 BitMap* cm_card_bm,
3101 3134 uint max_worker_id,
3102 3135 int n_workers) :
3103 3136 AbstractGangTask("Count Aggregation"),
3104 3137 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3105 3138 _max_worker_id(max_worker_id),
3106 3139 _active_workers(n_workers) { }
3107 3140
3108 3141 void work(uint worker_id) {
3109 3142 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3110 3143
3111 3144 if (G1CollectedHeap::use_parallel_gc_threads()) {
3112 3145 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3113 3146 _active_workers,
3114 3147 HeapRegion::AggregateCountClaimValue);
3115 3148 } else {
3116 3149 _g1h->heap_region_iterate(&cl);
3117 3150 }
3118 3151 }
3119 3152 };
3120 3153
3121 3154
3122 3155 void ConcurrentMark::aggregate_count_data() {
3123 3156 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3124 3157 _g1h->workers()->active_workers() :
3125 3158 1);
3126 3159
3127 3160 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3128 3161 _max_worker_id, n_workers);
3129 3162
3130 3163 if (G1CollectedHeap::use_parallel_gc_threads()) {
3131 3164 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3132 3165 "sanity check");
3133 3166 _g1h->set_par_threads(n_workers);
3134 3167 _g1h->workers()->run_task(&g1_par_agg_task);
3135 3168 _g1h->set_par_threads(0);
3136 3169
3137 3170 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3138 3171 "sanity check");
3139 3172 _g1h->reset_heap_region_claim_values();
3140 3173 } else {
3141 3174 g1_par_agg_task.work(0);
3142 3175 }
3143 3176 }
3144 3177
3145 3178 // Clear the per-worker arrays used to store the per-region counting data
3146 3179 void ConcurrentMark::clear_all_count_data() {
3147 3180 // Clear the global card bitmap - it will be filled during
3148 3181 // liveness count aggregation (during remark) and the
3149 3182 // final counting task.
3150 3183 _card_bm.clear();
3151 3184
3152 3185 // Clear the global region bitmap - it will be filled as part
3153 3186 // of the final counting task.
3154 3187 _region_bm.clear();
3155 3188
3156 3189 uint max_regions = _g1h->max_regions();
3157 3190 assert(_max_worker_id > 0, "uninitialized");
3158 3191
3159 3192 for (uint i = 0; i < _max_worker_id; i += 1) {
3160 3193 BitMap* task_card_bm = count_card_bitmap_for(i);
3161 3194 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3162 3195
3163 3196 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3164 3197 assert(marked_bytes_array != NULL, "uninitialized");
3165 3198
3166 3199 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3167 3200 task_card_bm->clear();
3168 3201 }
3169 3202 }
3170 3203
3171 3204 void ConcurrentMark::print_stats() {
3172 3205 if (verbose_stats()) {
3173 3206 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3174 3207 for (size_t i = 0; i < _active_tasks; ++i) {
3175 3208 _tasks[i]->print_stats();
3176 3209 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3177 3210 }
3178 3211 }
3179 3212 }
3180 3213
3181 3214 // abandon current marking iteration due to a Full GC
3182 3215 void ConcurrentMark::abort() {
3183 3216 // Clear all marks to force marking thread to do nothing
3184 3217 _nextMarkBitMap->clearAll();
3185 3218 // Clear the liveness counting data
3186 3219 clear_all_count_data();
3187 3220 // Empty mark stack
3188 3221 clear_marking_state();
3189 3222 for (uint i = 0; i < _max_worker_id; ++i) {
3190 3223 _tasks[i]->clear_region_fields();
3191 3224 }
3192 3225 _has_aborted = true;
3193 3226
3194 3227 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3195 3228 satb_mq_set.abandon_partial_marking();
3196 3229 // This can be called either during or outside marking; we'll read
3197 3230 // the expected_active value from the SATB queue set.
3198 3231 satb_mq_set.set_active_all_threads(
3199 3232 false, /* new active value */
3200 3233 satb_mq_set.is_active() /* expected_active */);
3201 3234 }
3202 3235
3203 3236 static void print_ms_time_info(const char* prefix, const char* name,
3204 3237 NumberSeq& ns) {
3205 3238 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3206 3239 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3207 3240 if (ns.num() > 0) {
3208 3241 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3209 3242 prefix, ns.sd(), ns.maximum());
3210 3243 }
3211 3244 }
3212 3245
3213 3246 void ConcurrentMark::print_summary_info() {
3214 3247 gclog_or_tty->print_cr(" Concurrent marking:");
3215 3248 print_ms_time_info(" ", "init marks", _init_times);
3216 3249 print_ms_time_info(" ", "remarks", _remark_times);
3217 3250 {
3218 3251 print_ms_time_info(" ", "final marks", _remark_mark_times);
3219 3252 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3220 3253
3221 3254 }
3222 3255 print_ms_time_info(" ", "cleanups", _cleanup_times);
3223 3256 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3224 3257 _total_counting_time,
3225 3258 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3226 3259 (double)_cleanup_times.num()
3227 3260 : 0.0));
3228 3261 if (G1ScrubRemSets) {
3229 3262 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3230 3263 _total_rs_scrub_time,
3231 3264 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3232 3265 (double)_cleanup_times.num()
3233 3266 : 0.0));
3234 3267 }
3235 3268 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3236 3269 (_init_times.sum() + _remark_times.sum() +
3237 3270 _cleanup_times.sum())/1000.0);
3238 3271 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3239 3272 "(%8.2f s marking).",
3240 3273 cmThread()->vtime_accum(),
3241 3274 cmThread()->vtime_mark_accum());
3242 3275 }
3243 3276
3244 3277 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3245 3278 _parallel_workers->print_worker_threads_on(st);
3246 3279 }
3247 3280
3248 3281 // We take a break if someone is trying to stop the world.
3249 3282 bool ConcurrentMark::do_yield_check(uint worker_id) {
3250 3283 if (should_yield()) {
3251 3284 if (worker_id == 0) {
3252 3285 _g1h->g1_policy()->record_concurrent_pause();
3253 3286 }
3254 3287 cmThread()->yield();
3255 3288 return true;
3256 3289 } else {
3257 3290 return false;
3258 3291 }
3259 3292 }
3260 3293
3261 3294 bool ConcurrentMark::should_yield() {
3262 3295 return cmThread()->should_yield();
3263 3296 }
3264 3297
3265 3298 bool ConcurrentMark::containing_card_is_marked(void* p) {
3266 3299 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3267 3300 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3268 3301 }
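// Example (editorial; same 512-byte-card assumption as in the
// aggregation code above): for a pointer p at byte offset 0x2a33 from
// reserved_region().start(), offset >> card_shift == 0x2a33 >> 9 ==
// 0x15, so this tests bit 0x15 of _card_bm, i.e. the card covering
// offsets 0x2a00..0x2bff.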
3269 3302
3270 3303 bool ConcurrentMark::containing_cards_are_marked(void* start,
3271 3304 void* last) {
3272 3305 return containing_card_is_marked(start) &&
3273 3306 containing_card_is_marked(last);
3274 3307 }
3275 3308
3276 3309 #ifndef PRODUCT
3277 3310 // for debugging purposes
3278 3311 void ConcurrentMark::print_finger() {
3279 3312 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3280 3313 _heap_start, _heap_end, _finger);
3281 3314 for (uint i = 0; i < _max_worker_id; ++i) {
3282 3315 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger());
3283 3316 }
3284 3317 gclog_or_tty->print_cr("");
3285 3318 }
3286 3319 #endif
3287 3320
3288 3321 void CMTask::scan_object(oop obj) {
3289 3322 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3290 3323
3291 3324 if (_cm->verbose_high()) {
3292 3325 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3293 3326 _worker_id, (void*) obj);
3294 3327 }
3295 3328
3296 3329 size_t obj_size = obj->size();
3297 3330 _words_scanned += obj_size;
3298 3331
3299 3332 obj->oop_iterate(_cm_oop_closure);
3300 3333 statsOnly( ++_objs_scanned );
3301 3334 check_limits();
3302 3335 }
3303 3336
3304 3337 // Closure for iteration over bitmaps
3305 3338 class CMBitMapClosure : public BitMapClosure {
3306 3339 private:
3307 3340 // the bitmap that is being iterated over
3308 3341 CMBitMap* _nextMarkBitMap;
3309 3342 ConcurrentMark* _cm;
3310 3343 CMTask* _task;
3311 3344
3312 3345 public:
3313 3346 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3314 3347 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3315 3348
3316 3349 bool do_bit(size_t offset) {
3317 3350 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3318 3351 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3319 3352 assert( addr < _cm->finger(), "invariant");
3320 3353
3321 3354 statsOnly( _task->increase_objs_found_on_bitmap() );
3322 3355 assert(addr >= _task->finger(), "invariant");
3323 3356
3324 3357 // We move that task's local finger along.
3325 3358 _task->move_finger_to(addr);
3326 3359
3327 3360 _task->scan_object(oop(addr));
3328 3361 // we only partially drain the local queue and global stack
3329 3362 _task->drain_local_queue(true);
3330 3363 _task->drain_global_stack(true);
3331 3364
3332 3365 // if the has_aborted flag has been raised, we need to bail out of
3333 3366 // the iteration
3334 3367 return !_task->has_aborted();
3335 3368 }
3336 3369 };
3337 3370
3338 3371 // Closure for iterating over objects, currently only used for
3339 3372 // processing SATB buffers.
3340 3373 class CMObjectClosure : public ObjectClosure {
3341 3374 private:
3342 3375 CMTask* _task;
3343 3376
3344 3377 public:
3345 3378 void do_object(oop obj) {
3346 3379 _task->deal_with_reference(obj);
3347 3380 }
3348 3381
3349 3382 CMObjectClosure(CMTask* task) : _task(task) { }
3350 3383 };
3351 3384
3352 3385 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3353 3386 ConcurrentMark* cm,
3354 3387 CMTask* task)
3355 3388 : _g1h(g1h), _cm(cm), _task(task) {
3356 3389 assert(_ref_processor == NULL, "should be initialized to NULL");
3357 3390
3358 3391 if (G1UseConcMarkReferenceProcessing) {
3359 3392 _ref_processor = g1h->ref_processor_cm();
3360 3393 assert(_ref_processor != NULL, "should not be NULL");
3361 3394 }
3362 3395 }
3363 3396
3364 3397 void CMTask::setup_for_region(HeapRegion* hr) {
3365 3398 // Separated the asserts so that we know which one fires.
3366 3399 assert(hr != NULL,
3367 3400 "claim_region() should have filtered out continues humongous regions");
3368 3401 assert(!hr->continuesHumongous(),
3369 3402 "claim_region() should have filtered out continues humongous regions");
3370 3403
3371 3404 if (_cm->verbose_low()) {
3372 3405 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3373 3406 _worker_id, hr);
3374 3407 }
3375 3408
3376 3409 _curr_region = hr;
3377 3410 _finger = hr->bottom();
3378 3411 update_region_limit();
3379 3412 }
3380 3413
3381 3414 void CMTask::update_region_limit() {
3382 3415 HeapRegion* hr = _curr_region;
3383 3416 HeapWord* bottom = hr->bottom();
3384 3417 HeapWord* limit = hr->next_top_at_mark_start();
3385 3418
3386 3419 if (limit == bottom) {
3387 3420 if (_cm->verbose_low()) {
3388 3421 gclog_or_tty->print_cr("[%u] found an empty region "
3389 3422 "["PTR_FORMAT", "PTR_FORMAT")",
3390 3423 _worker_id, bottom, limit);
3391 3424 }
3392 3425 // The region was collected underneath our feet.
3393 3426 // We set the finger to bottom to ensure that the bitmap
3394 3427 // iteration that will follow this will not do anything.
3395 3428 // (this is not a condition that holds when we set the region up,
3396 3429 // as the region is not supposed to be empty in the first place)
3397 3430 _finger = bottom;
3398 3431 } else if (limit >= _region_limit) {
3399 3432 assert(limit >= _finger, "peace of mind");
3400 3433 } else {
3401 3434 assert(limit < _region_limit, "only way to get here");
3402 3435 // This can happen under some pretty unusual circumstances. An
3403 3436 // evacuation pause empties the region underneath our feet (NTAMS
3404 3437 // at bottom). We then do some allocation in the region (NTAMS
3405 3438 // stays at bottom), followed by the region being used as a GC
3406 3439 // alloc region (NTAMS will move to top() and the objects
3407 3440 // originally below it will be grayed). All objects now marked in
3408 3441 // the region are explicitly grayed, if below the global finger,
3409 3442 // and in fact we do not need to scan anything else. So, we simply
3410 3443 // set _finger to be limit to ensure that the bitmap iteration
3411 3444 // doesn't do anything.
3412 3445 _finger = limit;
3413 3446 }
3414 3447
3415 3448 _region_limit = limit;
3416 3449 }
3417 3450
3418 3451 void CMTask::giveup_current_region() {
3419 3452 assert(_curr_region != NULL, "invariant");
3420 3453 if (_cm->verbose_low()) {
3421 3454 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3422 3455 _worker_id, _curr_region);
3423 3456 }
3424 3457 clear_region_fields();
3425 3458 }
3426 3459
3427 3460 void CMTask::clear_region_fields() {
3428 3461 // Values for these three fields that indicate that we're not
3429 3462 // holding on to a region.
3430 3463 _curr_region = NULL;
3431 3464 _finger = NULL;
3432 3465 _region_limit = NULL;
3433 3466 }
3434 3467
3435 3468 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3436 3469 if (cm_oop_closure == NULL) {
3437 3470 assert(_cm_oop_closure != NULL, "invariant");
3438 3471 } else {
3439 3472 assert(_cm_oop_closure == NULL, "invariant");
3440 3473 }
3441 3474 _cm_oop_closure = cm_oop_closure;
3442 3475 }
3443 3476
3444 3477 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3445 3478 guarantee(nextMarkBitMap != NULL, "invariant");
3446 3479
3447 3480 if (_cm->verbose_low()) {
3448 3481 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3449 3482 }
3450 3483
3451 3484 _nextMarkBitMap = nextMarkBitMap;
3452 3485 clear_region_fields();
3453 3486
3454 3487 _calls = 0;
3455 3488 _elapsed_time_ms = 0.0;
3456 3489 _termination_time_ms = 0.0;
3457 3490 _termination_start_time_ms = 0.0;
3458 3491
3459 3492 #if _MARKING_STATS_
3460 3493 _local_pushes = 0;
3461 3494 _local_pops = 0;
3462 3495 _local_max_size = 0;
3463 3496 _objs_scanned = 0;
3464 3497 _global_pushes = 0;
3465 3498 _global_pops = 0;
3466 3499 _global_max_size = 0;
3467 3500 _global_transfers_to = 0;
3468 3501 _global_transfers_from = 0;
3469 3502 _regions_claimed = 0;
3470 3503 _objs_found_on_bitmap = 0;
3471 3504 _satb_buffers_processed = 0;
3472 3505 _steal_attempts = 0;
3473 3506 _steals = 0;
3474 3507 _aborted = 0;
3475 3508 _aborted_overflow = 0;
3476 3509 _aborted_cm_aborted = 0;
3477 3510 _aborted_yield = 0;
3478 3511 _aborted_timed_out = 0;
3479 3512 _aborted_satb = 0;
3480 3513 _aborted_termination = 0;
3481 3514 #endif // _MARKING_STATS_
3482 3515 }
3483 3516
3484 3517 bool CMTask::should_exit_termination() {
3485 3518 regular_clock_call();
3486 3519 // This is called when we are in the termination protocol. We should
3487 3520 // quit if, for some reason, this task wants to abort or the global
3488 3521 // stack is not empty (this means that we can get work from it).
3489 3522 return !_cm->mark_stack_empty() || has_aborted();
3490 3523 }
3491 3524
3492 3525 void CMTask::reached_limit() {
3493 3526 assert(_words_scanned >= _words_scanned_limit ||
3494 3527 _refs_reached >= _refs_reached_limit ,
3495 3528 "shouldn't have been called otherwise");
3496 3529 regular_clock_call();
3497 3530 }
3498 3531
3499 3532 void CMTask::regular_clock_call() {
3500 3533 if (has_aborted()) return;
3501 3534
3502 3535 // First, we need to recalculate the words scanned and refs reached
3503 3536 // limits for the next clock call.
3504 3537 recalculate_limits();
3505 3538
3506 3539 // During the regular clock call we do the following:
3507 3540
3508 3541 // (1) If an overflow has been flagged, then we abort.
3509 3542 if (_cm->has_overflown()) {
3510 3543 set_has_aborted();
3511 3544 return;
3512 3545 }
3513 3546
3514 3547 // If we are not concurrent (i.e. we're doing remark) we don't need
3515 3548 // to check anything else. The other steps are only needed during
3516 3549 // the concurrent marking phase.
3517 3550 if (!concurrent()) return;
3518 3551
3519 3552 // (2) If marking has been aborted for Full GC, then we also abort.
3520 3553 if (_cm->has_aborted()) {
3521 3554 set_has_aborted();
3522 3555 statsOnly( ++_aborted_cm_aborted );
3523 3556 return;
3524 3557 }
3525 3558
3526 3559 double curr_time_ms = os::elapsedVTime() * 1000.0;
3527 3560
3528 3561 // (3) If marking stats are enabled, then we update the step history.
3529 3562 #if _MARKING_STATS_
3530 3563 if (_words_scanned >= _words_scanned_limit) {
3531 3564 ++_clock_due_to_scanning;
3532 3565 }
3533 3566 if (_refs_reached >= _refs_reached_limit) {
3534 3567 ++_clock_due_to_marking;
3535 3568 }
3536 3569
3537 3570 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3538 3571 _interval_start_time_ms = curr_time_ms;
3539 3572 _all_clock_intervals_ms.add(last_interval_ms);
3540 3573
3541 3574 if (_cm->verbose_medium()) {
3542 3575 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3543 3576 "scanned = %d%s, refs reached = %d%s",
3544 3577 _worker_id, last_interval_ms,
3545 3578 _words_scanned,
3546 3579 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3547 3580 _refs_reached,
3548 3581 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3549 3582 }
3550 3583 #endif // _MARKING_STATS_
3551 3584
3552 3585 // (4) We check whether we should yield. If we have to, then we abort.
3553 3586 if (_cm->should_yield()) {
3554 3587 // We should yield. To do this we abort the task. The caller is
3555 3588 // responsible for yielding.
3556 3589 set_has_aborted();
3557 3590 statsOnly( ++_aborted_yield );
3558 3591 return;
3559 3592 }
3560 3593
3561 3594 // (5) We check whether we've reached our time quota. If we have,
3562 3595 // then we abort.
3563 3596 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3564 3597 if (elapsed_time_ms > _time_target_ms) {
3565 3598 set_has_aborted();
3566 3599 _has_timed_out = true;
3567 3600 statsOnly( ++_aborted_timed_out );
3568 3601 return;
3569 3602 }
3570 3603
3571 3604 // (6) Finally, we check whether there are enough completed SATB
3572 3605 // buffers available for processing. If there are, we abort.
3573 3606 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3574 3607 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3575 3608 if (_cm->verbose_low()) {
3576 3609 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3577 3610 _worker_id);
3578 3611 }
3579 3612 // we do need to process SATB buffers, we'll abort and restart
3580 3613 // the marking task to do so
3581 3614 set_has_aborted();
3582 3615 statsOnly( ++_aborted_satb );
3583 3616 return;
3584 3617 }
3585 3618 }
3586 3619
3587 3620 void CMTask::recalculate_limits() {
3588 3621 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3589 3622 _words_scanned_limit = _real_words_scanned_limit;
3590 3623
3591 3624 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3592 3625 _refs_reached_limit = _real_refs_reached_limit;
3593 3626 }
3594 3627
3595 3628 void CMTask::decrease_limits() {
3596 3629 // This is called when we believe that we're going to do an infrequent
3597 3630 // operation which will increase the per-byte-scanned cost (i.e. move
3598 3631 // entries to/from the global stack). It basically tries to decrease the
3599 3632 // scanning limit so that the clock is called earlier.
3600 3633
3601 3634 if (_cm->verbose_medium()) {
3602 3635 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3603 3636 }
3604 3637
3605 3638 _words_scanned_limit = _real_words_scanned_limit -
3606 3639 3 * words_scanned_period / 4;
3607 3640 _refs_reached_limit = _real_refs_reached_limit -
3608 3641 3 * refs_reached_period / 4;
3609 3642 }
3610 3643
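// [Editorial sketch -- not part of this patch] The limits maintained
// by recalculate_limits()/decrease_limits() feed a work-based trigger:
// cheap counters are bumped per unit of work, and the comparatively
// expensive clock is consulted only when a limit is crossed (see the
// check_limits() call in scan_object() above). A hypothetical
// stand-alone version showing the shape of the scheme:

#include <cstddef>

class WorkBasedClock {
  size_t _words_scanned;
  size_t _words_scanned_limit;
  size_t _refs_reached;
  size_t _refs_reached_limit;

  void reached_limit() {
    // In CMTask this runs the abort checks and recalculates the limits.
  }

public:
  WorkBasedClock(size_t words_limit, size_t refs_limit)
    : _words_scanned(0), _words_scanned_limit(words_limit),
      _refs_reached(0), _refs_reached_limit(refs_limit) { }

  // Bump the counters as work is done; consult the clock only when a
  // limit is crossed.
  void note_words_scanned(size_t n) { _words_scanned += n; check_limits(); }
  void note_ref_reached()           { _refs_reached  += 1; check_limits(); }

  void check_limits() {
    if (_words_scanned >= _words_scanned_limit ||
        _refs_reached >= _refs_reached_limit) {
      reached_limit();
    }
  }
};
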
3611 3644 void CMTask::move_entries_to_global_stack() {
3612 3645 // local array where we'll store the entries that will be popped
3613 3646 // from the local queue
3614 3647 oop buffer[global_stack_transfer_size];
3615 3648
3616 3649 int n = 0;
3617 3650 oop obj;
3618 3651 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3619 3652 buffer[n] = obj;
3620 3653 ++n;
3621 3654 }
3622 3655
3623 3656 if (n > 0) {
3624 3657 // we popped at least one entry from the local queue
3625 3658
3626 3659 statsOnly( ++_global_transfers_to; _local_pops += n );
3627 3660
3628 3661 if (!_cm->mark_stack_push(buffer, n)) {
3629 3662 if (_cm->verbose_low()) {
3630 3663 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3631 3664 _worker_id);
3632 3665 }
3633 3666 set_has_aborted();
3634 3667 } else {
3635 3668 // the transfer was successful
3636 3669
3637 3670 if (_cm->verbose_medium()) {
3638 3671 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3639 3672 _worker_id, n);
3640 3673 }
3641 3674 statsOnly( int tmp_size = _cm->mark_stack_size();
3642 3675 if (tmp_size > _global_max_size) {
3643 3676 _global_max_size = tmp_size;
3644 3677 }
3645 3678 _global_pushes += n );
3646 3679 }
3647 3680 }
3648 3681
3649 3682 // this operation was quite expensive, so decrease the limits
3650 3683 decrease_limits();
3651 3684 }
3652 3685
3653 3686 void CMTask::get_entries_from_global_stack() {
3654 3687 // local array where we'll store the entries that will be popped
3655 3688 // from the global stack.
3656 3689 oop buffer[global_stack_transfer_size];
3657 3690 int n;
3658 3691 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3659 3692 assert(n <= global_stack_transfer_size,
3660 3693 "we should not pop more than the given limit");
3661 3694 if (n > 0) {
3662 3695 // yes, we did actually pop at least one entry
3663 3696
3664 3697 statsOnly( ++_global_transfers_from; _global_pops += n );
3665 3698 if (_cm->verbose_medium()) {
3666 3699 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3667 3700 _worker_id, n);
3668 3701 }
3669 3702 for (int i = 0; i < n; ++i) {
3670 3703 bool success = _task_queue->push(buffer[i]);
3671 3704 // We only call this when the local queue is empty or under a
3672 3705 // given target limit. So, we do not expect this push to fail.
3673 3706 assert(success, "invariant");
3674 3707 }
3675 3708
3676 3709 statsOnly( int tmp_size = _task_queue->size();
3677 3710 if (tmp_size > _local_max_size) {
3678 3711 _local_max_size = tmp_size;
3679 3712 }
3680 3713 _local_pushes += n );
3681 3714 }
3682 3715
3683 3716 // this operation was quite expensive, so decrease the limits
3684 3717 decrease_limits();
3685 3718 }
3686 3719
3687 3720 void CMTask::drain_local_queue(bool partially) {
3688 3721 if (has_aborted()) return;
3689 3722
3690 3723 // Decide what the target size is, depending on whether we're going to
3691 3724 // drain it partially (so that other tasks can steal if they run out
3692 3725 // of things to do) or totally (at the very end).
3693 3726 size_t target_size;
3694 3727 if (partially) {
3695 3728 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3696 3729 } else {
3697 3730 target_size = 0;
3698 3731 }
3699 3732
3700 3733 if (_task_queue->size() > target_size) {
3701 3734 if (_cm->verbose_high()) {
3702 3735 gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
3703 3736 _worker_id, target_size);
3704 3737 }
3705 3738
3706 3739 oop obj;
3707 3740 bool ret = _task_queue->pop_local(obj);
3708 3741 while (ret) {
3709 3742 statsOnly( ++_local_pops );
3710 3743
3711 3744 if (_cm->verbose_high()) {
3712 3745 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3713 3746 (void*) obj);
3714 3747 }
3715 3748
3716 3749 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3717 3750 assert(!_g1h->is_on_master_free_list(
3718 3751 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3719 3752
3720 3753 scan_object(obj);
3721 3754
3722 3755 if (_task_queue->size() <= target_size || has_aborted()) {
3723 3756 ret = false;
3724 3757 } else {
3725 3758 ret = _task_queue->pop_local(obj);
3726 3759 }
3727 3760 }
3728 3761
3729 3762 if (_cm->verbose_high()) {
3730 3763 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3731 3764 _worker_id, _task_queue->size());
3732 3765 }
3733 3766 }
3734 3767 }
3735 3768
3736 3769 void CMTask::drain_global_stack(bool partially) {
3737 3770 if (has_aborted()) return;
3738 3771
3739 3772 // We have a policy to drain the local queue before we attempt to
3740 3773 // drain the global stack.
3741 3774 assert(partially || _task_queue->size() == 0, "invariant");
3742 3775
3743 3776 // Decide what the target size is, depending on whether we're going to
3744 3777 // drain it partially (so that other tasks can steal if they run out
3745 3778 // of things to do) or totally (at the very end). Notice that,
3746 3779 // because we move entries from the global stack in chunks or
3747 3780 // because another task might be doing the same, we might in fact
3748 3781 // drop below the target. But this is not a problem.
3749 3782 size_t target_size;
3750 3783 if (partially) {
3751 3784 target_size = _cm->partial_mark_stack_size_target();
3752 3785 } else {
3753 3786 target_size = 0;
3754 3787 }
3755 3788
3756 3789 if (_cm->mark_stack_size() > target_size) {
3757 3790 if (_cm->verbose_low()) {
3758 3791 gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
3759 3792 _worker_id, target_size);
3760 3793 }
3761 3794
3762 3795 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3763 3796 get_entries_from_global_stack();
3764 3797 drain_local_queue(partially);
3765 3798 }
3766 3799
3767 3800 if (_cm->verbose_low()) {
3768 3801 gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3769 3802 _worker_id, _cm->mark_stack_size());
3770 3803 }
3771 3804 }
3772 3805 }
3773 3806
3774 3807 // The SATB queue code makes several assumptions about whether to call
3775 3808 // the par or non-par versions of its methods. This is why some of the
3776 3809 // code is replicated. We should really get rid of the single-threaded
3777 3810 // version of the code to simplify things.
3778 3811 void CMTask::drain_satb_buffers() {
3779 3812 if (has_aborted()) return;
3780 3813
3781 3814 // We set this so that the regular clock knows that we're in the
3782 3815 // middle of draining buffers and doesn't set the abort flag when it
3783 3816 // notices that SATB buffers are available for draining. It'd be
3784 3817 // very counterproductive if it did that. :-)
3785 3818 _draining_satb_buffers = true;
3786 3819
3787 3820 CMObjectClosure oc(this);
3788 3821 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3789 3822 if (G1CollectedHeap::use_parallel_gc_threads()) {
3790 3823 satb_mq_set.set_par_closure(_worker_id, &oc);
3791 3824 } else {
3792 3825 satb_mq_set.set_closure(&oc);
3793 3826 }
3794 3827
3795 3828 // This keeps claiming and applying the closure to completed buffers
3796 3829 // until we run out of buffers or we need to abort.
3797 3830 if (G1CollectedHeap::use_parallel_gc_threads()) {
3798 3831 while (!has_aborted() &&
3799 3832 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3800 3833 if (_cm->verbose_medium()) {
3801 3834 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3802 3835 }
3803 3836 statsOnly( ++_satb_buffers_processed );
3804 3837 regular_clock_call();
3805 3838 }
3806 3839 } else {
3807 3840 while (!has_aborted() &&
3808 3841 satb_mq_set.apply_closure_to_completed_buffer()) {
3809 3842 if (_cm->verbose_medium()) {
3810 3843 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3811 3844 }
3812 3845 statsOnly( ++_satb_buffers_processed );
3813 3846 regular_clock_call();
3814 3847 }
3815 3848 }
3816 3849
3817 3850 if (!concurrent() && !has_aborted()) {
3818 3851 // We should only do this during remark.
3819 3852 if (G1CollectedHeap::use_parallel_gc_threads()) {
3820 3853 satb_mq_set.par_iterate_closure_all_threads(_worker_id);
3821 3854 } else {
3822 3855 satb_mq_set.iterate_closure_all_threads();
3823 3856 }
3824 3857 }
3825 3858
3826 3859 _draining_satb_buffers = false;
3827 3860
3828 3861 assert(has_aborted() ||
3829 3862 concurrent() ||
3830 3863 satb_mq_set.completed_buffers_num() == 0, "invariant");
3831 3864
3832 3865 if (G1CollectedHeap::use_parallel_gc_threads()) {
3833 3866 satb_mq_set.set_par_closure(_worker_id, NULL);
3834 3867 } else {
3835 3868 satb_mq_set.set_closure(NULL);
3836 3869 }
3837 3870
3838 3871 // again, this was a potentially expensive operation; decrease the
3839 3872 // limits to get the regular clock call early
3840 3873 decrease_limits();
3841 3874 }
3842 3875
3843 3876 void CMTask::print_stats() {
3844 3877 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3845 3878 _worker_id, _calls);
3846 3879 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3847 3880 _elapsed_time_ms, _termination_time_ms);
3848 3881 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3849 3882 _step_times_ms.num(), _step_times_ms.avg(),
3850 3883 _step_times_ms.sd());
3851 3884 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3852 3885 _step_times_ms.maximum(), _step_times_ms.sum());
3853 3886
3854 3887 #if _MARKING_STATS_
3855 3888 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3856 3889 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3857 3890 _all_clock_intervals_ms.sd());
3858 3891 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3859 3892 _all_clock_intervals_ms.maximum(),
3860 3893 _all_clock_intervals_ms.sum());
3861 3894 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3862 3895 _clock_due_to_scanning, _clock_due_to_marking);
3863 3896 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3864 3897 _objs_scanned, _objs_found_on_bitmap);
3865 3898 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3866 3899 _local_pushes, _local_pops, _local_max_size);
3867 3900 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3868 3901 _global_pushes, _global_pops, _global_max_size);
3869 3902 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3870 3903 _global_transfers_to,_global_transfers_from);
3871 3904 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
3872 3905 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3873 3906 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3874 3907 _steal_attempts, _steals);
3875 3908 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3876 3909 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3877 3910 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3878 3911 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3879 3912 _aborted_timed_out, _aborted_satb, _aborted_termination);
3880 3913 #endif // _MARKING_STATS_
3881 3914 }
3882 3915
3883 3916 /*****************************************************************************
3884 3917
3885 3918 The do_marking_step(time_target_ms) method is the building block
3886 3919 of the parallel marking framework. It can be called in parallel
3887 3920 with other invocations of do_marking_step() on different tasks
3888 3921 (but only one per task, obviously) and concurrently with the
3889 3922 mutator threads, or during remark, hence it eliminates the need
3890 3923 for two versions of the code. When called during remark, it will
3891 3924 pick up from where the task left off during the concurrent marking
3892 3925 phase. Interestingly, tasks are also claimable during evacuation
3893 3926 pauses, since do_marking_step() ensures that it aborts before
3894 3927 it needs to yield.
3895 3928
3896 3929 The data structures that it uses to do marking work are the
3897 3930 following:
3898 3931
3899 3932 (1) Marking Bitmap. If there are gray objects that appear only
3900 3933 on the bitmap (this happens either when dealing with an overflow
3901 3934 or when the initial marking phase has simply marked the roots
3902 3935 and didn't push them on the stack), then tasks claim heap
3903 3936 regions whose bitmap they then scan to find gray objects. A
3904 3937 global finger indicates where the end of the last claimed region
3905 3938 is. A local finger indicates how far into the region a task has
3906 3939 scanned. The two fingers are used to determine how to gray an
3907 3940 object (i.e. whether simply marking it is OK, as it will be
3908 3941 visited by a task in the future, or whether it needs to be also
3909 3942 pushed on a stack).
3910 3943
3911 3944 (2) Local Queue. The local queue of the task which is accessed
3912 3945 reasonably efficiently by the task. Other tasks can steal from
3913 3946 it when they run out of work. Throughout the marking phase, a
3914 3947 task attempts to keep its local queue short but not totally
3915 3948 empty, so that entries are available for stealing by other
3916 3949 tasks. Only when there is no more work will a task totally
3917 3950 drain its local queue.
3918 3951
3919 3952 (3) Global Mark Stack. This handles local queue overflow. During
3920 3953 marking only sets of entries are moved between it and the local
3921 3954 queues, as access to it requires a mutex, and more fine-grained
3922 3955 interaction with it might cause contention. If it
3923 3956 overflows, then the marking phase should restart and iterate
3924 3957 over the bitmap to identify gray objects. Throughout the marking
3925 3958 phase, tasks attempt to keep the global mark stack at a small
3926 3959 length but not totally empty, so that entries are available for
3927 3960 popping by other tasks. Only when there is no more work will
3928 3961 tasks totally drain the global mark stack.
3929 3962
3930 3963 (4) SATB Buffer Queue. This is where completed SATB buffers are
3931 3964 made available. Buffers are regularly removed from this queue
3932 3965 and scanned for roots, so that the queue doesn't get too
3933 3966 long. During remark, all completed buffers are processed, as
3934 3967 well as the filled-in parts of any uncompleted buffers.
3935 3968
3936 3969 The do_marking_step() method tries to abort when the time target
3937 3970 has been reached. There are a few other cases when the
3938 3971 do_marking_step() method also aborts:
3939 3972
3940 3973 (1) When the marking phase has been aborted (after a Full GC).
3941 3974
3942 3975 (2) When a global overflow (on the global stack) has been
3943 3976 triggered. Before the task aborts, it will actually sync up with
3944 3977 the other tasks to ensure that all the marking data structures
3945 3978 (local queues, stacks, fingers etc.) are re-initialised so that
3946 3979 when do_marking_step() completes, the marking phase can
3947 3980 immediately restart.
3948 3981
3949 3982 (3) When enough completed SATB buffers are available. The
3950 3983 do_marking_step() method only tries to drain SATB buffers right
3951 3984 at the beginning. So, if enough buffers are available, the
3952 3985 marking step aborts and the SATB buffers are processed at
3953 3986 the beginning of the next invocation.
3954 3987
3955 3988 (4) To yield. When we have to yield, we abort and yield
3956 3989 right at the end of do_marking_step(). This saves us from a lot
3957 3990 of hassle as, by yielding, we might allow a Full GC. If this
3958 3991 happens then objects will be compacted underneath our feet, the
3959 3992 heap might shrink, etc. We save checking for this by just
3960 3993 aborting and doing the yield right at the end.
3961 3994
3962 3995 From the above it follows that the do_marking_step() method should
3963 3996 be called in a loop (or, otherwise, regularly) until it completes.
3964 3997
3965 3998 If a marking step completes without its has_aborted() flag being
3966 3999 true, it means it has completed the current marking phase (and
3967 4000 also all other marking tasks have done so and have all synced up).
3968 4001
3969 4002 A method called regular_clock_call() is invoked "regularly" (in
3970 4003 sub-ms intervals) throughout marking. It is this clock method that
3971 4004 checks all the abort conditions which were mentioned above and
3972 4005 decides when the task should abort. A work-based scheme is used to
3973 4006 trigger this clock method: when the number of object words the
3974 4007 marking phase has scanned or the number of references the marking
3975 4008 phase has visited reach a given limit. Additional invocations of
3976 4009 the clock method have been planted in a few other strategic places
3977 4010 too. The initial reason for the clock method was to avoid calling
3978 4011 vtime too regularly, as it is quite expensive. So, once it was in
3979 4012 place, it was natural to piggy-back all the other conditions on it
3980 4013 too and not constantly check them throughout the code.
3981 4014
3982 4015 *****************************************************************************/
3983 4016
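// [Editorial sketch -- not part of this patch] As the comment above
// notes, do_marking_step() is meant to be driven in a loop until it
// completes without aborting. A hypothetical driver, loosely shaped
// like the concurrent marking worker's loop (the function name and
// the 10ms step target are illustrative only):

static void drive_marking_task(ConcurrentMark* cm, CMTask* task) {
  do {
    task->do_marking_step(10.0 /* time_target_ms */,
                          true /* do_stealing */,
                          true /* do_termination */);
    // A step that aborted in order to yield expects the caller to
    // yield here before retrying; a global abort (Full GC) or a clean
    // completion ends the loop.
  } while (!cm->has_aborted() && task->has_aborted());
}
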
3984 4017 void CMTask::do_marking_step(double time_target_ms,
3985 4018 bool do_stealing,
3986 4019 bool do_termination) {
3987 4020 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3988 4021 assert(concurrent() == _cm->concurrent(), "they should be the same");
3989 4022
3990 4023 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3991 4024 assert(_task_queues != NULL, "invariant");
3992 4025 assert(_task_queue != NULL, "invariant");
3993 4026 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3994 4027
3995 4028 assert(!_claimed,
3996 4029 "only one thread should claim this task at any one time");
3997 4030
3998 4031 // OK, this doesn't safeguard against all possible scenarios, as it is
3999 4032 // possible for two threads to set the _claimed flag at the same
4000 4033 // time. But it is only for debugging purposes anyway and it will
4001 4034 // catch most problems.
4002 4035 _claimed = true;
4003 4036
4004 4037 _start_time_ms = os::elapsedVTime() * 1000.0;
4005 4038 statsOnly( _interval_start_time_ms = _start_time_ms );
4006 4039
4007 4040 double diff_prediction_ms =
4008 4041 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4009 4042 _time_target_ms = time_target_ms - diff_prediction_ms;
4010 4043
4011 4044 // set up the variables that are used in the work-based scheme to
4012 4045 // call the regular clock method
4013 4046 _words_scanned = 0;
4014 4047 _refs_reached = 0;
4015 4048 recalculate_limits();
4016 4049
4017 4050 // clear all flags
4018 4051 clear_has_aborted();
4019 4052 _has_timed_out = false;
4020 4053 _draining_satb_buffers = false;
4021 4054
4022 4055 ++_calls;
4023 4056
4024 4057 if (_cm->verbose_low()) {
4025 4058 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4026 4059 "target = %1.2lfms >>>>>>>>>>",
4027 4060 _worker_id, _calls, _time_target_ms);
4028 4061 }
4029 4062
4030 4063 // Set up the bitmap and oop closures. Anything that uses them is
4031 4064 // eventually called from this method, so it is OK to allocate these
4032 4065 // statically.
4033 4066 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4034 4067 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4035 4068 set_cm_oop_closure(&cm_oop_closure);
4036 4069
4037 4070 if (_cm->has_overflown()) {
4038 4071 // This can happen if the mark stack overflows during a GC pause
4039 4072 // and this task, after a yield point, restarts. We have to abort
4040 4073 // as we need to get into the overflow protocol which happens
4041 4074 // right at the end of this task.
4042 4075 set_has_aborted();
4043 4076 }
4044 4077
4045 4078 // First drain any available SATB buffers. After this, we will not
4046 4079 // look at SATB buffers before the next invocation of this method.
4047 4080 // If enough completed SATB buffers are queued up, the regular clock
4048 4081 // will abort this task so that it restarts.
4049 4082 drain_satb_buffers();
4050 4083 // ...then partially drain the local queue and the global stack
4051 4084 drain_local_queue(true);
4052 4085 drain_global_stack(true);
4053 4086
4054 4087 do {
4055 4088 if (!has_aborted() && _curr_region != NULL) {
4056 4089 // This means that we're already holding on to a region.
4057 4090 assert(_finger != NULL, "if region is not NULL, then the finger "
4058 4091 "should not be NULL either");
4059 4092
4060 4093 // We might have restarted this task after an evacuation pause
4061 4094 // which might have evacuated the region we're holding on to
4062 4095 // underneath our feet. Let's read its limit again to make sure
4063 4096 // that we do not iterate over a region of the heap that
4064 4097 // contains garbage (update_region_limit() will also move
4065 4098 // _finger to the start of the region if it is found empty).
4066 4099 update_region_limit();
4067 4100 // We will start from _finger not from the start of the region,
4068 4101 // as we might be restarting this task after aborting half-way
4069 4102 // through scanning this region. In this case, _finger points to
4070 4103 // the address where we last found a marked object. If this is a
4071 4104 // fresh region, _finger points to start().
4072 4105 MemRegion mr = MemRegion(_finger, _region_limit);
4073 4106
4074 4107 if (_cm->verbose_low()) {
4075 4108 gclog_or_tty->print_cr("[%u] we're scanning part "
4076 4109 "["PTR_FORMAT", "PTR_FORMAT") "
4077 4110 "of region "PTR_FORMAT,
4078 4111 _worker_id, _finger, _region_limit, _curr_region);
4079 4112 }
4080 4113
4081 4114 // Let's iterate over the bitmap of the part of the
4082 4115 // region that is left.
4083 4116 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4084 4117 // We successfully completed iterating over the region. Now,
4085 4118 // let's give up the region.
4086 4119 giveup_current_region();
4087 4120 regular_clock_call();
4088 4121 } else {
4089 4122 assert(has_aborted(), "currently the only way to do so");
4090 4123 // The only way to abort the bitmap iteration is to return
4091 4124 // false from the do_bit() method. However, inside the
4092 4125 // do_bit() method we move the _finger to point to the
4093 4126 // object currently being looked at. So, if we bail out, we
4094 4127 // have definitely set _finger to something non-null.
4095 4128 assert(_finger != NULL, "invariant");
4096 4129
4097 4130 // Region iteration was actually aborted. So now _finger
4098 4131 // points to the address of the object we last scanned. If we
4099 4132 // leave it there, when we restart this task, we will rescan
4100 4133 // the object. It is easy to avoid this. We move the finger by
4101 4134 // enough to point to the next possible object header (the
4102 4135 // bitmap knows by how much we need to move it as it knows its
4103 4136 // granularity).
4104 4137 assert(_finger < _region_limit, "invariant");
4105 4138 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4106 4139 // Check if bitmap iteration was aborted while scanning the last object
4107 4140 if (new_finger >= _region_limit) {
4108 4141 giveup_current_region();
4109 4142 } else {
4110 4143 move_finger_to(new_finger);
4111 4144 }
4112 4145 }
4113 4146 }
4114 4147 // At this point we have either completed iterating over the
4115 4148 // region we were holding on to, or we have aborted.
4116 4149
4117 4150 // We then partially drain the local queue and the global stack.
4118 4151 // (Do we really need this?)
4119 4152 drain_local_queue(true);
4120 4153 drain_global_stack(true);
4121 4154
4122 4155 // Read the note on the claim_region() method on why it might
4123 4156 // return NULL with potentially more regions available for
4124 4157 // claiming and why we have to check out_of_regions() to determine
4125 4158 // whether we're done or not.
4126 4159 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4127 4160 // We are going to try to claim a new region. We should have
4128 4161 // given up on the previous one.
4129 4162 // Separated the asserts so that we know which one fires.
4130 4163 assert(_curr_region == NULL, "invariant");
4131 4164 assert(_finger == NULL, "invariant");
4132 4165 assert(_region_limit == NULL, "invariant");
4133 4166 if (_cm->verbose_low()) {
4134 4167 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4135 4168 }
4136 4169 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4137 4170 if (claimed_region != NULL) {
4138 4171 // Yes, we managed to claim one
4139 4172 statsOnly( ++_regions_claimed );
4140 4173
4141 4174 if (_cm->verbose_low()) {
4142 4175 gclog_or_tty->print_cr("[%u] we successfully claimed "
4143 4176 "region "PTR_FORMAT,
4144 4177 _worker_id, claimed_region);
4145 4178 }
4146 4179
4147 4180 setup_for_region(claimed_region);
4148 4181 assert(_curr_region == claimed_region, "invariant");
4149 4182 }
4150 4183 // It is important to call the regular clock here. It might take
4151 4184 // a while to claim a region if, for example, we hit a large
4152 4185 // block of empty regions. So we need to call the regular clock
4153 4186 // method once round the loop to make sure it's called
4154 4187 // frequently enough.
4155 4188 regular_clock_call();
4156 4189 }
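// The note on claim_region() referred to above comes down to a
// lock-free scan over the global finger; a greatly simplified sketch
// (the real version also deals with empty and humongous regions),
// using Atomic::cmpxchg_ptr as elsewhere in HotSpot:
//
//   HeapRegion* ConcurrentMark::claim_region(uint worker_id) {
//     while (_finger < _heap_end) {
//       HeapWord* finger = _finger;
//       HeapRegion* curr_region = _g1h->heap_region_containing(finger);
//       HeapWord* end = curr_region->end();
//       if (Atomic::cmpxchg_ptr(end, &_finger, finger) == finger) {
//         return curr_region;   // we advanced the finger: region is ours
//       }
//       // another task moved the finger first; retry from its new value
//     }
//     // The finger is past the heap, but a racing task may still be
//     // holding an unscanned region: hence the out_of_regions() check.
//     return NULL;
//   }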
4157 4190
4158 4191 if (!has_aborted() && _curr_region == NULL) {
4159 4192 assert(_cm->out_of_regions(),
4160 4193 "at this point we should be out of regions");
4161 4194 }
4162 4195 } while ( _curr_region != NULL && !has_aborted());
4163 4196
4164 4197 if (!has_aborted()) {
4165 4198 // We cannot check whether the global stack is empty, since other
4166 4199 // tasks might be pushing objects to it concurrently.
4167 4200 assert(_cm->out_of_regions(),
4168 4201 "at this point we should be out of regions");
4169 4202
4170 4203 if (_cm->verbose_low()) {
4171 4204 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4172 4205 }
4173 4206
4174 4207 // Try to reduce the number of available SATB buffers so that
4175 4208 // remark has less work to do.
4176 4209 drain_satb_buffers();
4177 4210 }
4178 4211
4179 4212 // Since we've done everything else, we can now totally drain the
4180 4213 // local queue and global stack.
4181 4214 drain_local_queue(false);
4182 4215 drain_global_stack(false);
4183 4216
4184 4217 // Attempt to steal work from other tasks' queues.
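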
4185 4218 if (do_stealing && !has_aborted()) {
4186 4219 // We have not aborted. This means that we have finished all that
4187 4220 // we could. Let's try to do some stealing...
4188 4221
4189 4222 // We cannot check whether the global stack is empty, since other
4190 4223 // tasks might be pushing objects to it concurrently.
4191 4224 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4192 4225 "only way to reach here");
4193 4226
4194 4227 if (_cm->verbose_low()) {
4195 4228 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4196 4229 }
4197 4230
4198 4231 while (!has_aborted()) {
4199 4232 oop obj;
4200 4233 statsOnly( ++_steal_attempts );
4201 4234
4202 4235 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4203 4236 if (_cm->verbose_medium()) {
4204 4237 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4205 4238 _worker_id, (void*) obj);
4206 4239 }
4207 4240
4208 4241 statsOnly( ++_steals );
4209 4242
4210 4243 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4211 4244 "any stolen object should be marked");
4212 4245 scan_object(obj);
4213 4246
4214 4247 // And since we're towards the end, let's totally drain the
4215 4248 // local queue and global stack.
4216 4249 drain_local_queue(false);
4217 4250 drain_global_stack(false);
4218 4251 } else {
4219 4252 break;
4220 4253 }
4221 4254 }
4222 4255 }
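// try_stealing() is, plausibly, just a thin wrapper over the shared
// GenericTaskQueueSet::steal() machinery; a sketch (the wrapper body
// is assumed, not quoted):
//
//   bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed,
//                                     oop& obj) {
//     return _task_queues->steal(worker_id, hash_seed, obj);
//   }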
4223 4256
4224 4257 // If we are about to wrap up and go into termination, check if we
4225 4258 // should raise the overflow flag.
4226 4259 if (do_termination && !has_aborted()) {
4227 4260 if (_cm->force_overflow()->should_force()) {
4228 4261 _cm->set_has_overflown();
4229 4262 regular_clock_call();
4230 4263 }
4231 4264 }
4232 4265
4233 4266 // We still haven't aborted. Now, let's try to get into the
4234 4267 // termination protocol.
4235 4268 if (do_termination && !has_aborted()) {
4236 4269 // We cannot check whether the global stack is empty, since other
4237 4270 // tasks might be concurrently pushing objects on it.
4238 4271 // Separated the asserts so that we know which one fires.
4239 4272 assert(_cm->out_of_regions(), "only way to reach here");
4240 4273 assert(_task_queue->size() == 0, "only way to reach here");
4241 4274
4242 4275 if (_cm->verbose_low()) {
4243 4276 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4244 4277 }
4245 4278
4246 4279 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4247 4280 // The CMTask class also extends the TerminatorTerminator class,
4248 4281 // hence its should_exit_termination() method will also decide
4249 4282 // whether to exit the termination protocol or not.
4250 4283 bool finished = _cm->terminator()->offer_termination(this);
4251 4284 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4252 4285 _termination_time_ms +=
4253 4286 termination_end_time_ms - _termination_start_time_ms;
4254 4287
4255 4288 if (finished) {
4256 4289 // We're all done.
4257 4290
4258 4291 if (_worker_id == 0) {
4259 4292 // let's allow task 0 to do this
4260 4293 if (concurrent()) {
4261 4294 assert(_cm->concurrent_marking_in_progress(), "invariant");
4262 4295 // we need to set this to false before the next
4263 4296 // safepoint. This way we ensure that the marking phase
4264 4297 // doesn't observe any more heap expansions.
4265 4298 _cm->clear_concurrent_marking_in_progress();
4266 4299 }
4267 4300 }
4268 4301
4269 4302 // We can now guarantee that the global stack is empty, since
4270 4303 // all other tasks have finished. We separated the guarantees so
4271 4304 // that, if a condition is false, we can immediately find out
4272 4305 // which one.
4273 4306 guarantee(_cm->out_of_regions(), "only way to reach here");
4274 4307 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4275 4308 guarantee(_task_queue->size() == 0, "only way to reach here");
4276 4309 guarantee(!_cm->has_overflown(), "only way to reach here");
4277 4310 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4278 4311
4279 4312 if (_cm->verbose_low()) {
4280 4313 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4281 4314 }
4282 4315 } else {
4283 4316 // Apparently there's more work to do. Let's abort this task;
4284 4317 // its caller will restart it and we can hopefully find more to do.
4285 4318
4286 4319 if (_cm->verbose_low()) {
4287 4320 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4288 4321 _worker_id);
4289 4322 }
4290 4323
4291 4324 set_has_aborted();
4292 4325 statsOnly( ++_aborted_termination );
4293 4326 }
4294 4327 }
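// should_exit_termination(), mentioned above, is what lets a task
// leave the termination protocol early; a minimal sketch of the idea
// (the exact condition is assumed, not quoted):
//
//   bool CMTask::should_exit_termination() {
//     regular_clock_call();
//     // leave termination if we have been aborted (e.g. by a global
//     // overflow) or if new local work has appeared
//     return !_task_queue->is_empty() || has_aborted();
//   }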
4295 4328
4296 4329 // Mainly for debugging purposes to make sure that a pointer to the
4297 4330 // closure which was statically allocated in this frame doesn't
4298 4331 // escape it by accident.
4299 4332 set_cm_oop_closure(NULL);
4300 4333 double end_time_ms = os::elapsedVTime() * 1000.0;
4301 4334 double elapsed_time_ms = end_time_ms - _start_time_ms;
4302 4335 // Update the step history.
4303 4336 _step_times_ms.add(elapsed_time_ms);
4304 4337
4305 4338 if (has_aborted()) {
4306 4339 // The task was aborted for some reason.
4307 4340
4308 4341 statsOnly( ++_aborted );
4309 4342
4310 4343 if (_has_timed_out) {
4311 4344 double diff_ms = elapsed_time_ms - _time_target_ms;
4312 4345 // Keep statistics of how well we did with respect to hitting
4313 4346 // our target only if we actually timed out (if we aborted for
4314 4347 // other reasons, then the results might get skewed).
4315 4348 _marking_step_diffs_ms.add(diff_ms);
4316 4349 }
4317 4350
4318 4351 if (_cm->has_overflown()) {
4319 4352 // This is the interesting one. We aborted because a global
4320 4353 // overflow was raised. This means we have to restart the
4321 4354 // marking phase and start iterating over regions. However, in
4322 4355 // order to do this we have to make sure that all tasks stop
4323 4356 // what they are doing and re-initialise in a safe manner. We
4324 4357 // will achieve this with the use of two barrier sync points.
4325 4358
4326 4359 if (_cm->verbose_low()) {
4327 4360 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4328 4361 }
4329 4362
4330 4363 _cm->enter_first_sync_barrier(_worker_id);
4331 4364 // When we exit this sync barrier we know that all tasks have
4332 4365 // stopped doing marking work. So, it's now safe to
4333 4366 // re-initialise our data structures. At the end of this method,
4334 4367 // task 0 will clear the global data structures.
4335 4368
4336 4369 statsOnly( ++_aborted_overflow );
4337 4370
4338 4371 // We clear the local state of this task...
4339 4372 clear_region_fields();
4340 4373
4341 4374 // ...and enter the second barrier.
4342 4375 _cm->enter_second_sync_barrier(_worker_id);
4343 4376 // At this point everything has been re-initialised and we're
4344 4377 // ready to restart.
4345 4378 }
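// A sketch of the two-barrier protocol from the ConcurrentMark side;
// the barrier fields are assumed stand-ins and reset_marking_state()
// is a hypothetical name for whatever clears the global state:
//
//   void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
//     _first_overflow_barrier_sync.enter();
//     // every task is now parked: safe to touch global state
//     if (worker_id == 0) {
//       reset_marking_state();   // hypothetical: clears the global
//                                // stack, finger, region fields, etc.
//     }
//   }
//
//   void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
//     _second_overflow_barrier_sync.enter();
//     // from here on, every task sees the re-initialised state
//   }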
4346 4379
4347 4380 if (_cm->verbose_low()) {
4348 4381 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4349 4382 "elapsed = %1.2lfms <<<<<<<<<<",
4350 4383 _worker_id, _time_target_ms, elapsed_time_ms);
4351 4384 if (_cm->has_aborted()) {
4352 4385 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4353 4386 _worker_id);
4354 4387 }
4355 4388 }
4356 4389 } else {
4357 4390 if (_cm->verbose_low()) {
4358 4391 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4359 4392 "elapsed = %1.2lfms <<<<<<<<<<",
4360 4393 _worker_id, _time_target_ms, elapsed_time_ms);
4361 4394 }
4362 4395 }
4363 4396
4364 4397 _claimed = false;
4365 4398 }
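// A caller-side sketch of how a marking step is driven; the loop
// shape and the 10.0ms time target are illustrative (the real
// drivers are the concurrent marking and remark worker tasks):
//
//   CMTask* task = _cm->task(worker_id);
//   do {
//     task->do_marking_step(10.0,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//   } while (!_cm->has_aborted() && task->has_aborted());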
4366 4399
4367 4400 CMTask::CMTask(uint worker_id,
4368 4401 ConcurrentMark* cm,
4369 4402 size_t* marked_bytes,
4370 4403 BitMap* card_bm,
4371 4404 CMTaskQueue* task_queue,
4372 4405 CMTaskQueueSet* task_queues)
4373 4406 : _g1h(G1CollectedHeap::heap()),
4374 4407 _worker_id(worker_id), _cm(cm),
4375 4408 _claimed(false),
4376 4409 _nextMarkBitMap(NULL), _hash_seed(17),
4377 4410 _task_queue(task_queue),
4378 4411 _task_queues(task_queues),
4379 4412 _cm_oop_closure(NULL),
4380 4413 _marked_bytes_array(marked_bytes),
4381 4414 _card_bm(card_bm) {
4382 4415 guarantee(task_queue != NULL, "invariant");
4383 4416 guarantee(task_queues != NULL, "invariant");
4384 4417
4385 4418 statsOnly( _clock_due_to_scanning = 0;
4386 4419 _clock_due_to_marking = 0 );
4387 4420
4388 4421 _marking_step_diffs_ms.add(0.5);
4389 4422 }
4390 4423
4391 4424 // These are formatting macros that are used below to ensure
4392 4425 // consistent formatting. The *_H_* versions are used to format the
4393 4426 // header for a particular value and they should be kept consistent
4394 4427 // with the corresponding macro. Also note that most of the macros add
4395 4428 // the necessary white space (as a prefix) which makes them a bit
4396 4429 // easier to compose.
4397 4430
4398 4431 // All the output lines are prefixed with this string to be able to
4399 4432 // identify them easily in a large log file.
4400 4433 #define G1PPRL_LINE_PREFIX "###"
4401 4434
4402 4435 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4403 4436 #ifdef _LP64
4404 4437 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4405 4438 #else // _LP64
4406 4439 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4407 4440 #endif // _LP64
4408 4441
4409 4442 // For per-region info
4410 4443 #define G1PPRL_TYPE_FORMAT " %-4s"
4411 4444 #define G1PPRL_TYPE_H_FORMAT " %4s"
4412 4445 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4413 4446 #define G1PPRL_BYTE_H_FORMAT " %9s"
4414 4447 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4415 4448 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4416 4449
4417 4450 // For summary info
4418 4451 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4419 4452 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4420 4453 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4421 4454 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4422 4455
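// Because these macros are plain string literals, they compose by
// juxtaposition; for instance, a made-up two-column line such as
//
//   _out->print_cr(G1PPRL_LINE_PREFIX
//                  G1PPRL_TYPE_FORMAT
//                  G1PPRL_BYTE_FORMAT,
//                  "OLD", (size_t) 12345);
//
// would print something like "### OLD      12345".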
4423 4456 G1PrintRegionLivenessInfoClosure::
4424 4457 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4425 4458 : _out(out),
4426 4459 _total_used_bytes(0), _total_capacity_bytes(0),
4427 4460 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4428 4461 _hum_used_bytes(0), _hum_capacity_bytes(0),
4429 4462 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4430 4463 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4431 4464 MemRegion g1_committed = g1h->g1_committed();
4432 4465 MemRegion g1_reserved = g1h->g1_reserved();
4433 4466 double now = os::elapsedTime();
4434 4467
4435 4468 // Print the header of the output.
4436 4469 _out->cr();
4437 4470 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4438 4471 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4439 4472 G1PPRL_SUM_ADDR_FORMAT("committed")
4440 4473 G1PPRL_SUM_ADDR_FORMAT("reserved")
4441 4474 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4442 4475 g1_committed.start(), g1_committed.end(),
4443 4476 g1_reserved.start(), g1_reserved.end(),
4444 4477 HeapRegion::GrainBytes);
4445 4478 _out->print_cr(G1PPRL_LINE_PREFIX);
4446 4479 _out->print_cr(G1PPRL_LINE_PREFIX
4447 4480 G1PPRL_TYPE_H_FORMAT
4448 4481 G1PPRL_ADDR_BASE_H_FORMAT
4449 4482 G1PPRL_BYTE_H_FORMAT
4450 4483 G1PPRL_BYTE_H_FORMAT
4451 4484 G1PPRL_BYTE_H_FORMAT
4452 4485 G1PPRL_DOUBLE_H_FORMAT,
4453 4486 "type", "address-range",
4454 4487 "used", "prev-live", "next-live", "gc-eff");
4455 4488 _out->print_cr(G1PPRL_LINE_PREFIX
4456 4489 G1PPRL_TYPE_H_FORMAT
4457 4490 G1PPRL_ADDR_BASE_H_FORMAT
4458 4491 G1PPRL_BYTE_H_FORMAT
4459 4492 G1PPRL_BYTE_H_FORMAT
4460 4493 G1PPRL_BYTE_H_FORMAT
4461 4494 G1PPRL_DOUBLE_H_FORMAT,
4462 4495 "", "",
4463 4496 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4464 4497 }
4465 4498
4466 4499 // It takes a pointer to one of the _hum_* fields, deduces
4467 4500 // the corresponding value for a region in a humongous region
4468 4501 // series (either the region size, or what's left if the _hum_* field
4469 4502 // is < the region size), and updates the _hum_* field accordingly.
4470 4503 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4471 4504 size_t bytes = 0;
4472 4505 // The > 0 check is to deal with the prev and next live bytes which
4473 4506 // could be 0.
4474 4507 if (*hum_bytes > 0) {
4475 4508 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4476 4509 *hum_bytes -= bytes;
4477 4510 }
4478 4511 return bytes;
4479 4512 }
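// A worked example with 1 MB regions (numbers hypothetical): a
// 2.5 MB (2621440-byte) humongous object seeds _hum_used_bytes at
// its "starts humongous" region; three successive calls then yield
// 1048576, 1048576 and 524288 bytes, leaving the field at zero.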
4480 4513
4481 4514 // It deduces the values for a region in a humongous region series
4482 4515 // from the _hum_* fields and updates those accordingly. It assumes
4483 4516 // that the _hum_* fields have already been set up from the "starts
4484 4517 // humongous" region and that we visit the regions in address order.
4485 4518 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4486 4519 size_t* capacity_bytes,
4487 4520 size_t* prev_live_bytes,
4488 4521 size_t* next_live_bytes) {
4489 4522 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4490 4523 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4491 4524 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4492 4525 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4493 4526 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4494 4527 }
4495 4528
4496 4529 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4497 4530 const char* type = "";
4498 4531 HeapWord* bottom = r->bottom();
4499 4532 HeapWord* end = r->end();
4500 4533 size_t capacity_bytes = r->capacity();
4501 4534 size_t used_bytes = r->used();
4502 4535 size_t prev_live_bytes = r->live_bytes();
4503 4536 size_t next_live_bytes = r->next_live_bytes();
4504 4537 double gc_eff = r->gc_efficiency();
4505 4538 if (r->used() == 0) {
4506 4539 type = "FREE";
4507 4540 } else if (r->is_survivor()) {
4508 4541 type = "SURV";
4509 4542 } else if (r->is_young()) {
4510 4543 type = "EDEN";
4511 4544 } else if (r->startsHumongous()) {
4512 4545 type = "HUMS";
4513 4546
4514 4547 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4515 4548 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4516 4549 "they should have been zeroed after the last time we used them");
4517 4550 // Set up the _hum_* fields.
4518 4551 _hum_capacity_bytes = capacity_bytes;
4519 4552 _hum_used_bytes = used_bytes;
4520 4553 _hum_prev_live_bytes = prev_live_bytes;
4521 4554 _hum_next_live_bytes = next_live_bytes;
4522 4555 get_hum_bytes(&used_bytes, &capacity_bytes,
4523 4556 &prev_live_bytes, &next_live_bytes);
4524 4557 end = bottom + HeapRegion::GrainWords;
4525 4558 } else if (r->continuesHumongous()) {
4526 4559 type = "HUMC";
4527 4560 get_hum_bytes(&used_bytes, &capacity_bytes,
4528 4561 &prev_live_bytes, &next_live_bytes);
4529 4562 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4530 4563 } else {
4531 4564 type = "OLD";
4532 4565 }
4533 4566
4534 4567 _total_used_bytes += used_bytes;
4535 4568 _total_capacity_bytes += capacity_bytes;
4536 4569 _total_prev_live_bytes += prev_live_bytes;
4537 4570 _total_next_live_bytes += next_live_bytes;
4538 4571
4539 4572 // Print a line for this particular region.
4540 4573 _out->print_cr(G1PPRL_LINE_PREFIX
4541 4574 G1PPRL_TYPE_FORMAT
4542 4575 G1PPRL_ADDR_BASE_FORMAT
4543 4576 G1PPRL_BYTE_FORMAT
4544 4577 G1PPRL_BYTE_FORMAT
4545 4578 G1PPRL_BYTE_FORMAT
4546 4579 G1PPRL_DOUBLE_FORMAT,
4547 4580 type, bottom, end,
4548 4581 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4549 4582
4550 4583 return false;
4551 4584 }
4552 4585
4553 4586 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4554 4587 // Print the footer of the output.
4555 4588 _out->print_cr(G1PPRL_LINE_PREFIX);
4556 4589 _out->print_cr(G1PPRL_LINE_PREFIX
4557 4590 " SUMMARY"
4558 4591 G1PPRL_SUM_MB_FORMAT("capacity")
4559 4592 G1PPRL_SUM_MB_PERC_FORMAT("used")
4560 4593 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4561 4594 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4562 4595 bytes_to_mb(_total_capacity_bytes),
4563 4596 bytes_to_mb(_total_used_bytes),
4564 4597 perc(_total_used_bytes, _total_capacity_bytes),
4565 4598 bytes_to_mb(_total_prev_live_bytes),
4566 4599 perc(_total_prev_live_bytes, _total_capacity_bytes),
4567 4600 bytes_to_mb(_total_next_live_bytes),
4568 4601 perc(_total_next_live_bytes, _total_capacity_bytes));
4569 4602 _out->cr();
4570 4603 }
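// Taken together, the closure emits a listing of roughly this shape
// (all values illustrative, columns compressed for readability):
//
//   ### PHASE Post-Marking @ 2.417
//   ### HEAP committed: 0xf0000000-0x100000000 region-size: 1048576
//   ###
//   ###  type     address-range        used prev-live next-live gc-eff
//   ###  OLD  0xf0000000-0xf0100000 1048576    917504    524288  312.5
//   ###
//   ###  SUMMARY capacity: 256.00 MB used: 128.00 MB / 50.00 %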
(4326 lines elided)