rev 6875 : 8056240: Investigate increased GC remark time after class unloading changes in CRM Fuse
Reviewed-by: mgerdin, coleenp, bdelsart
--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 +#include "classfile/metadataOnStackMark.hpp"
26 27 #include "classfile/symbolTable.hpp"
27 28 #include "code/codeCache.hpp"
28 29 #include "gc_implementation/g1/concurrentMark.inline.hpp"
29 30 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
30 31 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
31 32 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
32 33 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
33 34 #include "gc_implementation/g1/g1Log.hpp"
34 35 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
35 36 #include "gc_implementation/g1/g1RemSet.hpp"
36 37 #include "gc_implementation/g1/heapRegion.inline.hpp"
37 38 #include "gc_implementation/g1/heapRegionManager.inline.hpp"
38 39 #include "gc_implementation/g1/heapRegionRemSet.hpp"
39 40 #include "gc_implementation/g1/heapRegionSet.inline.hpp"
40 41 #include "gc_implementation/shared/vmGCOperations.hpp"
41 42 #include "gc_implementation/shared/gcTimer.hpp"
42 43 #include "gc_implementation/shared/gcTrace.hpp"
43 44 #include "gc_implementation/shared/gcTraceTime.hpp"
44 45 #include "memory/allocation.hpp"
45 46 #include "memory/genOopClosures.inline.hpp"
46 47 #include "memory/referencePolicy.hpp"
47 48 #include "memory/resourceArea.hpp"
48 49 #include "oops/oop.inline.hpp"
49 50 #include "runtime/handles.inline.hpp"
50 51 #include "runtime/java.hpp"
51 52 #include "runtime/prefetch.inline.hpp"
52 53 #include "services/memTracker.hpp"
53 54
54 55 // Concurrent marking bit map wrapper
55 56
56 57 CMBitMapRO::CMBitMapRO(int shifter) :
57 58 _bm(),
58 59 _shifter(shifter) {
59 60 _bmStartWord = 0;
60 61 _bmWordSize = 0;
61 62 }
62 63
63 64 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
64 65 const HeapWord* limit) const {
65 66 // First we must round addr *up* to a possible object boundary.
66 67 addr = (HeapWord*)align_size_up((intptr_t)addr,
67 68 HeapWordSize << _shifter);
68 69 size_t addrOffset = heapWordToOffset(addr);
69 70 if (limit == NULL) {
70 71 limit = _bmStartWord + _bmWordSize;
71 72 }
72 73 size_t limitOffset = heapWordToOffset(limit);
73 74 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
74 75 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
75 76 assert(nextAddr >= addr, "get_next_one postcondition");
76 77 assert(nextAddr == limit || isMarked(nextAddr),
77 78 "get_next_one postcondition");
78 79 return nextAddr;
79 80 }
80 81
81 82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
82 83 const HeapWord* limit) const {
83 84 size_t addrOffset = heapWordToOffset(addr);
84 85 if (limit == NULL) {
85 86 limit = _bmStartWord + _bmWordSize;
86 87 }
87 88 size_t limitOffset = heapWordToOffset(limit);
88 89 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
89 90 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
90 91 assert(nextAddr >= addr, "get_next_one postcondition");
91 92 assert(nextAddr == limit || !isMarked(nextAddr),
92 93 "get_next_one postcondition");
93 94 return nextAddr;
94 95 }
95 96
96 97 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
97 98 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
98 99 return (int) (diff >> _shifter);
99 100 }
100 101
101 102 #ifndef PRODUCT
102 103 bool CMBitMapRO::covers(MemRegion heap_rs) const {
103 104 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
104 105 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
105 106 "size inconsistency");
106 107 return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
107 108 _bmWordSize == heap_rs.word_size();
108 109 }
109 110 #endif
110 111
111 112 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
112 113 _bm.print_on_error(st, prefix);
113 114 }
114 115
115 116 size_t CMBitMap::compute_size(size_t heap_size) {
116 117 return heap_size / mark_distance();
117 118 }
118 119
119 120 size_t CMBitMap::mark_distance() {
120 121 return MinObjAlignmentInBytes * BitsPerByte;
121 122 }
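
A quick check of the sizing above, as a sketch (the 8-byte minimum object
alignment is the usual 64-bit default here, an assumption rather than
something this change sets):

    // Each bit of the mark bitmap covers mark_distance() bytes of heap:
    //   mark_distance() = MinObjAlignmentInBytes * BitsPerByte = 8 * 8 = 64
    // so compute_size() comes out to heap_size / 64, e.g. for a 32 GB heap:
    size_t heap_size    = 32ull * 1024 * 1024 * 1024;  // assumed heap size
    size_t bitmap_bytes = heap_size / 64;              // 512 MB per bitmap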
122 123
123 124 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
124 125 _bmStartWord = heap.start();
125 126 _bmWordSize = heap.word_size();
126 127
127 128 _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
128 129 _bm.set_size(_bmWordSize >> _shifter);
129 130
130 131 storage->set_mapping_changed_listener(&_listener);
131 132 }
132 133
133 134 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
134 135 if (zero_filled) {
135 136 return;
136 137 }
137 138 // We need to clear the bitmap on commit, removing any existing information.
138 139 MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
139 140 _bm->clearRange(mr);
140 141 }
141 142
142 143 // Closure used for clearing the given mark bitmap.
143 144 class ClearBitmapHRClosure : public HeapRegionClosure {
144 145 private:
145 146 ConcurrentMark* _cm;
146 147 CMBitMap* _bitmap;
147 148 bool _may_yield; // The closure may yield during iteration. If yielded, abort the iteration.
148 149 public:
149 150 ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
150 151 assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
151 152 }
152 153
153 154 virtual bool doHeapRegion(HeapRegion* r) {
154 155 size_t const chunk_size_in_words = M / HeapWordSize;
155 156
156 157 HeapWord* cur = r->bottom();
157 158 HeapWord* const end = r->end();
158 159
159 160 while (cur < end) {
160 161 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
161 162 _bitmap->clearRange(mr);
162 163
163 164 cur += chunk_size_in_words;
164 165
165 166 // Abort iteration if after yielding the marking has been aborted.
166 167 if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
167 168 return true;
168 169 }
169 170 // Repeat the asserts from before the start of the closure. We will do them
170 171 // as asserts here to minimize their overhead on the product. However, we
171 172 // will have them as guarantees at the beginning / end of the bitmap
172 173 // clearing to get some checking in the product.
173 174 assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
174 175 assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
175 176 }
176 177
177 178 return false;
178 179 }
179 180 };
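
The closure above clears a region in 1 MB chunks so that a concurrent caller
can yield to safepoints between chunks. Both call sites appear later in this
file; note that `this` is the CMBitMap in the first call and the
ConcurrentMark in the second, matching their respective call sites:

    // STW clearing, as done by CMBitMap::clearAll() below (never yields):
    ClearBitmapHRClosure stw_cl(NULL, this, false /* may_yield */);
    // Concurrent clearing, as done by ConcurrentMark::clearNextBitmap()
    // (may yield to safepoints between chunks):
    ClearBitmapHRClosure conc_cl(this, _nextMarkBitMap, true /* may_yield */);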
180 181
181 182 void CMBitMap::clearAll() {
182 183 ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
183 184 G1CollectedHeap::heap()->heap_region_iterate(&cl);
184 185 guarantee(cl.complete(), "Must have completed iteration.");
185 186 return;
186 187 }
187 188
188 189 void CMBitMap::markRange(MemRegion mr) {
189 190 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
190 191 assert(!mr.is_empty(), "unexpected empty region");
191 192 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
192 193 ((HeapWord *) mr.end())),
193 194 "markRange memory region end is not card aligned");
194 195 // convert address range into offset range
195 196 _bm.at_put_range(heapWordToOffset(mr.start()),
196 197 heapWordToOffset(mr.end()), true);
197 198 }
198 199
199 200 void CMBitMap::clearRange(MemRegion mr) {
200 201 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
201 202 assert(!mr.is_empty(), "unexpected empty region");
202 203 // convert address range into offset range
203 204 _bm.at_put_range(heapWordToOffset(mr.start()),
204 205 heapWordToOffset(mr.end()), false);
205 206 }
206 207
207 208 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
208 209 HeapWord* end_addr) {
209 210 HeapWord* start = getNextMarkedWordAddress(addr);
210 211 start = MIN2(start, end_addr);
211 212 HeapWord* end = getNextUnmarkedWordAddress(start);
212 213 end = MIN2(end, end_addr);
213 214 assert(start <= end, "Consistency check");
214 215 MemRegion mr(start, end);
215 216 if (!mr.is_empty()) {
216 217 clearRange(mr);
217 218 }
218 219 return mr;
219 220 }
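
The helper above returns the next maximal run of marked bits within
[addr, end_addr) and clears it, so a caller can consume marked ranges one at
a time. A small usage sketch (the loop shape is assumed, not taken from a
call site in this file):

    // Walk and consume every marked run in [bottom, end):
    MemRegion mr = bm->getAndClearMarkedRegion(bottom, end);
    while (!mr.is_empty()) {
      // ... process [mr.start(), mr.end()) ...
      mr = bm->getAndClearMarkedRegion(mr.end(), end);
    }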
220 221
221 222 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
222 223 _base(NULL), _cm(cm)
223 224 #ifdef ASSERT
224 225 , _drain_in_progress(false)
225 226 , _drain_in_progress_yields(false)
226 227 #endif
227 228 {}
228 229
229 230 bool CMMarkStack::allocate(size_t capacity) {
230 231 // allocate a stack of the requisite depth
231 232 ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
232 233 if (!rs.is_reserved()) {
233 234 warning("ConcurrentMark MarkStack allocation failure");
234 235 return false;
235 236 }
236 237 MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
237 238 if (!_virtual_space.initialize(rs, rs.size())) {
238 239 warning("ConcurrentMark MarkStack backing store failure");
239 240 // Release the virtual memory reserved for the marking stack
240 241 rs.release();
241 242 return false;
242 243 }
243 244 assert(_virtual_space.committed_size() == rs.size(),
244 245 "Didn't reserve backing store for all of ConcurrentMark stack?");
245 246 _base = (oop*) _virtual_space.low();
246 247 setEmpty();
247 248 _capacity = (jint) capacity;
248 249 _saved_index = -1;
249 250 _should_expand = false;
250 251 NOT_PRODUCT(_max_depth = 0);
251 252 return true;
252 253 }
253 254
254 255 void CMMarkStack::expand() {
255 256 // Called during remark if we've overflown the marking stack during marking.
256 257 assert(isEmpty(), "stack should have been emptied while handling overflow");
257 258 assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
258 259 // Clear expansion flag
259 260 _should_expand = false;
260 261 if (_capacity == (jint) MarkStackSizeMax) {
261 262 if (PrintGCDetails && Verbose) {
262 263 gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
263 264 }
264 265 return;
265 266 }
266 267 // Double capacity if possible
267 268 jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
268 269 // Do not give up existing stack until we have managed to
269 270 // get the double capacity that we desired.
270 271 ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
271 272 sizeof(oop)));
272 273 if (rs.is_reserved()) {
273 274 // Release the backing store associated with old stack
274 275 _virtual_space.release();
275 276 // Reinitialize virtual space for new stack
276 277 if (!_virtual_space.initialize(rs, rs.size())) {
277 278 fatal("Not enough swap for expanded marking stack capacity");
278 279 }
279 280 _base = (oop*)(_virtual_space.low());
280 281 _index = 0;
281 282 _capacity = new_capacity;
282 283 } else {
283 284 if (PrintGCDetails && Verbose) {
284 285 // Failed to double capacity, continue;
285 286 gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
286 287 SIZE_FORMAT"K to " SIZE_FORMAT"K",
287 288 _capacity / K, new_capacity / K);
288 289 }
289 290 }
290 291 }
291 292
292 293 void CMMarkStack::set_should_expand() {
292 293 // If we're resetting the marking state because of a
293 294 // marking stack overflow, record that we should, if
295 296 // possible, expand the stack.
296 297 _should_expand = _cm->has_overflown();
297 298 }
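
How overflow and expansion tie together, pieced from the methods above and
the remark code later in this file:

    // 1. par_push()/par_adjoin_arr() find the stack full -> _overflow = true
    // 2. ConcurrentMark sees has_overflown() and aborts/restarts marking
    // 3. reset_marking_state() calls set_should_expand() to latch the request
    // 4. once the stack is empty again, checkpointRootsFinal() does:
    if (_markStack.should_expand()) {
      _markStack.expand();   // doubles _capacity, up to MarkStackSizeMax
    }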
298 299
299 300 CMMarkStack::~CMMarkStack() {
300 301 if (_base != NULL) {
301 302 _base = NULL;
302 303 _virtual_space.release();
303 304 }
304 305 }
305 306
306 307 void CMMarkStack::par_push(oop ptr) {
307 308 while (true) {
308 309 if (isFull()) {
309 310 _overflow = true;
310 311 return;
311 312 }
312 313 // Otherwise...
313 314 jint index = _index;
314 315 jint next_index = index+1;
315 316 jint res = Atomic::cmpxchg(next_index, &_index, index);
316 317 if (res == index) {
317 318 _base[index] = ptr;
318 319 // Note that we don't maintain this atomically. We could, but it
319 320 // doesn't seem necessary.
320 321 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
321 322 return;
322 323 }
323 324 // Otherwise, we need to try again.
324 325 }
325 326 }
326 327
327 328 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
328 329 while (true) {
329 330 if (isFull()) {
330 331 _overflow = true;
331 332 return;
332 333 }
333 334 // Otherwise...
334 335 jint index = _index;
335 336 jint next_index = index + n;
336 337 if (next_index > _capacity) {
337 338 _overflow = true;
338 339 return;
339 340 }
340 341 jint res = Atomic::cmpxchg(next_index, &_index, index);
341 342 if (res == index) {
342 343 for (int i = 0; i < n; i++) {
343 344 int ind = index + i;
344 345 assert(ind < _capacity, "By overflow test above.");
345 346 _base[ind] = ptr_arr[i];
346 347 }
347 348 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
348 349 return;
349 350 }
350 351 // Otherwise, we need to try again.
351 352 }
352 353 }
353 354
354 355 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
355 356 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
356 357 jint start = _index;
357 358 jint next_index = start + n;
358 359 if (next_index > _capacity) {
359 360 _overflow = true;
360 361 return;
361 362 }
362 363 // Otherwise.
363 364 _index = next_index;
364 365 for (int i = 0; i < n; i++) {
365 366 int ind = start + i;
366 367 assert(ind < _capacity, "By overflow test above.");
367 368 _base[ind] = ptr_arr[i];
368 369 }
369 370 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
370 371 }
371 372
372 373 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
373 374 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
374 375 jint index = _index;
375 376 if (index == 0) {
376 377 *n = 0;
377 378 return false;
378 379 } else {
379 380 int k = MIN2(max, index);
380 381 jint new_ind = index - k;
381 382 for (int j = 0; j < k; j++) {
382 383 ptr_arr[j] = _base[new_ind + j];
383 384 }
384 385 _index = new_ind;
385 386 *n = k;
386 387 return true;
387 388 }
388 389 }
389 390
390 391 template<class OopClosureClass>
391 392 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
392 393 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
393 394 || SafepointSynchronize::is_at_safepoint(),
394 395 "Drain recursion must be yield-safe.");
395 396 bool res = true;
396 397 debug_only(_drain_in_progress = true);
397 398 debug_only(_drain_in_progress_yields = yield_after);
398 399 while (!isEmpty()) {
399 400 oop newOop = pop();
400 401 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
401 402 assert(newOop->is_oop(), "Expected an oop");
402 403 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
403 404 "only grey objects on this stack");
404 405 newOop->oop_iterate(cl);
405 406 if (yield_after && _cm->do_yield_check()) {
406 407 res = false;
407 408 break;
408 409 }
409 410 }
410 411 debug_only(_drain_in_progress = false);
411 412 return res;
412 413 }
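
drain() is a template over the closure type, so each instantiation binds the
closure statically. A hedged usage sketch (the closure name is hypothetical,
standing in for any oop closure accepted by oop_iterate()):

    // Pop and process grey objects until the stack is empty, yielding
    // between objects when requested:
    MyMarkingClosure cl(this);   // hypothetical oop closure
    bool finished = _markStack.drain(&cl, _nextMarkBitMap, true /* yield_after */);
    // finished == false means the drain stopped early at a yield point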
413 414
414 415 void CMMarkStack::note_start_of_gc() {
415 416 assert(_saved_index == -1,
416 417 "note_start_of_gc()/end_of_gc() bracketed incorrectly");
417 418 _saved_index = _index;
418 419 }
419 420
420 421 void CMMarkStack::note_end_of_gc() {
421 422 // This is intentionally a guarantee, instead of an assert. If we
422 423 // accidentally add something to the mark stack during GC, it
423 424 // will be a correctness issue so it's better if we crash. We'll
424 425 // only check this once per GC anyway, so it won't be a performance
425 426 // issue in any way.
426 427 guarantee(_saved_index == _index,
427 428 err_msg("saved index: %d index: %d", _saved_index, _index));
428 429 _saved_index = -1;
429 430 }
430 431
431 432 void CMMarkStack::oops_do(OopClosure* f) {
432 433 assert(_saved_index == _index,
433 434 err_msg("saved index: %d index: %d", _saved_index, _index));
434 435 for (int i = 0; i < _index; i += 1) {
435 436 f->do_oop(&_base[i]);
436 437 }
437 438 }
438 439
439 440 CMRootRegions::CMRootRegions() :
440 441 _young_list(NULL), _cm(NULL), _scan_in_progress(false),
441 442 _should_abort(false), _next_survivor(NULL) { }
442 443
443 444 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
444 445 _young_list = g1h->young_list();
445 446 _cm = cm;
446 447 }
447 448
448 449 void CMRootRegions::prepare_for_scan() {
449 450 assert(!scan_in_progress(), "pre-condition");
450 451
451 452 // Currently, only survivors can be root regions.
452 453 assert(_next_survivor == NULL, "pre-condition");
453 454 _next_survivor = _young_list->first_survivor_region();
454 455 _scan_in_progress = (_next_survivor != NULL);
455 456 _should_abort = false;
456 457 }
457 458
458 459 HeapRegion* CMRootRegions::claim_next() {
459 460 if (_should_abort) {
460 461 // If someone has set the should_abort flag, we return NULL to
461 462 // force the caller to bail out of their loop.
462 463 return NULL;
463 464 }
464 465
465 466 // Currently, only survivors can be root regions.
466 467 HeapRegion* res = _next_survivor;
467 468 if (res != NULL) {
468 469 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
469 470 // Read it again in case it changed while we were waiting for the lock.
470 471 res = _next_survivor;
471 472 if (res != NULL) {
472 473 if (res == _young_list->last_survivor_region()) {
473 474 // We just claimed the last survivor so store NULL to indicate
474 475 // that we're done.
475 476 _next_survivor = NULL;
476 477 } else {
477 478 _next_survivor = res->get_next_young_region();
478 479 }
479 480 } else {
480 481 // Someone else claimed the last survivor while we were trying
481 482 // to take the lock so nothing else to do.
482 483 }
483 484 }
484 485 assert(res == NULL || res->is_survivor(), "post-condition");
485 486
486 487 return res;
487 488 }
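
claim_next() pairs a lock-free NULL fast path with a re-read of
_next_survivor under RootRegionScan_lock, so each region is handed to exactly
one worker. The claiming loop used by CMRootRegionScanTask::work() later in
this file:

    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }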
488 489
489 490 void CMRootRegions::scan_finished() {
490 491 assert(scan_in_progress(), "pre-condition");
491 492
492 493 // Currently, only survivors can be root regions.
493 494 if (!_should_abort) {
494 495 assert(_next_survivor == NULL, "we should have claimed all survivors");
495 496 }
496 497 _next_survivor = NULL;
497 498
498 499 {
499 500 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
500 501 _scan_in_progress = false;
501 502 RootRegionScan_lock->notify_all();
502 503 }
503 504 }
504 505
505 506 bool CMRootRegions::wait_until_scan_finished() {
506 507 if (!scan_in_progress()) return false;
507 508
508 509 {
509 510 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
510 511 while (scan_in_progress()) {
511 512 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
512 513 }
513 514 }
514 515 return true;
515 516 }
516 517
517 518 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
518 519 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
519 520 #endif // _MSC_VER
520 521
521 522 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
522 523 return MAX2((n_par_threads + 2) / 4, 1U);
523 524 }
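
Worked examples of the (n + 2) / 4 scaling above (plain integer arithmetic):

    //   n_par_threads =  1 -> (1 + 2) / 4  = 0 -> MAX2(0, 1U) = 1
    //   n_par_threads =  8 -> (8 + 2) / 4  = 2 marking threads
    //   n_par_threads = 13 -> (13 + 2) / 4 = 3 marking threads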
524 525
525 526 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
526 527 _g1h(g1h),
527 528 _markBitMap1(),
528 529 _markBitMap2(),
529 530 _parallel_marking_threads(0),
530 531 _max_parallel_marking_threads(0),
531 532 _sleep_factor(0.0),
532 533 _marking_task_overhead(1.0),
533 534 _cleanup_sleep_factor(0.0),
534 535 _cleanup_task_overhead(1.0),
535 536 _cleanup_list("Cleanup List"),
536 537 _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
537 538 _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
538 539 CardTableModRefBS::card_shift,
539 540 false /* in_resource_area*/),
540 541
541 542 _prevMarkBitMap(&_markBitMap1),
542 543 _nextMarkBitMap(&_markBitMap2),
543 544
544 545 _markStack(this),
545 546 // _finger set in set_non_marking_state
546 547
547 548 _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
548 549 // _active_tasks set in set_non_marking_state
549 550 // _tasks set inside the constructor
550 551 _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
551 552 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
552 553
553 554 _has_overflown(false),
554 555 _concurrent(false),
555 556 _has_aborted(false),
556 557 _aborted_gc_id(GCId::undefined()),
557 558 _restart_for_overflow(false),
558 559 _concurrent_marking_in_progress(false),
559 560
560 561 // _verbose_level set below
561 562
562 563 _init_times(),
563 564 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
564 565 _cleanup_times(),
565 566 _total_counting_time(0.0),
566 567 _total_rs_scrub_time(0.0),
567 568
568 569 _parallel_workers(NULL),
569 570
570 571 _count_card_bitmaps(NULL),
571 572 _count_marked_bytes(NULL),
572 573 _completed_initialization(false) {
573 574 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
574 575 if (verbose_level < no_verbose) {
575 576 verbose_level = no_verbose;
576 577 }
577 578 if (verbose_level > high_verbose) {
578 579 verbose_level = high_verbose;
579 580 }
580 581 _verbose_level = verbose_level;
581 582
582 583 if (verbose_low()) {
583 584 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
584 585 "heap end = " INTPTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
585 586 }
586 587
587 588 _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
588 589 _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
589 590
590 591 // Create & start a ConcurrentMark thread.
591 592 _cmThread = new ConcurrentMarkThread(this);
592 593 assert(cmThread() != NULL, "CM Thread should have been created");
593 594 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
594 595 if (_cmThread->osthread() == NULL) {
595 596 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
596 597 }
597 598
598 599 assert(CGC_lock != NULL, "Where's the CGC_lock?");
599 600 assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
600 601 assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
601 602
602 603 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
603 604 satb_qs.set_buffer_size(G1SATBBufferSize);
604 605
605 606 _root_regions.init(_g1h, this);
606 607
607 608 if (ConcGCThreads > ParallelGCThreads) {
608 609 warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
609 610 "than ParallelGCThreads (" UINTX_FORMAT ").",
610 611 ConcGCThreads, ParallelGCThreads);
611 612 return;
612 613 }
613 614 if (ParallelGCThreads == 0) {
614 615 // if we are not running with any parallel GC threads we will not
615 616 // spawn any marking threads either
616 617 _parallel_marking_threads = 0;
617 618 _max_parallel_marking_threads = 0;
618 619 _sleep_factor = 0.0;
619 620 _marking_task_overhead = 1.0;
620 621 } else {
621 622 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
622 623 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
623 624 // if both are set
624 625 _sleep_factor = 0.0;
625 626 _marking_task_overhead = 1.0;
626 627 } else if (G1MarkingOverheadPercent > 0) {
627 628 // We will calculate the number of parallel marking threads based
628 629 // on a target overhead with respect to the soft real-time goal
629 630 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
630 631 double overall_cm_overhead =
631 632 (double) MaxGCPauseMillis * marking_overhead /
632 633 (double) GCPauseIntervalMillis;
633 634 double cpu_ratio = 1.0 / (double) os::processor_count();
634 635 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
635 636 double marking_task_overhead =
636 637 overall_cm_overhead / marking_thread_num *
637 638 (double) os::processor_count();
638 639 double sleep_factor =
639 640 (1.0 - marking_task_overhead) / marking_task_overhead;
640 641
641 642 FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
642 643 _sleep_factor = sleep_factor;
643 644 _marking_task_overhead = marking_task_overhead;
644 645 } else {
645 646 // Calculate the number of parallel marking threads by scaling
646 647 // the number of parallel GC threads.
647 648 uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
648 649 FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
649 650 _sleep_factor = 0.0;
650 651 _marking_task_overhead = 1.0;
651 652 }
652 653
653 654 assert(ConcGCThreads > 0, "Should have been set");
654 655 _parallel_marking_threads = (uint) ConcGCThreads;
655 656 _max_parallel_marking_threads = _parallel_marking_threads;
656 657
657 658 if (parallel_marking_threads() > 1) {
658 659 _cleanup_task_overhead = 1.0;
659 660 } else {
660 661 _cleanup_task_overhead = marking_task_overhead();
661 662 }
662 663 _cleanup_sleep_factor =
663 664 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
664 665
665 666 #if 0
666 667 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
667 668 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
668 669 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
669 670 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
670 671 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
671 672 #endif
672 673
673 674 guarantee(parallel_marking_threads() > 0, "peace of mind");
674 675 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
675 676 _max_parallel_marking_threads, false, true);
676 677 if (_parallel_workers == NULL) {
677 678 vm_exit_during_initialization("Failed necessary allocation.");
678 679 } else {
679 680 _parallel_workers->initialize_workers();
680 681 }
681 682 }
682 683
683 684 if (FLAG_IS_DEFAULT(MarkStackSize)) {
684 685 uintx mark_stack_size =
685 686 MIN2(MarkStackSizeMax,
686 687 MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
687 688 // Verify that the calculated value for MarkStackSize is in range.
688 689 // It would be nice to use the private utility routine from Arguments.
689 690 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
690 691 warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
691 692 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
692 693 mark_stack_size, (uintx) 1, MarkStackSizeMax);
693 694 return;
694 695 }
695 696 FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
696 697 } else {
697 698 // Verify MarkStackSize is in range.
698 699 if (FLAG_IS_CMDLINE(MarkStackSize)) {
699 700 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
700 701 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
701 702 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
702 703 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
703 704 MarkStackSize, (uintx) 1, MarkStackSizeMax);
704 705 return;
705 706 }
706 707 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
707 708 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
708 709 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
709 710 " or for MarkStackSizeMax (" UINTX_FORMAT ")",
710 711 MarkStackSize, MarkStackSizeMax);
711 712 return;
712 713 }
713 714 }
714 715 }
715 716 }
716 717
717 718 if (!_markStack.allocate(MarkStackSize)) {
718 719 warning("Failed to allocate CM marking stack");
719 720 return;
720 721 }
721 722
722 723 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
723 724 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
724 725
725 726 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
726 727 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
727 728
728 729 BitMap::idx_t card_bm_size = _card_bm.size();
729 730
730 731 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
731 732 _active_tasks = _max_worker_id;
732 733
733 734 size_t max_regions = (size_t) _g1h->max_regions();
734 735 for (uint i = 0; i < _max_worker_id; ++i) {
735 736 CMTaskQueue* task_queue = new CMTaskQueue();
736 737 task_queue->initialize();
737 738 _task_queues->register_queue(i, task_queue);
738 739
739 740 _count_card_bitmaps[i] = BitMap(card_bm_size, false);
740 741 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
741 742
742 743 _tasks[i] = new CMTask(i, this,
743 744 _count_marked_bytes[i],
744 745 &_count_card_bitmaps[i],
745 746 task_queue, _task_queues);
746 747
747 748 _accum_task_vtime[i] = 0.0;
748 749 }
749 750
750 751 // Calculate the card number for the bottom of the heap. Used
751 752 // in biasing indexes into the accounting card bitmaps.
752 753 _heap_bottom_card_num =
753 754 intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
754 755 CardTableModRefBS::card_shift);
755 756
756 757 // Clear all the liveness counting data
757 758 clear_all_count_data();
758 759
759 760 // so that the call below can read a sensible value
760 761 _heap_start = g1h->reserved_region().start();
761 762 set_non_marking_state();
762 763 _completed_initialization = true;
763 764 }
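
A worked pass through the G1MarkingOverheadPercent arm of the constructor
above, with assumed inputs (none of these values are defaults asserted by
this change):

    // Assume: G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200,
    //         GCPauseIntervalMillis = 1000, os::processor_count() = 8.
    double marking_overhead      = 10 / 100.0;          // 0.10
    double overall_cm_overhead   = 200 * 0.10 / 1000;   // 0.02
    double cpu_ratio             = 1.0 / 8;             // 0.125
    double marking_thread_num    = ceil(0.02 / 0.125);  // 1 -> ConcGCThreads = 1
    double marking_task_overhead = 0.02 / 1 * 8;        // 0.16
    double sleep_factor          = (1.0 - 0.16) / 0.16; // 5.25
    // i.e. one marking thread that sleeps ~5.25x as long as it runs.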
764 765
765 766 void ConcurrentMark::reset() {
766 767 // Starting values for these two. This should be called in a STW
767 768 // phase.
768 769 MemRegion reserved = _g1h->g1_reserved();
769 770 _heap_start = reserved.start();
770 771 _heap_end = reserved.end();
771 772
772 773 // Separated the asserts so that we know which one fires.
773 774 assert(_heap_start != NULL, "heap bounds should look ok");
774 775 assert(_heap_end != NULL, "heap bounds should look ok");
775 776 assert(_heap_start < _heap_end, "heap bounds should look ok");
776 777
777 778 // Reset all the marking data structures and any necessary flags
778 779 reset_marking_state();
779 780
780 781 if (verbose_low()) {
781 782 gclog_or_tty->print_cr("[global] resetting");
782 783 }
783 784
784 785 // We do reset all of them, since different phases will use
785 786 // different number of active threads. So, it's easiest to have all
786 787 // of them ready.
787 788 for (uint i = 0; i < _max_worker_id; ++i) {
788 789 _tasks[i]->reset(_nextMarkBitMap);
789 790 }
790 791
791 792 // we need this to make sure that the flag is on during the evac
792 793 // pause with initial mark piggy-backed
793 794 set_concurrent_marking_in_progress();
794 795 }
795 796
796 797
797 798 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
798 799 _markStack.set_should_expand();
799 800 _markStack.setEmpty(); // Also clears the _markStack overflow flag
800 801 if (clear_overflow) {
801 802 clear_has_overflown();
802 803 } else {
803 804 assert(has_overflown(), "pre-condition");
804 805 }
805 806 _finger = _heap_start;
806 807
807 808 for (uint i = 0; i < _max_worker_id; ++i) {
808 809 CMTaskQueue* queue = _task_queues->queue(i);
809 810 queue->set_empty();
810 811 }
811 812 }
812 813
813 814 void ConcurrentMark::set_concurrency(uint active_tasks) {
814 815 assert(active_tasks <= _max_worker_id, "we should not have more");
815 816
816 817 _active_tasks = active_tasks;
817 818 // Need to update the three data structures below according to the
818 819 // number of active threads for this phase.
819 820 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
820 821 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
821 822 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
822 823 }
823 824
824 825 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
825 826 set_concurrency(active_tasks);
826 827
827 828 _concurrent = concurrent;
828 829 // We propagate this to all tasks, not just the active ones.
829 830 for (uint i = 0; i < _max_worker_id; ++i)
830 831 _tasks[i]->set_concurrent(concurrent);
831 832
832 833 if (concurrent) {
833 834 set_concurrent_marking_in_progress();
834 835 } else {
835 836 // We currently assume that the concurrent flag has been set to
836 837 // false before we start remark. At this point we should also be
837 838 // in a STW phase.
838 839 assert(!concurrent_marking_in_progress(), "invariant");
839 840 assert(out_of_regions(),
840 841 err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
841 842 p2i(_finger), p2i(_heap_end)));
842 843 }
843 844 }
844 845
845 846 void ConcurrentMark::set_non_marking_state() {
846 847 // We set the global marking state to some default values when we're
847 848 // not doing marking.
848 849 reset_marking_state();
849 850 _active_tasks = 0;
850 851 clear_concurrent_marking_in_progress();
851 852 }
852 853
853 854 ConcurrentMark::~ConcurrentMark() {
854 855 // The ConcurrentMark instance is never freed.
855 856 ShouldNotReachHere();
856 857 }
857 858
858 859 void ConcurrentMark::clearNextBitmap() {
859 860 G1CollectedHeap* g1h = G1CollectedHeap::heap();
860 861
861 862 // Make sure that the concurrent mark thread still appears to be in
862 863 // the current cycle.
863 864 guarantee(cmThread()->during_cycle(), "invariant");
864 865
865 866 // We are finishing up the current cycle by clearing the next
866 867 // marking bitmap and getting it ready for the next cycle. During
867 868 // this time no other cycle can start. So, let's make sure that this
868 869 // is the case.
869 870 guarantee(!g1h->mark_in_progress(), "invariant");
870 871
871 872 ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
872 873 g1h->heap_region_iterate(&cl);
873 874
874 875 // Clear the liveness counting data. If the marking has been aborted, the abort()
875 876 // call already did that.
876 877 if (cl.complete()) {
877 878 clear_all_count_data();
878 879 }
879 880
880 881 // Repeat the asserts from above.
881 882 guarantee(cmThread()->during_cycle(), "invariant");
882 883 guarantee(!g1h->mark_in_progress(), "invariant");
883 884 }
884 885
885 886 class CheckBitmapClearHRClosure : public HeapRegionClosure {
886 887 CMBitMap* _bitmap;
887 888 bool _error;
888 889 public:
889 890 CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
890 891 }
891 892
892 893 virtual bool doHeapRegion(HeapRegion* r) {
893 894 // This closure can be called concurrently to the mutator, so we must make sure
894 895 // that the result of the getNextMarkedWordAddress() call is compared to the
895 896 // value passed to it as limit to detect any found bits.
896 897 // We can use the region's orig_end() for the limit and the comparison value
897 898 // as it always contains the "real" end of the region that never changes and
898 899 // has no side effects.
899 900 // Due to the latter, there can also be no problem with the compiler generating
900 901 // reloads of the orig_end() call.
901 902 HeapWord* end = r->orig_end();
902 903 return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
903 904 }
904 905 };
905 906
906 907 bool ConcurrentMark::nextMarkBitmapIsClear() {
907 908 CheckBitmapClearHRClosure cl(_nextMarkBitMap);
908 909 _g1h->heap_region_iterate(&cl);
909 910 return cl.complete();
910 911 }
911 912
912 913 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
913 914 public:
914 915 bool doHeapRegion(HeapRegion* r) {
915 916 if (!r->continuesHumongous()) {
916 917 r->note_start_of_marking();
917 918 }
918 919 return false;
919 920 }
920 921 };
921 922
922 923 void ConcurrentMark::checkpointRootsInitialPre() {
923 924 G1CollectedHeap* g1h = G1CollectedHeap::heap();
924 925 G1CollectorPolicy* g1p = g1h->g1_policy();
925 926
926 927 _has_aborted = false;
927 928
928 929 #ifndef PRODUCT
929 930 if (G1PrintReachableAtInitialMark) {
930 931 print_reachable("at-cycle-start",
931 932 VerifyOption_G1UsePrevMarking, true /* all */);
932 933 }
933 934 #endif
934 935
935 936 // Initialise marking structures. This has to be done in a STW phase.
936 937 reset();
937 938
938 939 // For each region note start of marking.
939 940 NoteStartOfMarkHRClosure startcl;
940 941 g1h->heap_region_iterate(&startcl);
941 942 }
942 943
943 944
944 945 void ConcurrentMark::checkpointRootsInitialPost() {
945 946 G1CollectedHeap* g1h = G1CollectedHeap::heap();
946 947
947 948 // If we force an overflow during remark, the remark operation will
948 949 // actually abort and we'll restart concurrent marking. If we always
949 950 // force an overflow during remark we'll never actually complete the
950 951 // marking phase. So, we initialize this here, at the start of the
951 952 // cycle, so that the remaining overflow count will decrease at
952 953 // every remark and we'll eventually not need to cause one.
953 954 force_overflow_stw()->init();
954 955
955 956 // Start Concurrent Marking weak-reference discovery.
956 957 ReferenceProcessor* rp = g1h->ref_processor_cm();
957 958 // enable ("weak") refs discovery
958 959 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
959 960 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
960 961
961 962 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
962 963 // This is the start of the marking cycle; we expect all
963 964 // threads to have SATB queues with active set to false.
964 965 satb_mq_set.set_active_all_threads(true, /* new active value */
965 966 false /* expected_active */);
966 967
967 968 _root_regions.prepare_for_scan();
968 969
969 970 // update_g1_committed() will be called at the end of an evac pause
970 971 // when marking is on. So, it's also called at the end of the
971 972 // initial-mark pause to update the heap end, if the heap expands
972 973 // during it. No need to call it here.
973 974 }
974 975
975 976 /*
976 977 * Notice that in the next two methods, we actually leave the STS
977 978 * during the barrier sync and join it immediately afterwards. If we
978 979 * do not do this, the following deadlock can occur: one thread could
979 980 * be in the barrier sync code, waiting for the other thread to also
980 981 * sync up, whereas another one could be trying to yield, while also
981 982 * waiting for the other threads to sync up too.
982 983 *
983 984 * Note, however, that this code is also used during remark and in
984 985 * this case we should not attempt to leave / enter the STS, otherwise
985 986 * we'll either hit an assert (debug / fastdebug) or deadlock
986 987 * (product). So we should only leave / enter the STS if we are
987 988 * operating concurrently.
988 989 *
989 990 * Because the thread that does the sync barrier has left the STS, it
990 991 * is possible for it to be suspended for a Full GC, or for an
991 992 * evacuation pause to occur. This is actually safe, since entering
992 993 * the sync barrier is one of the last things do_marking_step() does, and it
993 994 * doesn't manipulate any data structures afterwards.
994 995 */
995 996
996 997 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
997 998 if (verbose_low()) {
998 999 gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
999 1000 }
1000 1001
1001 1002 if (concurrent()) {
1002 1003 SuspendibleThreadSet::leave();
1003 1004 }
1004 1005
1005 1006 bool barrier_aborted = !_first_overflow_barrier_sync.enter();
1006 1007
1007 1008 if (concurrent()) {
1008 1009 SuspendibleThreadSet::join();
1009 1010 }
1010 1011 // at this point everyone should have synced up and not be doing any
1011 1012 // more work
1012 1013
1013 1014 if (verbose_low()) {
1014 1015 if (barrier_aborted) {
1015 1016 gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
1016 1017 } else {
1017 1018 gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
1018 1019 }
1019 1020 }
1020 1021
1021 1022 if (barrier_aborted) {
1022 1023 // If the barrier aborted we ignore the overflow condition and
1023 1024 // just abort the whole marking phase as quickly as possible.
1024 1025 return;
1025 1026 }
1026 1027
1027 1028 // If we're executing the concurrent phase of marking, reset the marking
1028 1029 // state; otherwise the marking state is reset after reference processing,
1029 1030 // during the remark pause.
1030 1031 // If we reset here as a result of an overflow during the remark we will
1031 1032 // see assertion failures from any subsequent set_concurrency_and_phase()
1032 1033 // calls.
1033 1034 if (concurrent()) {
1034 1035 // let the task associated with worker 0 do this
1035 1036 if (worker_id == 0) {
1036 1037 // task 0 is responsible for clearing the global data structures
1037 1038 // We should be here because of an overflow. During STW we should
1038 1039 // not clear the overflow flag since we rely on it being true when
1039 1040 // we exit this method to abort the pause and restart concurrent
1040 1041 // marking.
1041 1042 reset_marking_state(true /* clear_overflow */);
1042 1043 force_overflow()->update();
1043 1044
1044 1045 if (G1Log::fine()) {
1045 1046 gclog_or_tty->gclog_stamp(concurrent_gc_id());
1046 1047 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1047 1048 }
1048 1049 }
1049 1050 }
1050 1051
1051 1052 // after this, each task should reset its own data structures and
1052 1053 // then go into the second barrier
1053 1054 }
1054 1055
1055 1056 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1056 1057 if (verbose_low()) {
1057 1058 gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1058 1059 }
1059 1060
1060 1061 if (concurrent()) {
1061 1062 SuspendibleThreadSet::leave();
1062 1063 }
1063 1064
1064 1065 bool barrier_aborted = !_second_overflow_barrier_sync.enter();
1065 1066
1066 1067 if (concurrent()) {
1067 1068 SuspendibleThreadSet::join();
1068 1069 }
1069 1070 // at this point everything should be re-initialized and ready to go
1070 1071
1071 1072 if (verbose_low()) {
1072 1073 if (barrier_aborted) {
1073 1074 gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1074 1075 } else {
1075 1076 gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1076 1077 }
1077 1078 }
1078 1079 }
1079 1080
1080 1081 #ifndef PRODUCT
1081 1082 void ForceOverflowSettings::init() {
1082 1083 _num_remaining = G1ConcMarkForceOverflow;
1083 1084 _force = false;
1084 1085 update();
1085 1086 }
1086 1087
1087 1088 void ForceOverflowSettings::update() {
1088 1089 if (_num_remaining > 0) {
1089 1090 _num_remaining -= 1;
1090 1091 _force = true;
1091 1092 } else {
1092 1093 _force = false;
1093 1094 }
1094 1095 }
1095 1096
1096 1097 bool ForceOverflowSettings::should_force() {
1097 1098 if (_force) {
1098 1099 _force = false;
1099 1100 return true;
1100 1101 } else {
1101 1102 return false;
1102 1103 }
1103 1104 }
1104 1105 #endif // !PRODUCT
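
The countdown above is a debug-only test hook. A sketch of its behavior,
assuming -XX:G1ConcMarkForceOverflow=2:

    // init():         _num_remaining = 2; update() arms the first force
    // should_force(): returns true exactly once per update()
    // update() after each forced restart arms the next force while
    // _num_remaining > 0, so marking is forced to overflow (and restart)
    // twice in total before proceeding normally.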
1105 1106
1106 1107 class CMConcurrentMarkingTask: public AbstractGangTask {
1107 1108 private:
1108 1109 ConcurrentMark* _cm;
1109 1110 ConcurrentMarkThread* _cmt;
1110 1111
1111 1112 public:
1112 1113 void work(uint worker_id) {
1113 1114 assert(Thread::current()->is_ConcurrentGC_thread(),
1114 1115 "this should only be done by a conc GC thread");
1115 1116 ResourceMark rm;
1116 1117
1117 1118 double start_vtime = os::elapsedVTime();
1118 1119
1119 1120 SuspendibleThreadSet::join();
1120 1121
1121 1122 assert(worker_id < _cm->active_tasks(), "invariant");
1122 1123 CMTask* the_task = _cm->task(worker_id);
1123 1124 the_task->record_start_time();
1124 1125 if (!_cm->has_aborted()) {
1125 1126 do {
1126 1127 double start_vtime_sec = os::elapsedVTime();
1127 1128 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1128 1129
1129 1130 the_task->do_marking_step(mark_step_duration_ms,
1130 1131 true /* do_termination */,
1131 1132 false /* is_serial*/);
1132 1133
1133 1134 double end_vtime_sec = os::elapsedVTime();
1134 1135 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1135 1136 _cm->clear_has_overflown();
1136 1137
1137 1138 _cm->do_yield_check(worker_id);
1138 1139
1139 1140 jlong sleep_time_ms;
1140 1141 if (!_cm->has_aborted() && the_task->has_aborted()) {
1141 1142 sleep_time_ms =
1142 1143 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1143 1144 SuspendibleThreadSet::leave();
1144 1145 os::sleep(Thread::current(), sleep_time_ms, false);
1145 1146 SuspendibleThreadSet::join();
1146 1147 }
1147 1148 } while (!_cm->has_aborted() && the_task->has_aborted());
1148 1149 }
1149 1150 the_task->record_end_time();
1150 1151 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1151 1152
1152 1153 SuspendibleThreadSet::leave();
1153 1154
1154 1155 double end_vtime = os::elapsedVTime();
1155 1156 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1156 1157 }
1157 1158
1158 1159 CMConcurrentMarkingTask(ConcurrentMark* cm,
1159 1160 ConcurrentMarkThread* cmt) :
1160 1161 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1161 1162
1162 1163 ~CMConcurrentMarkingTask() { }
1163 1164 };
1164 1165
1165 1166 // Calculates the number of active workers for a concurrent
1166 1167 // phase.
1167 1168 uint ConcurrentMark::calc_parallel_marking_threads() {
1168 1169 if (G1CollectedHeap::use_parallel_gc_threads()) {
1169 1170 uint n_conc_workers = 0;
1170 1171 if (!UseDynamicNumberOfGCThreads ||
1171 1172 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1172 1173 !ForceDynamicNumberOfGCThreads)) {
1173 1174 n_conc_workers = max_parallel_marking_threads();
1174 1175 } else {
1175 1176 n_conc_workers =
1176 1177 AdaptiveSizePolicy::calc_default_active_workers(
1177 1178 max_parallel_marking_threads(),
1178 1179 1, /* Minimum workers */
1179 1180 parallel_marking_threads(),
1180 1181 Threads::number_of_non_daemon_threads());
1181 1182 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1182 1183 // that scaling has already gone into "_max_parallel_marking_threads".
1183 1184 }
1184 1185 assert(n_conc_workers > 0, "Always need at least 1");
1185 1186 return n_conc_workers;
1186 1187 }
1187 1188 // If we are not running with any parallel GC threads we will not
1188 1189 // have spawned any marking threads either. Hence the number of
1189 1190 // concurrent workers should be 0.
1190 1191 return 0;
1191 1192 }
1192 1193
1193 1194 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1194 1195 // Currently, only survivors can be root regions.
1195 1196 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1196 1197 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1197 1198
1198 1199 const uintx interval = PrefetchScanIntervalInBytes;
1199 1200 HeapWord* curr = hr->bottom();
1200 1201 const HeapWord* end = hr->top();
1201 1202 while (curr < end) {
1202 1203 Prefetch::read(curr, interval);
1203 1204 oop obj = oop(curr);
1204 1205 int size = obj->oop_iterate(&cl);
1205 1206 assert(size == obj->size(), "sanity");
1206 1207 curr += size;
1207 1208 }
1208 1209 }
1209 1210
1210 1211 class CMRootRegionScanTask : public AbstractGangTask {
1211 1212 private:
1212 1213 ConcurrentMark* _cm;
1213 1214
1214 1215 public:
1215 1216 CMRootRegionScanTask(ConcurrentMark* cm) :
1216 1217 AbstractGangTask("Root Region Scan"), _cm(cm) { }
1217 1218
1218 1219 void work(uint worker_id) {
1219 1220 assert(Thread::current()->is_ConcurrentGC_thread(),
1220 1221 "this should only be done by a conc GC thread");
1221 1222
1222 1223 CMRootRegions* root_regions = _cm->root_regions();
1223 1224 HeapRegion* hr = root_regions->claim_next();
1224 1225 while (hr != NULL) {
1225 1226 _cm->scanRootRegion(hr, worker_id);
1226 1227 hr = root_regions->claim_next();
1227 1228 }
1228 1229 }
1229 1230 };
1230 1231
1231 1232 void ConcurrentMark::scanRootRegions() {
1232 1233 // Start of concurrent marking.
1233 1234 ClassLoaderDataGraph::clear_claimed_marks();
1234 1235
1235 1236 // scan_in_progress() will have been set to true only if there was
1236 1237 // at least one root region to scan. So, if it's false, we
1237 1238 // should not attempt to do any further work.
1238 1239 if (root_regions()->scan_in_progress()) {
1239 1240 _parallel_marking_threads = calc_parallel_marking_threads();
1240 1241 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1241 1242 "Maximum number of marking threads exceeded");
1242 1243 uint active_workers = MAX2(1U, parallel_marking_threads());
1243 1244
1244 1245 CMRootRegionScanTask task(this);
1245 1246 if (use_parallel_marking_threads()) {
1246 1247 _parallel_workers->set_active_workers((int) active_workers);
1247 1248 _parallel_workers->run_task(&task);
1248 1249 } else {
1249 1250 task.work(0);
1250 1251 }
1251 1252
1252 1253 // It's possible that has_aborted() is true here without actually
1253 1254 // aborting the survivor scan earlier. This is OK as it's
1254 1255 // mainly used for sanity checking.
1255 1256 root_regions()->scan_finished();
1256 1257 }
1257 1258 }
1258 1259
1259 1260 void ConcurrentMark::markFromRoots() {
1260 1261 // we might be tempted to assert that:
1261 1262 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1262 1263 // "inconsistent argument?");
1263 1264 // However that wouldn't be right, because it's possible that
1264 1265 // a safepoint is indeed in progress as a younger generation
1265 1266 // stop-the-world GC happens even as we mark in this generation.
1266 1267
1267 1268 _restart_for_overflow = false;
1268 1269 force_overflow_conc()->init();
1269 1270
1270 1271 // _g1h has _n_par_threads
1271 1272 _parallel_marking_threads = calc_parallel_marking_threads();
1272 1273 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1273 1274 "Maximum number of marking threads exceeded");
1274 1275
1275 1276 uint active_workers = MAX2(1U, parallel_marking_threads());
1276 1277
1277 1278 // Parallel task terminator is set in "set_concurrency_and_phase()"
1278 1279 set_concurrency_and_phase(active_workers, true /* concurrent */);
1279 1280
1280 1281 CMConcurrentMarkingTask markingTask(this, cmThread());
1281 1282 if (use_parallel_marking_threads()) {
1282 1283 _parallel_workers->set_active_workers((int)active_workers);
1283 1284 // Don't set _n_par_threads because it affects MT in process_roots()
1284 1285 // and the decisions on that MT processing is made elsewhere.
1285 1286 assert(_parallel_workers->active_workers() > 0, "Should have been set");
1286 1287 _parallel_workers->run_task(&markingTask);
1287 1288 } else {
1288 1289 markingTask.work(0);
1289 1290 }
1290 1291 print_stats();
1291 1292 }
1292 1293
1293 1294 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1294 1295 // world is stopped at this checkpoint
1295 1296 assert(SafepointSynchronize::is_at_safepoint(),
1296 1297 "world should be stopped");
1297 1298
1298 1299 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1299 1300
1300 1301 // If a full collection has happened, we shouldn't do this.
1301 1302 if (has_aborted()) {
1302 1303 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1303 1304 return;
1304 1305 }
1305 1306
1306 1307 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1307 1308
1308 1309 if (VerifyDuringGC) {
1309 1310 HandleMark hm; // handle scope
1310 1311 Universe::heap()->prepare_for_verify();
1311 1312 Universe::verify(VerifyOption_G1UsePrevMarking,
1312 1313 " VerifyDuringGC:(before)");
1313 1314 }
1314 1315 g1h->check_bitmaps("Remark Start");
1315 1316
1316 1317 G1CollectorPolicy* g1p = g1h->g1_policy();
1317 1318 g1p->record_concurrent_mark_remark_start();
1318 1319
1319 1320 double start = os::elapsedTime();
1320 1321
1321 1322 checkpointRootsFinalWork();
1322 1323
1323 1324 double mark_work_end = os::elapsedTime();
1324 1325
1325 1326 weakRefsWork(clear_all_soft_refs);
1326 1327
1327 1328 if (has_overflown()) {
1328 1329 // Oops. We overflowed. Restart concurrent marking.
1329 1330 _restart_for_overflow = true;
1330 1331 if (G1TraceMarkStackOverflow) {
1331 1332 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1332 1333 }
1333 1334
1334 1335 // Verify the heap w.r.t. the previous marking bitmap.
1335 1336 if (VerifyDuringGC) {
1336 1337 HandleMark hm; // handle scope
1337 1338 Universe::heap()->prepare_for_verify();
1338 1339 Universe::verify(VerifyOption_G1UsePrevMarking,
1339 1340 " VerifyDuringGC:(overflow)");
1340 1341 }
1341 1342
1342 1343 // Clear the marking state because we will be restarting
1343 1344 // marking due to overflowing the global mark stack.
1344 1345 reset_marking_state();
1345 1346 } else {
1346 1347 // Aggregate the per-task counting data that we have accumulated
1347 1348 // while marking.
1348 1349 aggregate_count_data();
1349 1350
1350 1351 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1351 1352 // We're done with marking.
1352 1353 // This is the end of the marking cycle; we expect all
1353 1354 // threads to have SATB queues with active set to true.
1354 1355 satb_mq_set.set_active_all_threads(false, /* new active value */
1355 1356 true /* expected_active */);
1356 1357
1357 1358 if (VerifyDuringGC) {
1358 1359 HandleMark hm; // handle scope
1359 1360 Universe::heap()->prepare_for_verify();
1360 1361 Universe::verify(VerifyOption_G1UseNextMarking,
1361 1362 " VerifyDuringGC:(after)");
1362 1363 }
1363 1364 g1h->check_bitmaps("Remark End");
1364 1365 assert(!restart_for_overflow(), "sanity");
1365 1366 // Completely reset the marking state since marking completed
1366 1367 set_non_marking_state();
1367 1368 }
1368 1369
1369 1370 // Expand the marking stack, if we have to and if we can.
1370 1371 if (_markStack.should_expand()) {
1371 1372 _markStack.expand();
1372 1373 }
1373 1374
1374 1375 // Statistics
1375 1376 double now = os::elapsedTime();
1376 1377 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1377 1378 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1378 1379 _remark_times.add((now - start) * 1000.0);
1379 1380
1380 1381 g1p->record_concurrent_mark_remark_end();
1381 1382
1382 1383 G1CMIsAliveClosure is_alive(g1h);
1383 1384 g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1384 1385 }
1385 1386
1386 1387 // Base class of the closures that finalize and verify the
1387 1388 // liveness counting data.
1388 1389 class CMCountDataClosureBase: public HeapRegionClosure {
1389 1390 protected:
1390 1391 G1CollectedHeap* _g1h;
1391 1392 ConcurrentMark* _cm;
1392 1393 CardTableModRefBS* _ct_bs;
1393 1394
1394 1395 BitMap* _region_bm;
1395 1396 BitMap* _card_bm;
1396 1397
1397 1398 // Takes a region that's not empty (i.e., it has at least one
1398 1399 // live object in it) and sets its corresponding bit on the region
1399 1400 // bitmap to 1. If the region is "starts humongous" it will also set
1400 1401 // to 1 the bits on the region bitmap that correspond to its
1401 1402 // associated "continues humongous" regions.
1402 1403 void set_bit_for_region(HeapRegion* hr) {
1403 1404 assert(!hr->continuesHumongous(), "should have filtered those out");
1404 1405
1405 1406 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1406 1407 if (!hr->startsHumongous()) {
1407 1408 // Normal (non-humongous) case: just set the bit.
1408 1409 _region_bm->par_at_put(index, true);
1409 1410 } else {
1410 1411 // Starts humongous case: calculate how many regions are part of
1411 1412 // this humongous region and then set the bit range.
1412 1413 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1413 1414 _region_bm->par_at_put_range(index, end_index, true);
1414 1415 }
1415 1416 }
1416 1417
1417 1418 public:
1418 1419 CMCountDataClosureBase(G1CollectedHeap* g1h,
1419 1420 BitMap* region_bm, BitMap* card_bm):
1420 1421 _g1h(g1h), _cm(g1h->concurrent_mark()),
1421 1422 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1422 1423 _region_bm(region_bm), _card_bm(card_bm) { }
1423 1424 };
1424 1425
1425 1426 // Closure that calculates the # live (marked) bytes per region. Used
1426 1427 // for verification purposes during the cleanup pause.
1427 1428 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1428 1429 CMBitMapRO* _bm;
1429 1430 size_t _region_marked_bytes;
1430 1431
1431 1432 public:
1432 1433 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1433 1434 BitMap* region_bm, BitMap* card_bm) :
1434 1435 CMCountDataClosureBase(g1h, region_bm, card_bm),
1435 1436 _bm(bm), _region_marked_bytes(0) { }
1436 1437
1437 1438 bool doHeapRegion(HeapRegion* hr) {
1438 1439
1439 1440 if (hr->continuesHumongous()) {
1440 1441 // We will ignore these here and process them when their
1441 1442 // associated "starts humongous" region is processed (see
1442 1443     // set_bit_for_region()). Note that we cannot rely on their
1443 1444     // associated "starts humongous" region to have its bit set to
1444 1445 // 1 since, due to the region chunking in the parallel region
1445 1446 // iteration, a "continues humongous" region might be visited
1446 1447 // before its associated "starts humongous".
1447 1448 return false;
1448 1449 }
1449 1450
1450 1451 HeapWord* ntams = hr->next_top_at_mark_start();
1451 1452 HeapWord* start = hr->bottom();
1452 1453
1453 1454 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1454 1455 err_msg("Preconditions not met - "
1455 1456 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1456 1457 p2i(start), p2i(ntams), p2i(hr->end())));
1457 1458
1458 1459 // Find the first marked object at or after "start".
1459 1460 start = _bm->getNextMarkedWordAddress(start, ntams);
1460 1461
1461 1462 size_t marked_bytes = 0;
1462 1463
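     +     // Walk the objects marked on the bitmap in [start, ntams): accumulate
     +     // each object's size into marked_bytes and mark the cards it spans
     +     // in the card bitmap.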
1463 1464 while (start < ntams) {
1464 1465 oop obj = oop(start);
1465 1466 int obj_sz = obj->size();
1466 1467 HeapWord* obj_end = start + obj_sz;
1467 1468
1468 1469 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1469 1470 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1470 1471
1471 1472       // Note: if we're looking at the last region in the heap, obj_end
1472 1473       // could actually be just beyond the end of the heap; end_idx
1473 1474 // will then correspond to a (non-existent) card that is also
1474 1475 // just beyond the heap.
1475 1476 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1476 1477 // end of object is not card aligned - increment to cover
1477 1478 // all the cards spanned by the object
1478 1479 end_idx += 1;
1479 1480 }
1480 1481
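     +       // Worked example (assuming the usual 512 byte card size): an object
     +       // spanning heap-relative bytes [0x0, 0x300) gives start_idx 0 and
     +       // end_idx 1; since 0x300 is not card aligned, end_idx is bumped to 2
     +       // and the bits for cards 0 and 1 are set below.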
1481 1482 // Set the bits in the card BM for the cards spanned by this object.
1482 1483 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1483 1484
1484 1485 // Add the size of this object to the number of marked bytes.
1485 1486 marked_bytes += (size_t)obj_sz * HeapWordSize;
1486 1487
1487 1488 // Find the next marked object after this one.
1488 1489 start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1489 1490 }
1490 1491
1491 1492 // Mark the allocated-since-marking portion...
1492 1493 HeapWord* top = hr->top();
1493 1494 if (ntams < top) {
1494 1495 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1495 1496 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1496 1497
1497 1498       // Note: if we're looking at the last region in the heap, top
1498 1499       // could actually be just beyond the end of the heap; end_idx
1499 1500 // will then correspond to a (non-existent) card that is also
1500 1501 // just beyond the heap.
1501 1502 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1502 1503 // end of object is not card aligned - increment to cover
1503 1504 // all the cards spanned by the object
1504 1505 end_idx += 1;
1505 1506 }
1506 1507 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1507 1508
1508 1509 // This definitely means the region has live objects.
1509 1510 set_bit_for_region(hr);
1510 1511 }
1511 1512
1512 1513 // Update the live region bitmap.
1513 1514 if (marked_bytes > 0) {
1514 1515 set_bit_for_region(hr);
1515 1516 }
1516 1517
1517 1518 // Set the marked bytes for the current region so that
1518 1519     // it can be queried by a calling verification routine
1519 1520 _region_marked_bytes = marked_bytes;
1520 1521
1521 1522 return false;
1522 1523 }
1523 1524
1524 1525 size_t region_marked_bytes() const { return _region_marked_bytes; }
1525 1526 };
1526 1527
1527 1528 // Heap region closure used for verifying the counting data
1528 1529 // that was accumulated concurrently and aggregated during
1529 1530 // the remark pause. This closure is applied to the heap
1530 1531 // regions during the STW cleanup pause.
1531 1532
1532 1533 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1533 1534 G1CollectedHeap* _g1h;
1534 1535 ConcurrentMark* _cm;
1535 1536 CalcLiveObjectsClosure _calc_cl;
1536 1537 BitMap* _region_bm; // Region BM to be verified
1537 1538 BitMap* _card_bm; // Card BM to be verified
1538 1539 bool _verbose; // verbose output?
1539 1540
1540 1541 BitMap* _exp_region_bm; // Expected Region BM values
1541 1542 BitMap* _exp_card_bm; // Expected card BM values
1542 1543
1543 1544 int _failures;
1544 1545
1545 1546 public:
1546 1547 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1547 1548 BitMap* region_bm,
1548 1549 BitMap* card_bm,
1549 1550 BitMap* exp_region_bm,
1550 1551 BitMap* exp_card_bm,
1551 1552 bool verbose) :
1552 1553 _g1h(g1h), _cm(g1h->concurrent_mark()),
1553 1554 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1554 1555 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1555 1556 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1556 1557 _failures(0) { }
1557 1558
1558 1559 int failures() const { return _failures; }
1559 1560
1560 1561 bool doHeapRegion(HeapRegion* hr) {
1561 1562 if (hr->continuesHumongous()) {
1562 1563 // We will ignore these here and process them when their
1563 1564 // associated "starts humongous" region is processed (see
1564 1565       // set_bit_for_region()). Note that we cannot rely on their
1565 1566       // associated "starts humongous" region to have its bit set to
1566 1567 // 1 since, due to the region chunking in the parallel region
1567 1568 // iteration, a "continues humongous" region might be visited
1568 1569 // before its associated "starts humongous".
1569 1570 return false;
1570 1571 }
1571 1572
1572 1573 int failures = 0;
1573 1574
1574 1575 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1575 1576 // this region and set the corresponding bits in the expected region
1576 1577 // and card bitmaps.
1577 1578 bool res = _calc_cl.doHeapRegion(hr);
1578 1579 assert(res == false, "should be continuing");
1579 1580
1580 1581 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1581 1582 Mutex::_no_safepoint_check_flag);
1582 1583
1583 1584 // Verify the marked bytes for this region.
1584 1585 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1585 1586 size_t act_marked_bytes = hr->next_marked_bytes();
1586 1587
1587 1588 // We're not OK if expected marked bytes > actual marked bytes. It means
1588 1589     // we have missed accounting for some objects during the actual marking.
1589 1590 if (exp_marked_bytes > act_marked_bytes) {
1590 1591 if (_verbose) {
1591 1592 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1592 1593 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1593 1594 hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1594 1595 }
1595 1596 failures += 1;
1596 1597 }
1597 1598
1598 1599     // Verify the bit for this region in the actual and expected
1599 1600     // (just calculated) region bitmaps.
1600 1601 // We're not OK if the bit in the calculated expected region
1601 1602 // bitmap is set and the bit in the actual region bitmap is not.
1602 1603 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1603 1604
1604 1605 bool expected = _exp_region_bm->at(index);
1605 1606 bool actual = _region_bm->at(index);
1606 1607 if (expected && !actual) {
1607 1608 if (_verbose) {
1608 1609 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1609 1610 "expected: %s, actual: %s",
1610 1611 hr->hrm_index(),
1611 1612 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1612 1613 }
1613 1614 failures += 1;
1614 1615 }
1615 1616
1616 1617 // Verify that the card bit maps for the cards spanned by the current
1617 1618 // region match. We have an error if we have a set bit in the expected
1618 1619 // bit map and the corresponding bit in the actual bitmap is not set.
1619 1620
1620 1621 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1621 1622 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1622 1623
1623 1624 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1624 1625 expected = _exp_card_bm->at(i);
1625 1626 actual = _card_bm->at(i);
1626 1627
1627 1628 if (expected && !actual) {
1628 1629 if (_verbose) {
1629 1630 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1630 1631 "expected: %s, actual: %s",
1631 1632 hr->hrm_index(), i,
1632 1633 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1633 1634 }
1634 1635 failures += 1;
1635 1636 }
1636 1637 }
1637 1638
1638 1639 if (failures > 0 && _verbose) {
1639 1640 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1640 1641 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1641 1642 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1642 1643 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1643 1644 }
1644 1645
1645 1646 _failures += failures;
1646 1647
1647 1648 // We could stop iteration over the heap when we
1648 1649 // find the first violating region by returning true.
1649 1650 return false;
1650 1651 }
1651 1652 };
1652 1653
1653 1654 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1654 1655 protected:
1655 1656 G1CollectedHeap* _g1h;
1656 1657 ConcurrentMark* _cm;
1657 1658 BitMap* _actual_region_bm;
1658 1659 BitMap* _actual_card_bm;
1659 1660
1660 1661 uint _n_workers;
1661 1662
1662 1663 BitMap* _expected_region_bm;
1663 1664 BitMap* _expected_card_bm;
1664 1665
1665 1666 int _failures;
1666 1667 bool _verbose;
1667 1668
1668 1669 public:
1669 1670 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1670 1671 BitMap* region_bm, BitMap* card_bm,
1671 1672 BitMap* expected_region_bm, BitMap* expected_card_bm)
1672 1673 : AbstractGangTask("G1 verify final counting"),
1673 1674 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1674 1675 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1675 1676 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1676 1677 _failures(0), _verbose(false),
1677 1678 _n_workers(0) {
1678 1679 assert(VerifyDuringGC, "don't call this otherwise");
1679 1680
1680 1681 // Use the value already set as the number of active threads
1681 1682 // in the call to run_task().
1682 1683 if (G1CollectedHeap::use_parallel_gc_threads()) {
1683 1684 assert( _g1h->workers()->active_workers() > 0,
1684 1685 "Should have been previously set");
1685 1686 _n_workers = _g1h->workers()->active_workers();
1686 1687 } else {
1687 1688 _n_workers = 1;
1688 1689 }
1689 1690
1690 1691 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1691 1692 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1692 1693
1693 1694 _verbose = _cm->verbose_medium();
1694 1695 }
1695 1696
1696 1697 void work(uint worker_id) {
1697 1698 assert(worker_id < _n_workers, "invariant");
1698 1699
1699 1700 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1700 1701 _actual_region_bm, _actual_card_bm,
1701 1702 _expected_region_bm,
1702 1703 _expected_card_bm,
1703 1704 _verbose);
1704 1705
1705 1706 if (G1CollectedHeap::use_parallel_gc_threads()) {
1706 1707 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1707 1708 worker_id,
1708 1709 _n_workers,
1709 1710 HeapRegion::VerifyCountClaimValue);
1710 1711 } else {
1711 1712 _g1h->heap_region_iterate(&verify_cl);
1712 1713 }
1713 1714
1714 1715 Atomic::add(verify_cl.failures(), &_failures);
1715 1716 }
1716 1717
1717 1718 int failures() const { return _failures; }
1718 1719 };
1719 1720
1720 1721 // Closure that finalizes the liveness counting data.
1721 1722 // Used during the cleanup pause.
1722 1723 // Sets the bits corresponding to the interval [NTAMS, top)
1723 1724 // (which contains the implicitly live objects) in the
1724 1725 // card liveness bitmap. Also sets the bit for each region,
1725 1726 // containing live data, in the region liveness bitmap.
1726 1727
1727 1728 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1728 1729 public:
1729 1730 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1730 1731 BitMap* region_bm,
1731 1732 BitMap* card_bm) :
1732 1733 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1733 1734
1734 1735 bool doHeapRegion(HeapRegion* hr) {
1735 1736
1736 1737 if (hr->continuesHumongous()) {
1737 1738 // We will ignore these here and process them when their
1738 1739 // associated "starts humongous" region is processed (see
1739 1740       // set_bit_for_region()). Note that we cannot rely on their
1740 1741       // associated "starts humongous" region to have its bit set to
1741 1742 // 1 since, due to the region chunking in the parallel region
1742 1743 // iteration, a "continues humongous" region might be visited
1743 1744 // before its associated "starts humongous".
1744 1745 return false;
1745 1746 }
1746 1747
1747 1748 HeapWord* ntams = hr->next_top_at_mark_start();
1748 1749 HeapWord* top = hr->top();
1749 1750
1750 1751 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1751 1752
1752 1753 // Mark the allocated-since-marking portion...
1753 1754 if (ntams < top) {
1754 1755 // This definitely means the region has live objects.
1755 1756 set_bit_for_region(hr);
1756 1757
1757 1758 // Now set the bits in the card bitmap for [ntams, top)
1758 1759 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1759 1760 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1760 1761
1761 1762       // Note: if we're looking at the last region in the heap, top
1762 1763       // could actually be just beyond the end of the heap; end_idx
1763 1764 // will then correspond to a (non-existent) card that is also
1764 1765 // just beyond the heap.
1765 1766 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1766 1767 // end of object is not card aligned - increment to cover
1767 1768 // all the cards spanned by the object
1768 1769 end_idx += 1;
1769 1770 }
1770 1771
1771 1772 assert(end_idx <= _card_bm->size(),
1772 1773 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1773 1774 end_idx, _card_bm->size()));
1774 1775 assert(start_idx < _card_bm->size(),
1775 1776 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1776 1777 start_idx, _card_bm->size()));
1777 1778
1778 1779 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1779 1780 }
1780 1781
1781 1782 // Set the bit for the region if it contains live data
1782 1783 if (hr->next_marked_bytes() > 0) {
1783 1784 set_bit_for_region(hr);
1784 1785 }
1785 1786
1786 1787 return false;
1787 1788 }
1788 1789 };
1789 1790
1790 1791 class G1ParFinalCountTask: public AbstractGangTask {
1791 1792 protected:
1792 1793 G1CollectedHeap* _g1h;
1793 1794 ConcurrentMark* _cm;
1794 1795 BitMap* _actual_region_bm;
1795 1796 BitMap* _actual_card_bm;
1796 1797
1797 1798 uint _n_workers;
1798 1799
1799 1800 public:
1800 1801 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1801 1802 : AbstractGangTask("G1 final counting"),
1802 1803 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1803 1804 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1804 1805 _n_workers(0) {
1805 1806 // Use the value already set as the number of active threads
1806 1807 // in the call to run_task().
1807 1808 if (G1CollectedHeap::use_parallel_gc_threads()) {
1808 1809 assert( _g1h->workers()->active_workers() > 0,
1809 1810 "Should have been previously set");
1810 1811 _n_workers = _g1h->workers()->active_workers();
1811 1812 } else {
1812 1813 _n_workers = 1;
1813 1814 }
1814 1815 }
1815 1816
1816 1817 void work(uint worker_id) {
1817 1818 assert(worker_id < _n_workers, "invariant");
1818 1819
1819 1820 FinalCountDataUpdateClosure final_update_cl(_g1h,
1820 1821 _actual_region_bm,
1821 1822 _actual_card_bm);
1822 1823
1823 1824 if (G1CollectedHeap::use_parallel_gc_threads()) {
1824 1825 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1825 1826 worker_id,
1826 1827 _n_workers,
1827 1828 HeapRegion::FinalCountClaimValue);
1828 1829 } else {
1829 1830 _g1h->heap_region_iterate(&final_update_cl);
1830 1831 }
1831 1832 }
1832 1833 };
1833 1834
1834 1835 class G1ParNoteEndTask;
1835 1836
1836 1837 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1837 1838 G1CollectedHeap* _g1;
1838 1839 size_t _max_live_bytes;
1839 1840 uint _regions_claimed;
1840 1841 size_t _freed_bytes;
1841 1842 FreeRegionList* _local_cleanup_list;
1842 1843 HeapRegionSetCount _old_regions_removed;
1843 1844 HeapRegionSetCount _humongous_regions_removed;
1844 1845 HRRSCleanupTask* _hrrs_cleanup_task;
1845 1846 double _claimed_region_time;
1846 1847 double _max_region_time;
1847 1848
1848 1849 public:
1849 1850 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1850 1851 FreeRegionList* local_cleanup_list,
1851 1852 HRRSCleanupTask* hrrs_cleanup_task) :
1852 1853 _g1(g1),
1853 1854 _max_live_bytes(0), _regions_claimed(0),
1854 1855 _freed_bytes(0),
1855 1856 _claimed_region_time(0.0), _max_region_time(0.0),
1856 1857 _local_cleanup_list(local_cleanup_list),
1857 1858 _old_regions_removed(),
1858 1859 _humongous_regions_removed(),
1859 1860 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1860 1861
1861 1862 size_t freed_bytes() { return _freed_bytes; }
1862 1863 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1863 1864 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1864 1865
1865 1866 bool doHeapRegion(HeapRegion *hr) {
1866 1867 if (hr->continuesHumongous()) {
1867 1868 return false;
1868 1869 }
1869 1870 // We use a claim value of zero here because all regions
1870 1871 // were claimed with value 1 in the FinalCount task.
1871 1872 _g1->reset_gc_time_stamps(hr);
1872 1873 double start = os::elapsedTime();
1873 1874 _regions_claimed++;
1874 1875 hr->note_end_of_marking();
1875 1876 _max_live_bytes += hr->max_live_bytes();
1876 1877
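     +     // A non-young region that has allocated space but no live data is
     +     // completely garbage and can be freed right away.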
1877 1878 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1878 1879 _freed_bytes += hr->used();
1879 1880 hr->set_containing_set(NULL);
1880 1881 if (hr->isHumongous()) {
1881 1882 assert(hr->startsHumongous(), "we should only see starts humongous");
1882 1883 _humongous_regions_removed.increment(1u, hr->capacity());
1883 1884 _g1->free_humongous_region(hr, _local_cleanup_list, true);
1884 1885 } else {
1885 1886 _old_regions_removed.increment(1u, hr->capacity());
1886 1887 _g1->free_region(hr, _local_cleanup_list, true);
1887 1888 }
1888 1889 } else {
1889 1890 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1890 1891 }
1891 1892
1892 1893 double region_time = (os::elapsedTime() - start);
1893 1894 _claimed_region_time += region_time;
1894 1895 if (region_time > _max_region_time) {
1895 1896 _max_region_time = region_time;
1896 1897 }
1897 1898 return false;
1898 1899 }
1899 1900
1900 1901 size_t max_live_bytes() { return _max_live_bytes; }
1901 1902 uint regions_claimed() { return _regions_claimed; }
1902 1903 double claimed_region_time_sec() { return _claimed_region_time; }
1903 1904 double max_region_time_sec() { return _max_region_time; }
1904 1905 };
1905 1906
1906 1907 class G1ParNoteEndTask: public AbstractGangTask {
1907 1908 friend class G1NoteEndOfConcMarkClosure;
1908 1909
1909 1910 protected:
1910 1911 G1CollectedHeap* _g1h;
1911 1912 size_t _max_live_bytes;
1912 1913 size_t _freed_bytes;
1913 1914 FreeRegionList* _cleanup_list;
1914 1915
1915 1916 public:
1916 1917 G1ParNoteEndTask(G1CollectedHeap* g1h,
1917 1918 FreeRegionList* cleanup_list) :
1918 1919 AbstractGangTask("G1 note end"), _g1h(g1h),
1919 1920 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1920 1921
1921 1922 void work(uint worker_id) {
1922 1923 double start = os::elapsedTime();
1923 1924 FreeRegionList local_cleanup_list("Local Cleanup List");
1924 1925 HRRSCleanupTask hrrs_cleanup_task;
1925 1926 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1926 1927 &hrrs_cleanup_task);
1927 1928 if (G1CollectedHeap::use_parallel_gc_threads()) {
1928 1929 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1929 1930 _g1h->workers()->active_workers(),
1930 1931 HeapRegion::NoteEndClaimValue);
1931 1932 } else {
1932 1933 _g1h->heap_region_iterate(&g1_note_end);
1933 1934 }
1934 1935 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1935 1936
1936 1937 // Now update the lists
1937 1938 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1938 1939 {
1939 1940 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1940 1941 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1941 1942 _max_live_bytes += g1_note_end.max_live_bytes();
1942 1943 _freed_bytes += g1_note_end.freed_bytes();
1943 1944
1944 1945 // If we iterate over the global cleanup list at the end of
1945 1946       // cleanup to do this printing we cannot guarantee that we only
1946 1947       // generate output for the newly-reclaimed regions (the list
1947 1948 // might not be empty at the beginning of cleanup; we might
1948 1949 // still be working on its previous contents). So we do the
1949 1950 // printing here, before we append the new regions to the global
1950 1951 // cleanup list.
1951 1952
1952 1953 G1HRPrinter* hr_printer = _g1h->hr_printer();
1953 1954 if (hr_printer->is_active()) {
1954 1955 FreeRegionListIterator iter(&local_cleanup_list);
1955 1956 while (iter.more_available()) {
1956 1957 HeapRegion* hr = iter.get_next();
1957 1958 hr_printer->cleanup(hr);
1958 1959 }
1959 1960 }
1960 1961
1961 1962 _cleanup_list->add_ordered(&local_cleanup_list);
1962 1963 assert(local_cleanup_list.is_empty(), "post-condition");
1963 1964
1964 1965 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1965 1966 }
1966 1967 }
1967 1968 size_t max_live_bytes() { return _max_live_bytes; }
1968 1969 size_t freed_bytes() { return _freed_bytes; }
1969 1970 };
1970 1971
1971 1972 class G1ParScrubRemSetTask: public AbstractGangTask {
1972 1973 protected:
1973 1974 G1RemSet* _g1rs;
1974 1975 BitMap* _region_bm;
1975 1976 BitMap* _card_bm;
1976 1977 public:
1977 1978 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1978 1979 BitMap* region_bm, BitMap* card_bm) :
1979 1980 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1980 1981 _region_bm(region_bm), _card_bm(card_bm) { }
1981 1982
1982 1983 void work(uint worker_id) {
1983 1984 if (G1CollectedHeap::use_parallel_gc_threads()) {
1984 1985 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1985 1986 HeapRegion::ScrubRemSetClaimValue);
1986 1987 } else {
1987 1988 _g1rs->scrub(_region_bm, _card_bm);
1988 1989 }
1989 1990 }
1990 1991
1991 1992 };
1992 1993
1993 1994 void ConcurrentMark::cleanup() {
1994 1995 // world is stopped at this checkpoint
1995 1996 assert(SafepointSynchronize::is_at_safepoint(),
1996 1997 "world should be stopped");
1997 1998 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1998 1999
1999 2000 // If a full collection has happened, we shouldn't do this.
2000 2001 if (has_aborted()) {
2001 2002 g1h->set_marking_complete(); // So bitmap clearing isn't confused
2002 2003 return;
2003 2004 }
2004 2005
2005 2006 g1h->verify_region_sets_optional();
2006 2007
2007 2008 if (VerifyDuringGC) {
2008 2009 HandleMark hm; // handle scope
2009 2010 Universe::heap()->prepare_for_verify();
2010 2011 Universe::verify(VerifyOption_G1UsePrevMarking,
2011 2012 " VerifyDuringGC:(before)");
2012 2013 }
2013 2014 g1h->check_bitmaps("Cleanup Start");
2014 2015
2015 2016 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2016 2017 g1p->record_concurrent_mark_cleanup_start();
2017 2018
2018 2019 double start = os::elapsedTime();
2019 2020
2020 2021 HeapRegionRemSet::reset_for_cleanup_tasks();
2021 2022
2022 2023 uint n_workers;
2023 2024
2024 2025 // Do counting once more with the world stopped for good measure.
2025 2026 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2026 2027
2027 2028 if (G1CollectedHeap::use_parallel_gc_threads()) {
2028 2029 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2029 2030 "sanity check");
2030 2031
2031 2032 g1h->set_par_threads();
2032 2033 n_workers = g1h->n_par_threads();
2033 2034 assert(g1h->n_par_threads() == n_workers,
2034 2035 "Should not have been reset");
2035 2036 g1h->workers()->run_task(&g1_par_count_task);
2036 2037 // Done with the parallel phase so reset to 0.
2037 2038 g1h->set_par_threads(0);
2038 2039
2039 2040 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2040 2041 "sanity check");
2041 2042 } else {
2042 2043 n_workers = 1;
2043 2044 g1_par_count_task.work(0);
2044 2045 }
2045 2046
2046 2047 if (VerifyDuringGC) {
2047 2048 // Verify that the counting data accumulated during marking matches
2048 2049 // that calculated by walking the marking bitmap.
2049 2050
2050 2051 // Bitmaps to hold expected values
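     +     // (the 'true' argument requests allocation in the current resource area)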
2051 2052 BitMap expected_region_bm(_region_bm.size(), true);
2052 2053 BitMap expected_card_bm(_card_bm.size(), true);
2053 2054
2054 2055 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2055 2056 &_region_bm,
2056 2057 &_card_bm,
2057 2058 &expected_region_bm,
2058 2059 &expected_card_bm);
2059 2060
2060 2061 if (G1CollectedHeap::use_parallel_gc_threads()) {
2061 2062 g1h->set_par_threads((int)n_workers);
2062 2063 g1h->workers()->run_task(&g1_par_verify_task);
2063 2064 // Done with the parallel phase so reset to 0.
2064 2065 g1h->set_par_threads(0);
2065 2066
2066 2067 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2067 2068 "sanity check");
2068 2069 } else {
2069 2070 g1_par_verify_task.work(0);
2070 2071 }
2071 2072
2072 2073 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2073 2074 }
2074 2075
2075 2076 size_t start_used_bytes = g1h->used();
2076 2077 g1h->set_marking_complete();
2077 2078
2078 2079 double count_end = os::elapsedTime();
2079 2080 double this_final_counting_time = (count_end - start);
2080 2081 _total_counting_time += this_final_counting_time;
2081 2082
2082 2083 if (G1PrintRegionLivenessInfo) {
2083 2084 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2084 2085 _g1h->heap_region_iterate(&cl);
2085 2086 }
2086 2087
2087 2088 // Install newly created mark bitMap as "prev".
2088 2089 swapMarkBitMaps();
2089 2090
2090 2091 g1h->reset_gc_time_stamp();
2091 2092
2092 2093 // Note end of marking in all heap regions.
2093 2094 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2094 2095 if (G1CollectedHeap::use_parallel_gc_threads()) {
2095 2096 g1h->set_par_threads((int)n_workers);
2096 2097 g1h->workers()->run_task(&g1_par_note_end_task);
2097 2098 g1h->set_par_threads(0);
2098 2099
2099 2100 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2100 2101 "sanity check");
2101 2102 } else {
2102 2103 g1_par_note_end_task.work(0);
2103 2104 }
2104 2105 g1h->check_gc_time_stamps();
2105 2106
2106 2107 if (!cleanup_list_is_empty()) {
2107 2108 // The cleanup list is not empty, so we'll have to process it
2108 2109 // concurrently. Notify anyone else that might be wanting free
2109 2110 // regions that there will be more free regions coming soon.
2110 2111 g1h->set_free_regions_coming();
2111 2112 }
2112 2113
2113 2114   // Scrub the remembered sets (if enabled) before the record_concurrent_mark_cleanup_end()
2114 2115   // call below, since it affects the metric by which we sort the heap regions.
2115 2116 if (G1ScrubRemSets) {
2116 2117 double rs_scrub_start = os::elapsedTime();
2117 2118 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2118 2119 if (G1CollectedHeap::use_parallel_gc_threads()) {
2119 2120 g1h->set_par_threads((int)n_workers);
2120 2121 g1h->workers()->run_task(&g1_par_scrub_rs_task);
2121 2122 g1h->set_par_threads(0);
2122 2123
2123 2124 assert(g1h->check_heap_region_claim_values(
2124 2125 HeapRegion::ScrubRemSetClaimValue),
2125 2126 "sanity check");
2126 2127 } else {
2127 2128 g1_par_scrub_rs_task.work(0);
2128 2129 }
2129 2130
2130 2131 double rs_scrub_end = os::elapsedTime();
2131 2132 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2132 2133 _total_rs_scrub_time += this_rs_scrub_time;
2133 2134 }
2134 2135
2135 2136 // this will also free any regions totally full of garbage objects,
2136 2137 // and sort the regions.
2137 2138 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2138 2139
2139 2140 // Statistics.
2140 2141 double end = os::elapsedTime();
2141 2142 _cleanup_times.add((end - start) * 1000.0);
2142 2143
2143 2144 if (G1Log::fine()) {
2144 2145 g1h->print_size_transition(gclog_or_tty,
2145 2146 start_used_bytes,
2146 2147 g1h->used(),
2147 2148 g1h->capacity());
2148 2149 }
2149 2150
2150 2151   // Cleanup will have freed any regions completely full of garbage.
2151 2152 // Update the soft reference policy with the new heap occupancy.
2152 2153 Universe::update_heap_info_at_gc();
2153 2154
2154 2155 if (VerifyDuringGC) {
2155 2156 HandleMark hm; // handle scope
2156 2157 Universe::heap()->prepare_for_verify();
2157 2158 Universe::verify(VerifyOption_G1UsePrevMarking,
2158 2159 " VerifyDuringGC:(after)");
2159 2160 }
2160 2161 g1h->check_bitmaps("Cleanup End");
2161 2162
2162 2163 g1h->verify_region_sets_optional();
2163 2164
2164 2165 // We need to make this be a "collection" so any collection pause that
2165 2166 // races with it goes around and waits for completeCleanup to finish.
2166 2167 g1h->increment_total_collections();
2167 2168
2168 2169 // Clean out dead classes and update Metaspace sizes.
2169 2170 if (ClassUnloadingWithConcurrentMark) {
2170 2171 ClassLoaderDataGraph::purge();
2171 2172 }
2172 2173 MetaspaceGC::compute_new_size();
2173 2174
2174 2175 // We reclaimed old regions so we should calculate the sizes to make
2175 2176 // sure we update the old gen/space data.
2176 2177 g1h->g1mm()->update_sizes();
2177 2178
2178 2179 g1h->trace_heap_after_concurrent_cycle();
2179 2180 }
2180 2181
2181 2182 void ConcurrentMark::completeCleanup() {
2182 2183 if (has_aborted()) return;
2183 2184
2184 2185 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2185 2186
2186 2187 _cleanup_list.verify_optional();
2187 2188 FreeRegionList tmp_free_list("Tmp Free List");
2188 2189
2189 2190 if (G1ConcRegionFreeingVerbose) {
2190 2191 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2191 2192 "cleanup list has %u entries",
2192 2193 _cleanup_list.length());
2193 2194 }
2194 2195
2195 2196 // No one else should be accessing the _cleanup_list at this point,
2196 2197 // so it is not necessary to take any locks
2197 2198 while (!_cleanup_list.is_empty()) {
2198 2199 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2199 2200 assert(hr != NULL, "Got NULL from a non-empty list");
2200 2201 hr->par_clear();
2201 2202 tmp_free_list.add_ordered(hr);
2202 2203
2203 2204 // Instead of adding one region at a time to the secondary_free_list,
2204 2205 // we accumulate them in the local list and move them a few at a
2205 2206 // time. This also cuts down on the number of notify_all() calls
2206 2207 // we do during this process. We'll also append the local list when
2207 2208 // _cleanup_list is empty (which means we just removed the last
2208 2209 // region from the _cleanup_list).
2209 2210 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2210 2211 _cleanup_list.is_empty()) {
2211 2212 if (G1ConcRegionFreeingVerbose) {
2212 2213 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2213 2214 "appending %u entries to the secondary_free_list, "
2214 2215 "cleanup list still has %u entries",
2215 2216 tmp_free_list.length(),
2216 2217 _cleanup_list.length());
2217 2218 }
2218 2219
2219 2220 {
2220 2221 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2221 2222 g1h->secondary_free_list_add(&tmp_free_list);
2222 2223 SecondaryFreeList_lock->notify_all();
2223 2224 }
2224 2225
2225 2226 if (G1StressConcRegionFreeing) {
2226 2227 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2227 2228 os::sleep(Thread::current(), (jlong) 1, false);
2228 2229 }
2229 2230 }
2230 2231 }
2231 2232 }
2232 2233 assert(tmp_free_list.is_empty(), "post-condition");
2233 2234 }
2234 2235
2235 2236 // Supporting Object and Oop closures for reference discovery
2236 2237 // and processing during marking
2237 2238
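     +// A reference is alive if it is non-NULL and either points outside the
     +// G1 reserved heap or refers to an object that the marking information
     +// has not shown to be dead (see is_obj_ill()).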
2238 2239 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2239 2240 HeapWord* addr = (HeapWord*)obj;
2240 2241 return addr != NULL &&
2241 2242 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2242 2243 }
2243 2244
2244 2245 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2245 2246 // Uses the CMTask associated with a worker thread (for serial reference
2246 2247 // processing the CMTask for worker 0 is used) to preserve (mark) and
2247 2248 // trace referent objects.
2248 2249 //
2249 2250 // Using the CMTask and embedded local queues avoids having the worker
2250 2251 // threads operating on the global mark stack. This reduces the risk
2251 2252 // of overflowing the stack - which we would rather avoid at this late
2252 2253 // stage. Also using the tasks' local queues removes the potential
2253 2254 // for the workers to interfere with each other, which could occur if
2254 2255 // they operated on the global stack.
2255 2256
2256 2257 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2257 2258 ConcurrentMark* _cm;
2258 2259 CMTask* _task;
2259 2260 int _ref_counter_limit;
2260 2261 int _ref_counter;
2261 2262 bool _is_serial;
2262 2263 public:
2263 2264 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2264 2265 _cm(cm), _task(task), _is_serial(is_serial),
2265 2266 _ref_counter_limit(G1RefProcDrainInterval) {
2266 2267 assert(_ref_counter_limit > 0, "sanity");
2267 2268 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2268 2269 _ref_counter = _ref_counter_limit;
2269 2270 }
2270 2271
2271 2272 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2272 2273 virtual void do_oop( oop* p) { do_oop_work(p); }
2273 2274
2274 2275 template <class T> void do_oop_work(T* p) {
2275 2276 if (!_cm->has_overflown()) {
2276 2277 oop obj = oopDesc::load_decode_heap_oop(p);
2277 2278 if (_cm->verbose_high()) {
2278 2279 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2279 2280 "*"PTR_FORMAT" = "PTR_FORMAT,
2280 2281 _task->worker_id(), p2i(p), p2i((void*) obj));
2281 2282 }
2282 2283
2283 2284 _task->deal_with_reference(obj);
2284 2285 _ref_counter--;
2285 2286
2286 2287 if (_ref_counter == 0) {
2287 2288 // We have dealt with _ref_counter_limit references, pushing them
2288 2289 // and objects reachable from them on to the local stack (and
2289 2290 // possibly the global stack). Call CMTask::do_marking_step() to
2290 2291 // process these entries.
2291 2292 //
2292 2293 // We call CMTask::do_marking_step() in a loop, which we'll exit if
2293 2294 // there's nothing more to do (i.e. we're done with the entries that
2294 2295 // were pushed as a result of the CMTask::deal_with_reference() calls
2295 2296 // above) or we overflow.
2296 2297 //
2297 2298 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2298 2299 // flag while there may still be some work to do. (See the comment at
2299 2300 // the beginning of CMTask::do_marking_step() for those conditions -
2300 2301 // one of which is reaching the specified time target.) It is only
2301 2302 // when CMTask::do_marking_step() returns without setting the
2302 2303 // has_aborted() flag that the marking step has completed.
2303 2304 do {
2304 2305 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2305 2306 _task->do_marking_step(mark_step_duration_ms,
2306 2307 false /* do_termination */,
2307 2308 _is_serial);
2308 2309 } while (_task->has_aborted() && !_cm->has_overflown());
2309 2310 _ref_counter = _ref_counter_limit;
2310 2311 }
2311 2312 } else {
2312 2313 if (_cm->verbose_high()) {
2313 2314 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2314 2315 }
2315 2316 }
2316 2317 }
2317 2318 };
2318 2319
2319 2320 // 'Drain' oop closure used by both serial and parallel reference processing.
2320 2321 // Uses the CMTask associated with a given worker thread (for serial
2321 2322 // reference processing the CMTask for worker 0 is used). Calls the
2322 2323 // do_marking_step routine, with an unbelievably large timeout value,
2323 2324 // to drain the marking data structures of the remaining entries
2324 2325 // added by the 'keep alive' oop closure above.
2325 2326
2326 2327 class G1CMDrainMarkingStackClosure: public VoidClosure {
2327 2328 ConcurrentMark* _cm;
2328 2329 CMTask* _task;
2329 2330 bool _is_serial;
2330 2331 public:
2331 2332 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2332 2333 _cm(cm), _task(task), _is_serial(is_serial) {
2333 2334 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2334 2335 }
2335 2336
2336 2337 void do_void() {
2337 2338 do {
2338 2339 if (_cm->verbose_high()) {
2339 2340 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2340 2341 _task->worker_id(), BOOL_TO_STR(_is_serial));
2341 2342 }
2342 2343
2343 2344 // We call CMTask::do_marking_step() to completely drain the local
2344 2345 // and global marking stacks of entries pushed by the 'keep alive'
2345 2346 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2346 2347 //
2347 2348 // CMTask::do_marking_step() is called in a loop, which we'll exit
2348 2349       // if there's nothing more to do (i.e. we've completely drained the
2349 2350       // entries that were pushed as a result of applying the 'keep alive'
2350 2351 // closure to the entries on the discovered ref lists) or we overflow
2351 2352 // the global marking stack.
2352 2353 //
2353 2354 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2354 2355 // flag while there may still be some work to do. (See the comment at
2355 2356 // the beginning of CMTask::do_marking_step() for those conditions -
2356 2357 // one of which is reaching the specified time target.) It is only
2357 2358 // when CMTask::do_marking_step() returns without setting the
2358 2359 // has_aborted() flag that the marking step has completed.
2359 2360
2360 2361 _task->do_marking_step(1000000000.0 /* something very large */,
2361 2362 true /* do_termination */,
2362 2363 _is_serial);
2363 2364 } while (_task->has_aborted() && !_cm->has_overflown());
2364 2365 }
2365 2366 };
2366 2367
2367 2368 // Implementation of AbstractRefProcTaskExecutor for parallel
2368 2369 // reference processing at the end of G1 concurrent marking
2369 2370
2370 2371 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2371 2372 private:
2372 2373 G1CollectedHeap* _g1h;
2373 2374 ConcurrentMark* _cm;
2374 2375 WorkGang* _workers;
2375 2376 int _active_workers;
2376 2377
2377 2378 public:
2378 2379 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2379 2380 ConcurrentMark* cm,
2380 2381 WorkGang* workers,
2381 2382 int n_workers) :
2382 2383 _g1h(g1h), _cm(cm),
2383 2384 _workers(workers), _active_workers(n_workers) { }
2384 2385
2385 2386 // Executes the given task using concurrent marking worker threads.
2386 2387 virtual void execute(ProcessTask& task);
2387 2388 virtual void execute(EnqueueTask& task);
2388 2389 };
2389 2390
2390 2391 class G1CMRefProcTaskProxy: public AbstractGangTask {
2391 2392 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2392 2393 ProcessTask& _proc_task;
2393 2394 G1CollectedHeap* _g1h;
2394 2395 ConcurrentMark* _cm;
2395 2396
2396 2397 public:
2397 2398 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2398 2399 G1CollectedHeap* g1h,
2399 2400 ConcurrentMark* cm) :
2400 2401 AbstractGangTask("Process reference objects in parallel"),
2401 2402 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2402 2403 ReferenceProcessor* rp = _g1h->ref_processor_cm();
2403 2404 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2404 2405 }
2405 2406
2406 2407 virtual void work(uint worker_id) {
2407 2408 ResourceMark rm;
2408 2409 HandleMark hm;
2409 2410 CMTask* task = _cm->task(worker_id);
2410 2411 G1CMIsAliveClosure g1_is_alive(_g1h);
2411 2412 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2412 2413 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2413 2414
2414 2415 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2415 2416 }
2416 2417 };
2417 2418
2418 2419 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2419 2420 assert(_workers != NULL, "Need parallel worker threads.");
2420 2421 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2421 2422
2422 2423 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2423 2424
2424 2425 // We need to reset the concurrency level before each
2425 2426 // proxy task execution, so that the termination protocol
2426 2427   // and overflow handling in CMTask::do_marking_step() know
2427 2428 // how many workers to wait for.
2428 2429 _cm->set_concurrency(_active_workers);
2429 2430 _g1h->set_par_threads(_active_workers);
2430 2431 _workers->run_task(&proc_task_proxy);
2431 2432 _g1h->set_par_threads(0);
2432 2433 }
2433 2434
2434 2435 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2435 2436 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2436 2437 EnqueueTask& _enq_task;
2437 2438
2438 2439 public:
2439 2440 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2440 2441 AbstractGangTask("Enqueue reference objects in parallel"),
2441 2442 _enq_task(enq_task) { }
2442 2443
2443 2444 virtual void work(uint worker_id) {
2444 2445 _enq_task.work(worker_id);
2445 2446 }
2446 2447 };
2447 2448
2448 2449 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2449 2450 assert(_workers != NULL, "Need parallel worker threads.");
2450 2451 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2451 2452
2452 2453 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2453 2454
2454 2455 // Not strictly necessary but...
2455 2456 //
2456 2457 // We need to reset the concurrency level before each
2457 2458 // proxy task execution, so that the termination protocol
2458 2459   // and overflow handling in CMTask::do_marking_step() know
2459 2460 // how many workers to wait for.
2460 2461 _cm->set_concurrency(_active_workers);
2461 2462 _g1h->set_par_threads(_active_workers);
2462 2463 _workers->run_task(&enq_task_proxy);
2463 2464 _g1h->set_par_threads(0);
2464 2465 }
2465 2466
2466 2467 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2467 2468 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2468 2469 }
2469 2470
2470 2471 // Helper class to get rid of some boilerplate code: a GCTraceTime that,
2471 2472 // when enabled, first prints a separating space so that consecutive
2472 2473 // remark sub-phases line up in the log output.
2471 2472 class G1RemarkGCTraceTime : public GCTraceTime {
2472 2473 static bool doit_and_prepend(bool doit) {
2473 2474 if (doit) {
2474 2475 gclog_or_tty->put(' ');
2475 2476 }
2476 2477 return doit;
2477 2478 }
2478 2479
2479 2480 public:
2480 2481 G1RemarkGCTraceTime(const char* title, bool doit)
2481 2482 : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
2482 2483 G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
2483 2484 }
2484 2485 };
2485 2486
2486 2487 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2487 2488 if (has_overflown()) {
2488 2489 // Skip processing the discovered references if we have
2489 2490 // overflown the global marking stack. Reference objects
2490 2491 // only get discovered once so it is OK to not
2491 2492 // de-populate the discovered reference lists. We could have,
2492 2493 // but the only benefit would be that, when marking restarts,
2493 2494     // fewer reference objects are discovered.
2494 2495 return;
2495 2496 }
2496 2497
2497 2498 ResourceMark rm;
2498 2499 HandleMark hm;
2499 2500
2500 2501 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2501 2502
2502 2503 // Is alive closure.
2503 2504 G1CMIsAliveClosure g1_is_alive(g1h);
2504 2505
2505 2506 // Inner scope to exclude the cleaning of the string and symbol
2506 2507 // tables from the displayed time.
2507 2508 {
2508 2509 if (G1Log::finer()) {
2509 2510 gclog_or_tty->put(' ');
2510 2511 }
2511 2512 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
2512 2513
2513 2514 ReferenceProcessor* rp = g1h->ref_processor_cm();
2514 2515
2515 2516 // See the comment in G1CollectedHeap::ref_processing_init()
2516 2517 // about how reference processing currently works in G1.
2517 2518
2518 2519 // Set the soft reference policy
2519 2520 rp->setup_policy(clear_all_soft_refs);
2520 2521 assert(_markStack.isEmpty(), "mark stack should be empty");
2521 2522
2522 2523 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2523 2524 // in serial reference processing. Note these closures are also
2524 2525     // used for serially processing (by the current thread) the
2525 2526 // JNI references during parallel reference processing.
2526 2527 //
2527 2528 // These closures do not need to synchronize with the worker
2528 2529 // threads involved in parallel reference processing as these
2529 2530 // instances are executed serially by the current thread (e.g.
2530 2531 // reference processing is not multi-threaded and is thus
2531 2532 // performed by the current thread instead of a gang worker).
2532 2533 //
2533 2534     // The gang tasks involved in parallel reference processing create
2534 2535 // their own instances of these closures, which do their own
2535 2536 // synchronization among themselves.
2536 2537 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2537 2538 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2538 2539
2539 2540 // We need at least one active thread. If reference processing
2540 2541 // is not multi-threaded we use the current (VMThread) thread,
2541 2542 // otherwise we use the work gang from the G1CollectedHeap and
2542 2543 // we utilize all the worker threads we can.
2543 2544 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2544 2545 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
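     +     // Clamp the number of workers to the range [1, _max_worker_id].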
2545 2546 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2546 2547
2547 2548 // Parallel processing task executor.
2548 2549 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2549 2550 g1h->workers(), active_workers);
2550 2551 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2551 2552
2552 2553 // Set the concurrency level. The phase was already set prior to
2553 2554 // executing the remark task.
2554 2555 set_concurrency(active_workers);
2555 2556
2556 2557 // Set the degree of MT processing here. If the discovery was done MT,
2557 2558 // the number of threads involved during discovery could differ from
2558 2559 // the number of active workers. This is OK as long as the discovered
2559 2560 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2560 2561 rp->set_active_mt_degree(active_workers);
2561 2562
2562 2563 // Process the weak references.
2563 2564 const ReferenceProcessorStats& stats =
2564 2565 rp->process_discovered_references(&g1_is_alive,
2565 2566 &g1_keep_alive,
2566 2567 &g1_drain_mark_stack,
2567 2568 executor,
2568 2569 g1h->gc_timer_cm(),
2569 2570 concurrent_gc_id());
2570 2571 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2571 2572
2572 2573 // The do_oop work routines of the keep_alive and drain_marking_stack
2573 2574 // oop closures will set the has_overflown flag if we overflow the
2574 2575 // global marking stack.
2575 2576
2576 2577 assert(_markStack.overflow() || _markStack.isEmpty(),
2577 2578 "mark stack should be empty (unless it overflowed)");
2578 2579
2579 2580 if (_markStack.overflow()) {
2580 2581 // This should have been done already when we tried to push an
2581 2582 // entry on to the global mark stack. But let's do it again.
2582 2583 set_has_overflown();
2583 2584 }
2584 2585
2585 2586 assert(rp->num_q() == active_workers, "why not");
2586 2587
2587 2588 rp->enqueue_discovered_references(executor);
2588 2589
2589 2590 rp->verify_no_references_recorded();
2590 2591 assert(!rp->discovery_enabled(), "Post condition");
2591 2592 }
2592 2593
2593 2594 if (has_overflown()) {
2594 2595     // We cannot trust g1_is_alive if the marking stack overflowed
2595 2596 return;
2596 2597 }
2597 2598
2598 2599 assert(_markStack.isEmpty(), "Marking should have completed");
2599 2600
2600 2601 // Unload Klasses, String, Symbols, Code Cache, etc.
2601 2602 {
2602 2603 G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
2603 2604
2604 2605 if (ClassUnloadingWithConcurrentMark) {
2606 +     // Cleaning of klasses depends on correct information from MetadataOnStackMark. The CodeCache::mark_on_stack
2607 + // part is too slow to be done serially, so it is handled during the weakRefsWorkParallelPart phase.
2608 + // Defer the cleaning until we have complete on_stack data.
2609 + MetadataOnStackMark md_on_stack(false /* Don't visit the code cache at this point */);
2610 +
2605 2611 bool purged_classes;
2606 2612
2607 2613 {
2608 2614 G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
2609 - purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
2615 + purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2610 2616 }
2611 2617
2612 2618 {
2613 2619 G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
2614 2620 weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2615 2621 }
2622 +
2623 + {
2624 + G1RemarkGCTraceTime trace("Deallocate Metadata", G1Log::finest());
2625 + ClassLoaderDataGraph::free_deallocate_lists();
2626 + }
2616 2627 }
2617 2628
2618 2629 if (G1StringDedup::is_enabled()) {
2619 2630 G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
2620 2631 G1StringDedup::unlink(&g1_is_alive);
2621 2632 }
2622 2633 }
2623 2634 }
2624 2635
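     +// Exchange the two marking bitmaps: at the end of a successful marking
     +// cycle the "next" bitmap holds the complete marking results and becomes
     +// the new "prev" bitmap, while the old "prev" bitmap is recycled as the
     +// "next" bitmap for the following cycle.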
2625 2636 void ConcurrentMark::swapMarkBitMaps() {
2626 2637 CMBitMapRO* temp = _prevMarkBitMap;
2627 2638 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2628 2639 _nextMarkBitMap = (CMBitMap*) temp;
2629 2640 }
2630 2641
2631 2642 class CMObjectClosure;
2632 2643
2633 2644 // Closure for iterating over objects, currently only used for
2634 2645 // processing SATB buffers.
2635 2646 class CMObjectClosure : public ObjectClosure {
2636 2647 private:
2637 2648 CMTask* _task;
2638 2649
2639 2650 public:
2640 2651 void do_object(oop obj) {
2641 2652 _task->deal_with_reference(obj);
2642 2653 }
2643 2654
2644 2655 CMObjectClosure(CMTask* task) : _task(task) { }
2645 2656 };
2646 2657
2647 2658 class G1RemarkThreadsClosure : public ThreadClosure {
2648 2659 CMObjectClosure _cm_obj;
2649 2660 G1CMOopClosure _cm_cl;
2650 2661 MarkingCodeBlobClosure _code_cl;
2651 2662 int _thread_parity;
2652 2663 bool _is_par;
2653 2664
2654 2665 public:
2655 2666 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
2656 2667 _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2657 2668 _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
2658 2669
2659 2670 void do_thread(Thread* thread) {
2660 2671 if (thread->is_Java_thread()) {
2661 2672 if (thread->claim_oops_do(_is_par, _thread_parity)) {
2662 2673 JavaThread* jt = (JavaThread*)thread;
2663 2674
2664 2675         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2665 2676         // however, oops reachable from nmethods have very complex lifecycles:
2666 2677 // * Alive if on the stack of an executing method
2667 2678 // * Weakly reachable otherwise
2668 2679         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2669 2680 // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2670 2681 jt->nmethods_do(&_code_cl);
2671 2682
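     +         // Also drain this thread's private SATB buffer while we hold the
     +         // claim on the thread.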
2672 2683 jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
2673 2684 }
2674 2685 } else if (thread->is_VM_thread()) {
2675 2686 if (thread->claim_oops_do(_is_par, _thread_parity)) {
2676 2687 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
2677 2688 }
2678 2689 }
2679 2690 }
2680 2691 };
2681 2692
2682 2693 class CMRemarkTask: public AbstractGangTask {
2683 2694 private:
2684 2695 ConcurrentMark* _cm;
2685 2696 bool _is_serial;
2686 2697 public:
2687 2698 void work(uint worker_id) {
2688 2699 // Since all available tasks are actually started, we should
2689 2700     // only proceed if we're supposed to be active.
2690 2701 if (worker_id < _cm->active_tasks()) {
2691 2702 CMTask* task = _cm->task(worker_id);
2692 2703 task->record_start_time();
2693 2704 {
2694 2705 ResourceMark rm;
2695 2706 HandleMark hm;
2696 2707
2697 2708 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
2698 2709 Threads::threads_do(&threads_f);
2699 2710 }
2700 2711
2701 2712 do {
2702 2713 task->do_marking_step(1000000000.0 /* something very large */,
2703 2714 true /* do_termination */,
2704 2715 _is_serial);
2705 2716 } while (task->has_aborted() && !_cm->has_overflown());
2706 2717 // If we overflow, then we do not want to restart. We instead
2707 2718 // want to abort remark and do concurrent marking again.
2708 2719 task->record_end_time();
2709 2720 }
2710 2721 }
2711 2722
2712 2723 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2713 2724 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2714 2725 _cm->terminator()->reset_for_reuse(active_workers);
2715 2726 }
2716 2727 };
2717 2728
2718 2729 void ConcurrentMark::checkpointRootsFinalWork() {
2719 2730 ResourceMark rm;
2720 2731 HandleMark hm;
2721 2732 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2722 2733
2723 2734 G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
2724 2735
2725 2736 g1h->ensure_parsability(false);
2726 2737
2727 2738 if (G1CollectedHeap::use_parallel_gc_threads()) {
2728 2739 G1CollectedHeap::StrongRootsScope srs(g1h);
2729 2740 // this is remark, so we'll use up all active threads
2730 2741 uint active_workers = g1h->workers()->active_workers();
2731 2742 if (active_workers == 0) {
2732 2743 assert(active_workers > 0, "Should have been set earlier");
2733 2744 active_workers = (uint) ParallelGCThreads;
2734 2745 g1h->workers()->set_active_workers(active_workers);
2735 2746 }
2736 2747 set_concurrency_and_phase(active_workers, false /* concurrent */);
2737 2748     // Leave _parallel_marking_threads at its
2738 2749 // value originally calculated in the ConcurrentMark
2739 2750 // constructor and pass values of the active workers
2740 2751 // through the gang in the task.
2741 2752
2742 2753 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2743 2754 // We will start all available threads, even if we decide that the
2744 2755 // active_workers will be fewer. The extra ones will just bail out
2745 2756 // immediately.
2746 2757 g1h->set_par_threads(active_workers);
2747 2758 g1h->workers()->run_task(&remarkTask);
2748 2759 g1h->set_par_threads(0);
2749 2760 } else {
2750 2761 G1CollectedHeap::StrongRootsScope srs(g1h);
2751 2762 uint active_workers = 1;
2752 2763 set_concurrency_and_phase(active_workers, false /* concurrent */);
2753 2764
2754 2765 // Note - if there's no work gang then the VMThread will be
2755 2766 // the thread to execute the remark - serially. We have
2756 2767 // to pass true for the is_serial parameter so that
2757 2768 // CMTask::do_marking_step() doesn't enter the sync
2758 2769 // barriers in the event of an overflow. Doing so will
2759 2770 // cause an assert that the current thread is not a
2760 2771 // concurrent GC thread.
2761 2772 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
2762 2773 remarkTask.work(0);
2763 2774 }
2764 2775 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2765 2776 guarantee(has_overflown() ||
2766 2777 satb_mq_set.completed_buffers_num() == 0,
2767 2778 err_msg("Invariant: has_overflown = %s, num buffers = %d",
2768 2779 BOOL_TO_STR(has_overflown()),
2769 2780 satb_mq_set.completed_buffers_num()));
2770 2781
2771 2782 print_stats();
2772 2783 }
2773 2784
2774 2785 #ifndef PRODUCT
2775 2786
2776 2787 class PrintReachableOopClosure: public OopClosure {
2777 2788 private:
2778 2789 G1CollectedHeap* _g1h;
2779 2790 outputStream* _out;
2780 2791 VerifyOption _vo;
2781 2792 bool _all;
2782 2793
2783 2794 public:
2784 2795 PrintReachableOopClosure(outputStream* out,
2785 2796 VerifyOption vo,
2786 2797 bool all) :
2787 2798 _g1h(G1CollectedHeap::heap()),
2788 2799 _out(out), _vo(vo), _all(all) { }
2789 2800
2790 2801 void do_oop(narrowOop* p) { do_oop_work(p); }
2791 2802 void do_oop( oop* p) { do_oop_work(p); }
2792 2803
2793 2804 template <class T> void do_oop_work(T* p) {
2794 2805 oop obj = oopDesc::load_decode_heap_oop(p);
2795 2806 const char* str = NULL;
2796 2807 const char* str2 = "";
2797 2808
2798 2809 if (obj == NULL) {
2799 2810 str = "";
2800 2811 } else if (!_g1h->is_in_g1_reserved(obj)) {
2801 2812 str = " O";
2802 2813 } else {
2803 2814 HeapRegion* hr = _g1h->heap_region_containing(obj);
2804 2815 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2805 2816 bool marked = _g1h->is_marked(obj, _vo);
2806 2817
2807 2818 if (over_tams) {
2808 2819 str = " >";
2809 2820 if (marked) {
2810 2821 str2 = " AND MARKED";
2811 2822 }
2812 2823 } else if (marked) {
2813 2824 str = " M";
2814 2825 } else {
2815 2826 str = " NOT";
2816 2827 }
2817 2828 }
2818 2829
2819 2830 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2820 2831 p2i(p), p2i((void*) obj), str, str2);
2821 2832 }
2822 2833 };
2823 2834
2824 2835 class PrintReachableObjectClosure : public ObjectClosure {
2825 2836 private:
2826 2837 G1CollectedHeap* _g1h;
2827 2838 outputStream* _out;
2828 2839 VerifyOption _vo;
2829 2840 bool _all;
2830 2841 HeapRegion* _hr;
2831 2842
2832 2843 public:
2833 2844 PrintReachableObjectClosure(outputStream* out,
2834 2845 VerifyOption vo,
2835 2846 bool all,
2836 2847 HeapRegion* hr) :
2837 2848 _g1h(G1CollectedHeap::heap()),
2838 2849 _out(out), _vo(vo), _all(all), _hr(hr) { }
2839 2850
2840 2851 void do_object(oop o) {
2841 2852 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2842 2853 bool marked = _g1h->is_marked(o, _vo);
2843 2854 bool print_it = _all || over_tams || marked;
2844 2855
2845 2856 if (print_it) {
2846 2857 _out->print_cr(" "PTR_FORMAT"%s",
2847 2858 p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2848 2859 PrintReachableOopClosure oopCl(_out, _vo, _all);
2849 2860 o->oop_iterate_no_header(&oopCl);
2850 2861 }
2851 2862 }
2852 2863 };
2853 2864
2854 2865 class PrintReachableRegionClosure : public HeapRegionClosure {
2855 2866 private:
2856 2867 G1CollectedHeap* _g1h;
2857 2868 outputStream* _out;
2858 2869 VerifyOption _vo;
2859 2870 bool _all;
2860 2871
2861 2872 public:
2862 2873 bool doHeapRegion(HeapRegion* hr) {
2863 2874 HeapWord* b = hr->bottom();
2864 2875 HeapWord* e = hr->end();
2865 2876 HeapWord* t = hr->top();
2866 2877 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2867 2878 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2868 2879 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2869 2880 _out->cr();
2870 2881
2871 2882 HeapWord* from = b;
2872 2883 HeapWord* to = t;
2873 2884
2874 2885 if (to > from) {
2875 2886 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2876 2887 _out->cr();
2877 2888 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2878 2889 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2879 2890 _out->cr();
2880 2891 }
2881 2892
2882 2893 return false;
2883 2894 }
2884 2895
2885 2896 PrintReachableRegionClosure(outputStream* out,
2886 2897 VerifyOption vo,
2887 2898 bool all) :
2888 2899 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2889 2900 };
2890 2901
2891 2902 void ConcurrentMark::print_reachable(const char* str,
2892 2903 VerifyOption vo,
2893 2904 bool all) {
2894 2905 gclog_or_tty->cr();
2895 2906 gclog_or_tty->print_cr("== Doing heap dump... ");
2896 2907
2897 2908 if (G1PrintReachableBaseFile == NULL) {
2898 2909 gclog_or_tty->print_cr(" #### error: no base file defined");
2899 2910 return;
2900 2911 }
2901 2912
2902 2913 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2903 2914 (JVM_MAXPATHLEN - 1)) {
2904 2915 gclog_or_tty->print_cr(" #### error: file name too long");
2905 2916 return;
2906 2917 }
2907 2918
2908 2919 char file_name[JVM_MAXPATHLEN];
2909 2920 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2910 2921 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2911 2922
2912 2923 fileStream fout(file_name);
2913 2924 if (!fout.is_open()) {
2914 2925 gclog_or_tty->print_cr(" #### error: could not open file");
2915 2926 return;
2916 2927 }
2917 2928
2918 2929 outputStream* out = &fout;
2919 2930 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2920 2931 out->cr();
2921 2932
2922 2933 out->print_cr("--- ITERATING OVER REGIONS");
2923 2934 out->cr();
2924 2935 PrintReachableRegionClosure rcl(out, vo, all);
2925 2936 _g1h->heap_region_iterate(&rcl);
2926 2937 out->cr();
2927 2938
2928 2939 gclog_or_tty->print_cr(" done");
2929 2940 gclog_or_tty->flush();
2930 2941 }
2931 2942
2932 2943 #endif // PRODUCT
2933 2944
2934 2945 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2935 2946 // Note we are overriding the read-only view of the prev map here, via
2936 2947 // the cast.
2937 2948 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2938 2949 }
2939 2950
2940 2951 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2941 2952 _nextMarkBitMap->clearRange(mr);
2942 2953 }
2943 2954
2944 2955 HeapRegion*
2945 2956 ConcurrentMark::claim_region(uint worker_id) {
2946 2957 // "checkpoint" the finger
2947 2958 HeapWord* finger = _finger;
2948 2959
2949 2960 // _heap_end will not change underneath our feet; it only changes at
2950 2961 // yield points.
2951 2962 while (finger < _heap_end) {
2952 2963 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2953 2964
2954 2965 // Note on how this code handles humongous regions. In the
2955 2966 // normal case the finger will reach the start of a "starts
2956 2967 // humongous" (SH) region. Its end will either be the end of the
2957 2968 // last "continues humongous" (CH) region in the sequence, or the
2958 2969 // standard end of the SH region (if the SH is the only region in
2959 2970 // the sequence). That way claim_region() will skip over the CH
2960 2971 // regions. However, there is a subtle race between a CM thread
2961 2972 // executing this method and a mutator thread doing a humongous
2962 2973 // object allocation. The two are not mutually exclusive as the CM
2963 2974 // thread does not need to hold the Heap_lock when it gets
2964 2975 // here. So there is a chance that claim_region() will come across
2965 2976 // a free region that's in the process of becoming an SH or a CH
2966 2977 // region. In the former case, it will either
2967 2978 // a) Miss the update to the region's end, in which case it will
2968 2979 // visit every subsequent CH region, will find their bitmaps
2969 2980 // empty, and do nothing, or
2970 2981 // b) Observe the update of the region's end (in which case
2971 2982 // it will skip the subsequent CH regions).
2972 2983 // If it comes across a region that suddenly becomes CH, the
2973 2984 // scenario will be similar to b). So, the race between
2974 2985 // claim_region() and a humongous object allocation might force us
2975 2986 // to do a bit of unnecessary work (due to some unnecessary bitmap
2976 2987 // iterations) but it should not introduce any correctness issues.
2977 2988 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2978 2989
2979 2990 // The call to heap_region_containing_raw above may return NULL, as we
2980 2991 // always claim until the end of the heap. In this case, just jump to the next region.
2981 2992 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2982 2993
2983 2994 // Is the gap between reading the finger and doing the CAS too long?
2984 2995 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2985 2996 if (res == finger && curr_region != NULL) {
2986 2997 // we succeeded
2987 2998 HeapWord* bottom = curr_region->bottom();
2988 2999 HeapWord* limit = curr_region->next_top_at_mark_start();
2989 3000
2990 3001 if (verbose_low()) {
2991 3002 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2992 3003 "["PTR_FORMAT", "PTR_FORMAT"), "
2993 3004 "limit = "PTR_FORMAT,
2994 3005 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2995 3006 }
2996 3007
2997 3008 // notice that _finger == end cannot be guaranteed here since
2998 3009 // someone else might have moved the finger even further
2999 3010 assert(_finger >= end, "the finger should have moved forward");
3000 3011
3001 3012 if (verbose_low()) {
3002 3013 gclog_or_tty->print_cr("[%u] we were successful with region = "
3003 3014 PTR_FORMAT, worker_id, p2i(curr_region));
3004 3015 }
3005 3016
3006 3017 if (limit > bottom) {
3007 3018 if (verbose_low()) {
3008 3019 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
3009 3020 "returning it ", worker_id, p2i(curr_region));
3010 3021 }
3011 3022 return curr_region;
3012 3023 } else {
3013 3024 assert(limit == bottom,
3014 3025 "the region limit should be at bottom");
3015 3026 if (verbose_low()) {
3016 3027 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
3017 3028 "returning NULL", worker_id, p2i(curr_region));
3018 3029 }
3019 3030 // we return NULL and the caller should try calling
3020 3031 // claim_region() again.
3021 3032 return NULL;
3022 3033 }
3023 3034 } else {
3024 3035 assert(_finger > finger, "the finger should have moved forward");
3025 3036 if (verbose_low()) {
3026 3037 if (curr_region == NULL) {
3027 3038 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
3028 3039 "global finger = "PTR_FORMAT", "
3029 3040 "our finger = "PTR_FORMAT,
3030 3041 worker_id, p2i(_finger), p2i(finger));
3031 3042 } else {
3032 3043 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
3033 3044 "global finger = "PTR_FORMAT", "
3034 3045 "our finger = "PTR_FORMAT,
3035 3046 worker_id, p2i(_finger), p2i(finger));
3036 3047 }
3037 3048 }
3038 3049
3039 3050 // read it again
3040 3051 finger = _finger;
3041 3052 }
3042 3053 }
3043 3054
3044 3055 return NULL;
3045 3056 }
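// The claim protocol above reduces to a single CAS on the global finger.
// A minimal sketch of the idea, with the humongous handling, logging and
// NULL-region cases stripped out (region_end_for is a hypothetical helper
// standing in for the end computation above):
//
//   HeapWord* finger = _finger;                          // read the finger
//   HeapWord* end    = region_end_for(finger);           // extent to claim
//   HeapWord* res    = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
//   if (res == finger) {
//     // CAS succeeded: this worker owns [finger, end) and may scan it.
//   } else {
//     // Another worker advanced the finger first: re-read and retry.
//   }
//
// Losing the race costs only a re-read; no lock is taken, which is why
// claim_region() can safely run concurrently with humongous allocation,
// as discussed above.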
3046 3057
3047 3058 #ifndef PRODUCT
3048 3059 enum VerifyNoCSetOopsPhase {
3049 3060 VerifyNoCSetOopsStack,
3050 3061 VerifyNoCSetOopsQueues,
3051 3062 VerifyNoCSetOopsSATBCompleted,
3052 3063 VerifyNoCSetOopsSATBThread
3053 3064 };
3054 3065
3055 3066 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
3056 3067 private:
3057 3068 G1CollectedHeap* _g1h;
3058 3069 VerifyNoCSetOopsPhase _phase;
3059 3070 int _info;
3060 3071
3061 3072 const char* phase_str() {
3062 3073 switch (_phase) {
3063 3074 case VerifyNoCSetOopsStack: return "Stack";
3064 3075 case VerifyNoCSetOopsQueues: return "Queue";
3065 3076 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
3066 3077 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
3067 3078 default: ShouldNotReachHere();
3068 3079 }
3069 3080 return NULL;
3070 3081 }
3071 3082
3072 3083 void do_object_work(oop obj) {
3073 3084 guarantee(!_g1h->obj_in_cs(obj),
3074 3085 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
3075 3086 p2i((void*) obj), phase_str(), _info));
3076 3087 }
3077 3088
3078 3089 public:
3079 3090 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
3080 3091
3081 3092 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
3082 3093 _phase = phase;
3083 3094 _info = info;
3084 3095 }
3085 3096
3086 3097 virtual void do_oop(oop* p) {
3087 3098 oop obj = oopDesc::load_decode_heap_oop(p);
3088 3099 do_object_work(obj);
3089 3100 }
3090 3101
3091 3102 virtual void do_oop(narrowOop* p) {
3092 3103 // We should not come across narrow oops while scanning marking
3093 3104 // stacks and SATB buffers.
3094 3105 ShouldNotReachHere();
3095 3106 }
3096 3107
3097 3108 virtual void do_object(oop obj) {
3098 3109 do_object_work(obj);
3099 3110 }
3100 3111 };
3101 3112
3102 3113 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
3103 3114 bool verify_enqueued_buffers,
3104 3115 bool verify_thread_buffers,
3105 3116 bool verify_fingers) {
3106 3117 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
3107 3118 if (!G1CollectedHeap::heap()->mark_in_progress()) {
3108 3119 return;
3109 3120 }
3110 3121
3111 3122 VerifyNoCSetOopsClosure cl;
3112 3123
3113 3124 if (verify_stacks) {
3114 3125 // Verify entries on the global mark stack
3115 3126 cl.set_phase(VerifyNoCSetOopsStack);
3116 3127 _markStack.oops_do(&cl);
3117 3128
3118 3129 // Verify entries on the task queues
3119 3130 for (uint i = 0; i < _max_worker_id; i += 1) {
3120 3131 cl.set_phase(VerifyNoCSetOopsQueues, i);
3121 3132 CMTaskQueue* queue = _task_queues->queue(i);
3122 3133 queue->oops_do(&cl);
3123 3134 }
3124 3135 }
3125 3136
3126 3137 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
3127 3138
3128 3139 // Verify entries on the enqueued SATB buffers
3129 3140 if (verify_enqueued_buffers) {
3130 3141 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3131 3142 satb_qs.iterate_completed_buffers_read_only(&cl);
3132 3143 }
3133 3144
3134 3145 // Verify entries on the per-thread SATB buffers
3135 3146 if (verify_thread_buffers) {
3136 3147 cl.set_phase(VerifyNoCSetOopsSATBThread);
3137 3148 satb_qs.iterate_thread_buffers_read_only(&cl);
3138 3149 }
3139 3150
3140 3151 if (verify_fingers) {
3141 3152 // Verify the global finger
3142 3153 HeapWord* global_finger = finger();
3143 3154 if (global_finger != NULL && global_finger < _heap_end) {
3144 3155 // The global finger always points to a heap region boundary. We
3145 3156 // use heap_region_containing_raw() to get the containing region
3146 3157 // given that the global finger could be pointing to a free region
3147 3158 // which subsequently becomes continues humongous. If that
3148 3159 // happens, heap_region_containing() will return the bottom of the
3149 3160 // corresponding starts humongous region and the check below will
3150 3161 // not hold any more.
3151 3162 // Since we always iterate over all regions, we might get a NULL HeapRegion
3152 3163 // here.
3153 3164 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3154 3165 guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
3155 3166 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3156 3167 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3157 3168 }
3158 3169
3159 3170 // Verify the task fingers
3160 3171 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3161 3172 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3162 3173 CMTask* task = _tasks[i];
3163 3174 HeapWord* task_finger = task->finger();
3164 3175 if (task_finger != NULL && task_finger < _heap_end) {
3165 3176 // See above note on the global finger verification.
3166 3177 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3167 3178 guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
3168 3179 !task_hr->in_collection_set(),
3169 3180 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3170 3181 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3171 3182 }
3172 3183 }
3173 3184 }
3174 3185 }
3175 3186 #endif // PRODUCT
3176 3187
3177 3188 // Aggregate the counting data that was constructed concurrently
3178 3189 // with marking.
3179 3190 class AggregateCountDataHRClosure: public HeapRegionClosure {
3180 3191 G1CollectedHeap* _g1h;
3181 3192 ConcurrentMark* _cm;
3182 3193 CardTableModRefBS* _ct_bs;
3183 3194 BitMap* _cm_card_bm;
3184 3195 uint _max_worker_id;
3185 3196
3186 3197 public:
3187 3198 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3188 3199 BitMap* cm_card_bm,
3189 3200 uint max_worker_id) :
3190 3201 _g1h(g1h), _cm(g1h->concurrent_mark()),
3191 3202 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3192 3203 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3193 3204
3194 3205 bool doHeapRegion(HeapRegion* hr) {
3195 3206 if (hr->continuesHumongous()) {
3196 3207 // We will ignore these here and process them when their
3197 3208 // associated "starts humongous" region is processed.
3198 3209 // Note that we cannot rely on their associated
3199 3210 // "starts humongous" region to have their bit set to 1
3200 3211 // since, due to the region chunking in the parallel region
3201 3212 // iteration, a "continues humongous" region might be visited
3202 3213 // before its associated "starts humongous".
3203 3214 return false;
3204 3215 }
3205 3216
3206 3217 HeapWord* start = hr->bottom();
3207 3218 HeapWord* limit = hr->next_top_at_mark_start();
3208 3219 HeapWord* end = hr->end();
3209 3220
3210 3221 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3211 3222 err_msg("Preconditions not met - "
3212 3223 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3213 3224 "top: "PTR_FORMAT", end: "PTR_FORMAT,
3214 3225 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3215 3226
3216 3227 assert(hr->next_marked_bytes() == 0, "Precondition");
3217 3228
3218 3229 if (start == limit) {
3219 3230 // NTAMS of this region has not been set so nothing to do.
3220 3231 return false;
3221 3232 }
3222 3233
3223 3234 // 'start' should be in the heap.
3224 3235 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3225 3236 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3226 3237 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3227 3238
3228 3239 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3229 3240 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3230 3241 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3231 3242
3232 3243 // If ntams is not card aligned then we bump the card bitmap index
3233 3244 // for limit so that we get all the cards spanned by
3234 3245 // the object ending at ntams.
3235 3246 // Note: if this is the last region in the heap then ntams
3236 3247 // could actually be just beyond the end of the heap;
3237 3248 // limit_idx will then correspond to a (non-existent) card
3238 3249 // that is also outside the heap.
3239 3250 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3240 3251 limit_idx += 1;
3241 3252 }
3242 3253
3243 3254 assert(limit_idx <= end_idx, "or else use atomics");
3244 3255
3245 3256 // Aggregate the "stripe" in the count data associated with hr.
3246 3257 uint hrm_index = hr->hrm_index();
3247 3258 size_t marked_bytes = 0;
3248 3259
3249 3260 for (uint i = 0; i < _max_worker_id; i += 1) {
3250 3261 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3251 3262 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3252 3263
3253 3264 // Fetch the marked_bytes in this region for task i and
3254 3265 // add it to the running total for this region.
3255 3266 marked_bytes += marked_bytes_array[hrm_index];
3256 3267
3257 3268 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3258 3269 // into the global card bitmap.
3259 3270 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3260 3271
3261 3272 while (scan_idx < limit_idx) {
3262 3273 assert(task_card_bm->at(scan_idx) == true, "should be");
3263 3274 _cm_card_bm->set_bit(scan_idx);
3264 3275 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3265 3276
3266 3277 // BitMap::get_next_one_offset() can handle the case when
3267 3278 // its left_offset parameter is greater than its right_offset
3268 3279 // parameter. It does, however, have an early exit if
3269 3280 // left_offset == right_offset. So let's limit the value
3270 3281 // passed in for left offset here.
3271 3282 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3272 3283 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3273 3284 }
3274 3285 }
3275 3286
3276 3287 // Update the marked bytes for this region.
3277 3288 hr->add_to_marked_bytes(marked_bytes);
3278 3289
3279 3290 // Next heap region
3280 3291 return false;
3281 3292 }
3282 3293 };
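// In essence, doHeapRegion() above performs a per-region reduction over the
// per-worker counting data: sum the marked-bytes entries and OR each
// worker's card bits into the global card bitmap. A condensed sketch of
// that inner loop (illustrative only; the bitmap-union details are above):
//
//   size_t marked_bytes = 0;
//   for (uint i = 0; i < _max_worker_id; i += 1) {
//     marked_bytes += _cm->count_marked_bytes_array_for(i)[hrm_index];
//     // ... union worker i's cards in [start_idx, limit_idx) into _cm_card_bm
//   }
//   hr->add_to_marked_bytes(marked_bytes);
//
// Each region is visited by exactly one worker of the aggregation task, so
// plain (non-atomic) updates suffice - hence "or else use atomics" above.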
3283 3294
3284 3295 class G1AggregateCountDataTask: public AbstractGangTask {
3285 3296 protected:
3286 3297 G1CollectedHeap* _g1h;
3287 3298 ConcurrentMark* _cm;
3288 3299 BitMap* _cm_card_bm;
3289 3300 uint _max_worker_id;
3290 3301 int _active_workers;
3291 3302
3292 3303 public:
3293 3304 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3294 3305 ConcurrentMark* cm,
3295 3306 BitMap* cm_card_bm,
3296 3307 uint max_worker_id,
3297 3308 int n_workers) :
3298 3309 AbstractGangTask("Count Aggregation"),
3299 3310 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3300 3311 _max_worker_id(max_worker_id),
3301 3312 _active_workers(n_workers) { }
3302 3313
3303 3314 void work(uint worker_id) {
3304 3315 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3305 3316
3306 3317 if (G1CollectedHeap::use_parallel_gc_threads()) {
3307 3318 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3308 3319 _active_workers,
3309 3320 HeapRegion::AggregateCountClaimValue);
3310 3321 } else {
3311 3322 _g1h->heap_region_iterate(&cl);
3312 3323 }
3313 3324 }
3314 3325 };
3315 3326
3316 3327
3317 3328 void ConcurrentMark::aggregate_count_data() {
3318 3329 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3319 3330 _g1h->workers()->active_workers() :
3320 3331 1);
3321 3332
3322 3333 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3323 3334 _max_worker_id, n_workers);
3324 3335
3325 3336 if (G1CollectedHeap::use_parallel_gc_threads()) {
3326 3337 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3327 3338 "sanity check");
3328 3339 _g1h->set_par_threads(n_workers);
3329 3340 _g1h->workers()->run_task(&g1_par_agg_task);
3330 3341 _g1h->set_par_threads(0);
3331 3342
3332 3343 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3333 3344 "sanity check");
3334 3345 _g1h->reset_heap_region_claim_values();
3335 3346 } else {
3336 3347 g1_par_agg_task.work(0);
3337 3348 }
3338 3349 _g1h->allocation_context_stats().update_at_remark();
3339 3350 }
3340 3351
3341 3352 // Clear the per-worker arrays used to store the per-region counting data
3342 3353 void ConcurrentMark::clear_all_count_data() {
3343 3354 // Clear the global card bitmap - it will be filled during
3344 3355 // liveness count aggregation (during remark) and the
3345 3356 // final counting task.
3346 3357 _card_bm.clear();
3347 3358
3348 3359 // Clear the global region bitmap - it will be filled as part
3349 3360 // of the final counting task.
3350 3361 _region_bm.clear();
3351 3362
3352 3363 uint max_regions = _g1h->max_regions();
3353 3364 assert(_max_worker_id > 0, "uninitialized");
3354 3365
3355 3366 for (uint i = 0; i < _max_worker_id; i += 1) {
3356 3367 BitMap* task_card_bm = count_card_bitmap_for(i);
3357 3368 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3358 3369
3359 3370 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3360 3371 assert(marked_bytes_array != NULL, "uninitialized");
3361 3372
3362 3373 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3363 3374 task_card_bm->clear();
3364 3375 }
3365 3376 }
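// For a sense of scale (a hedged example, not measured data): the counting
// state cleared here is one size_t array of max_regions entries plus one
// card bitmap per worker. With, say, 8 workers and 2048 regions, the
// marked-bytes arrays alone occupy 8 * 2048 * sizeof(size_t) = 128 KB;
// the per-worker card bitmaps dominate, each matching _card_bm in size.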
3366 3377
3367 3378 void ConcurrentMark::print_stats() {
3368 3379 if (verbose_stats()) {
3369 3380 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3370 3381 for (size_t i = 0; i < _active_tasks; ++i) {
3371 3382 _tasks[i]->print_stats();
3372 3383 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3373 3384 }
3374 3385 }
3375 3386 }
3376 3387
3377 3388 // abandon current marking iteration due to a Full GC
3378 3389 void ConcurrentMark::abort() {
3379 3390 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3380 3391 // concurrent bitmap clearing.
3381 3392 _nextMarkBitMap->clearAll();
3382 3393
3383 3394 // Note we cannot clear the previous marking bitmap here
3384 3395 // since VerifyDuringGC verifies the objects marked during
3385 3396 // a full GC against the previous bitmap.
3386 3397
3387 3398 // Clear the liveness counting data
3388 3399 clear_all_count_data();
3389 3400 // Empty mark stack
3390 3401 reset_marking_state();
3391 3402 for (uint i = 0; i < _max_worker_id; ++i) {
3392 3403 _tasks[i]->clear_region_fields();
3393 3404 }
3394 3405 _first_overflow_barrier_sync.abort();
3395 3406 _second_overflow_barrier_sync.abort();
3396 3407 const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3397 3408 if (!gc_id.is_undefined()) {
3398 3409 // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3399 3410 // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3400 3411 _aborted_gc_id = gc_id;
3401 3412 }
3402 3413 _has_aborted = true;
3403 3414
3404 3415 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3405 3416 satb_mq_set.abandon_partial_marking();
3406 3417 // This can be called either during or outside marking; we'll read
3407 3418 // the expected_active value from the SATB queue set.
3408 3419 satb_mq_set.set_active_all_threads(
3409 3420 false, /* new active value */
3410 3421 satb_mq_set.is_active() /* expected_active */);
3411 3422
3412 3423 _g1h->trace_heap_after_concurrent_cycle();
3413 3424 _g1h->register_concurrent_cycle_end();
3414 3425 }
3415 3426
3416 3427 const GCId& ConcurrentMark::concurrent_gc_id() {
3417 3428 if (has_aborted()) {
3418 3429 return _aborted_gc_id;
3419 3430 }
3420 3431 return _g1h->gc_tracer_cm()->gc_id();
3421 3432 }
3422 3433
3423 3434 static void print_ms_time_info(const char* prefix, const char* name,
3424 3435 NumberSeq& ns) {
3425 3436 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3426 3437 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3427 3438 if (ns.num() > 0) {
3428 3439 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3429 3440 prefix, ns.sd(), ns.maximum());
3430 3441 }
3431 3442 }
3432 3443
3433 3444 void ConcurrentMark::print_summary_info() {
3434 3445 gclog_or_tty->print_cr(" Concurrent marking:");
3435 3446 print_ms_time_info(" ", "init marks", _init_times);
3436 3447 print_ms_time_info(" ", "remarks", _remark_times);
3437 3448 {
3438 3449 print_ms_time_info(" ", "final marks", _remark_mark_times);
3439 3450 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3440 3451
3441 3452 }
3442 3453 print_ms_time_info(" ", "cleanups", _cleanup_times);
3443 3454 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3444 3455 _total_counting_time,
3445 3456 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3446 3457 (double)_cleanup_times.num()
3447 3458 : 0.0));
3448 3459 if (G1ScrubRemSets) {
3449 3460 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3450 3461 _total_rs_scrub_time,
3451 3462 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3452 3463 (double)_cleanup_times.num()
3453 3464 : 0.0));
3454 3465 }
3455 3466 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3456 3467 (_init_times.sum() + _remark_times.sum() +
3457 3468 _cleanup_times.sum())/1000.0);
3458 3469 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3459 3470 "(%8.2f s marking).",
3460 3471 cmThread()->vtime_accum(),
3461 3472 cmThread()->vtime_mark_accum());
3462 3473 }
3463 3474
3464 3475 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3465 3476 if (use_parallel_marking_threads()) {
3466 3477 _parallel_workers->print_worker_threads_on(st);
3467 3478 }
3468 3479 }
3469 3480
3470 3481 void ConcurrentMark::print_on_error(outputStream* st) const {
3471 3482 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3472 3483 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3473 3484 _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3474 3485 _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3475 3486 }
3476 3487
3477 3488 // We take a break if someone is trying to stop the world.
3478 3489 bool ConcurrentMark::do_yield_check(uint worker_id) {
3479 3490 if (SuspendibleThreadSet::should_yield()) {
3480 3491 if (worker_id == 0) {
3481 3492 _g1h->g1_policy()->record_concurrent_pause();
3482 3493 }
3483 3494 SuspendibleThreadSet::yield();
3484 3495 return true;
3485 3496 } else {
3486 3497 return false;
3487 3498 }
3488 3499 }
3489 3500
3490 3501 #ifndef PRODUCT
3491 3502 // for debugging purposes
3492 3503 void ConcurrentMark::print_finger() {
3493 3504 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3494 3505 p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3495 3506 for (uint i = 0; i < _max_worker_id; ++i) {
3496 3507 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3497 3508 }
3498 3509 gclog_or_tty->cr();
3499 3510 }
3500 3511 #endif
3501 3512
3502 3513 void CMTask::scan_object(oop obj) {
3503 3514 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3504 3515
3505 3516 if (_cm->verbose_high()) {
3506 3517 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3507 3518 _worker_id, p2i((void*) obj));
3508 3519 }
3509 3520
3510 3521 size_t obj_size = obj->size();
3511 3522 _words_scanned += obj_size;
3512 3523
3513 3524 obj->oop_iterate(_cm_oop_closure);
3514 3525 statsOnly( ++_objs_scanned );
3515 3526 check_limits();
3516 3527 }
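// scan_object() is also what drives the work-based clock: each scanned
// object adds obj->size() to _words_scanned, and check_limits() calls
// reached_limit() - and hence regular_clock_call() - once _words_scanned
// or _refs_reached crosses its current limit (see recalculate_limits()).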
3517 3528
3518 3529 // Closure for iteration over bitmaps
3519 3530 class CMBitMapClosure : public BitMapClosure {
3520 3531 private:
3521 3532 // the bitmap that is being iterated over
3522 3533 CMBitMap* _nextMarkBitMap;
3523 3534 ConcurrentMark* _cm;
3524 3535 CMTask* _task;
3525 3536
3526 3537 public:
3527 3538 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3528 3539 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3529 3540
3530 3541 bool do_bit(size_t offset) {
3531 3542 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3532 3543 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3533 3544 assert( addr < _cm->finger(), "invariant");
3534 3545
3535 3546 statsOnly( _task->increase_objs_found_on_bitmap() );
3536 3547 assert(addr >= _task->finger(), "invariant");
3537 3548
3538 3549 // We move that task's local finger along.
3539 3550 _task->move_finger_to(addr);
3540 3551
3541 3552 _task->scan_object(oop(addr));
3542 3553 // we only partially drain the local queue and global stack
3543 3554 _task->drain_local_queue(true);
3544 3555 _task->drain_global_stack(true);
3545 3556
3546 3557 // if the has_aborted flag has been raised, we need to bail out of
3547 3558 // the iteration
3548 3559 return !_task->has_aborted();
3549 3560 }
3550 3561 };
3551 3562
3552 3563 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3553 3564 ConcurrentMark* cm,
3554 3565 CMTask* task)
3555 3566 : _g1h(g1h), _cm(cm), _task(task) {
3556 3567 assert(_ref_processor == NULL, "should be initialized to NULL");
3557 3568
3558 3569 if (G1UseConcMarkReferenceProcessing) {
3559 3570 _ref_processor = g1h->ref_processor_cm();
3560 3571 assert(_ref_processor != NULL, "should not be NULL");
3561 3572 }
3562 3573 }
3563 3574
3564 3575 void CMTask::setup_for_region(HeapRegion* hr) {
3565 3576 assert(hr != NULL,
3566 3577 "claim_region() should have filtered out NULL regions");
3567 3578 assert(!hr->continuesHumongous(),
3568 3579 "claim_region() should have filtered out continues humongous regions");
3569 3580
3570 3581 if (_cm->verbose_low()) {
3571 3582 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3572 3583 _worker_id, p2i(hr));
3573 3584 }
3574 3585
3575 3586 _curr_region = hr;
3576 3587 _finger = hr->bottom();
3577 3588 update_region_limit();
3578 3589 }
3579 3590
3580 3591 void CMTask::update_region_limit() {
3581 3592 HeapRegion* hr = _curr_region;
3582 3593 HeapWord* bottom = hr->bottom();
3583 3594 HeapWord* limit = hr->next_top_at_mark_start();
3584 3595
3585 3596 if (limit == bottom) {
3586 3597 if (_cm->verbose_low()) {
3587 3598 gclog_or_tty->print_cr("[%u] found an empty region "
3588 3599 "["PTR_FORMAT", "PTR_FORMAT")",
3589 3600 _worker_id, p2i(bottom), p2i(limit));
3590 3601 }
3591 3602 // The region was collected underneath our feet.
3592 3603 // We set the finger to bottom to ensure that the bitmap
3593 3604 // iteration that will follow this will not do anything.
3594 3605 // (this is not a condition that holds when we set the region up,
3595 3606 // as the region is not supposed to be empty in the first place)
3596 3607 _finger = bottom;
3597 3608 } else if (limit >= _region_limit) {
3598 3609 assert(limit >= _finger, "peace of mind");
3599 3610 } else {
3600 3611 assert(limit < _region_limit, "only way to get here");
3601 3612 // This can happen under some pretty unusual circumstances. An
3602 3613 // evacuation pause empties the region underneath our feet (NTAMS
3603 3614 // at bottom). We then do some allocation in the region (NTAMS
3604 3615 // stays at bottom), followed by the region being used as a GC
3605 3616 // alloc region (NTAMS will move to top() and the objects
3606 3617 // originally below it will be grayed). All objects now marked in
3607 3618 // the region are explicitly grayed, if below the global finger,
3608 3619 // and in fact we do not need to scan anything else. So, we simply
3609 3620 // set _finger to be limit to ensure that the bitmap iteration
3610 3621 // doesn't do anything.
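// As a timeline, the scenario above reads roughly as:
//   t0: evacuation pause empties the region    (NTAMS == bottom)
//   t1: mutator allocation in the region       (NTAMS stays at bottom)
//   t2: region reused as a GC alloc region     (NTAMS moves to top(),
//                                               objects below it grayed)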
3611 3622 _finger = limit;
3612 3623 }
3613 3624
3614 3625 _region_limit = limit;
3615 3626 }
3616 3627
3617 3628 void CMTask::giveup_current_region() {
3618 3629 assert(_curr_region != NULL, "invariant");
3619 3630 if (_cm->verbose_low()) {
3620 3631 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3621 3632 _worker_id, p2i(_curr_region));
3622 3633 }
3623 3634 clear_region_fields();
3624 3635 }
3625 3636
3626 3637 void CMTask::clear_region_fields() {
3627 3638 // Values for these three fields that indicate that we're not
3628 3639 // holding on to a region.
3629 3640 _curr_region = NULL;
3630 3641 _finger = NULL;
3631 3642 _region_limit = NULL;
3632 3643 }
3633 3644
3634 3645 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3635 3646 if (cm_oop_closure == NULL) {
3636 3647 assert(_cm_oop_closure != NULL, "invariant");
3637 3648 } else {
3638 3649 assert(_cm_oop_closure == NULL, "invariant");
3639 3650 }
3640 3651 _cm_oop_closure = cm_oop_closure;
3641 3652 }
3642 3653
3643 3654 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3644 3655 guarantee(nextMarkBitMap != NULL, "invariant");
3645 3656
3646 3657 if (_cm->verbose_low()) {
3647 3658 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3648 3659 }
3649 3660
3650 3661 _nextMarkBitMap = nextMarkBitMap;
3651 3662 clear_region_fields();
3652 3663
3653 3664 _calls = 0;
3654 3665 _elapsed_time_ms = 0.0;
3655 3666 _termination_time_ms = 0.0;
3656 3667 _termination_start_time_ms = 0.0;
3657 3668
3658 3669 #if _MARKING_STATS_
3659 3670 _local_pushes = 0;
3660 3671 _local_pops = 0;
3661 3672 _local_max_size = 0;
3662 3673 _objs_scanned = 0;
3663 3674 _global_pushes = 0;
3664 3675 _global_pops = 0;
3665 3676 _global_max_size = 0;
3666 3677 _global_transfers_to = 0;
3667 3678 _global_transfers_from = 0;
3668 3679 _regions_claimed = 0;
3669 3680 _objs_found_on_bitmap = 0;
3670 3681 _satb_buffers_processed = 0;
3671 3682 _steal_attempts = 0;
3672 3683 _steals = 0;
3673 3684 _aborted = 0;
3674 3685 _aborted_overflow = 0;
3675 3686 _aborted_cm_aborted = 0;
3676 3687 _aborted_yield = 0;
3677 3688 _aborted_timed_out = 0;
3678 3689 _aborted_satb = 0;
3679 3690 _aborted_termination = 0;
3680 3691 #endif // _MARKING_STATS_
3681 3692 }
3682 3693
3683 3694 bool CMTask::should_exit_termination() {
3684 3695 regular_clock_call();
3685 3696 // This is called when we are in the termination protocol. We should
3686 3697 // quit if, for some reason, this task wants to abort or the global
3687 3698 // stack is not empty (this means that we can get work from it).
3688 3699 return !_cm->mark_stack_empty() || has_aborted();
3689 3700 }
3690 3701
3691 3702 void CMTask::reached_limit() {
3692 3703 assert(_words_scanned >= _words_scanned_limit ||
3693 3704 _refs_reached >= _refs_reached_limit ,
3694 3705 "shouldn't have been called otherwise");
3695 3706 regular_clock_call();
3696 3707 }
3697 3708
3698 3709 void CMTask::regular_clock_call() {
3699 3710 if (has_aborted()) return;
3700 3711
3701 3712 // First, we need to recalculate the words scanned and refs reached
3702 3713 // limits for the next clock call.
3703 3714 recalculate_limits();
3704 3715
3705 3716 // During the regular clock call we do the following
3706 3717
3707 3718 // (1) If an overflow has been flagged, then we abort.
3708 3719 if (_cm->has_overflown()) {
3709 3720 set_has_aborted();
3710 3721 return;
3711 3722 }
3712 3723
3713 3724 // If we are not concurrent (i.e. we're doing remark) we don't need
3714 3725 // to check anything else. The other steps are only needed during
3715 3726 // the concurrent marking phase.
3716 3727 if (!concurrent()) return;
3717 3728
3718 3729 // (2) If marking has been aborted for Full GC, then we also abort.
3719 3730 if (_cm->has_aborted()) {
3720 3731 set_has_aborted();
3721 3732 statsOnly( ++_aborted_cm_aborted );
3722 3733 return;
3723 3734 }
3724 3735
3725 3736 double curr_time_ms = os::elapsedVTime() * 1000.0;
3726 3737
3727 3738 // (3) If marking stats are enabled, then we update the step history.
3728 3739 #if _MARKING_STATS_
3729 3740 if (_words_scanned >= _words_scanned_limit) {
3730 3741 ++_clock_due_to_scanning;
3731 3742 }
3732 3743 if (_refs_reached >= _refs_reached_limit) {
3733 3744 ++_clock_due_to_marking;
3734 3745 }
3735 3746
3736 3747 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3737 3748 _interval_start_time_ms = curr_time_ms;
3738 3749 _all_clock_intervals_ms.add(last_interval_ms);
3739 3750
3740 3751 if (_cm->verbose_medium()) {
3741 3752 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3742 3753 "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
3743 3754 _worker_id, last_interval_ms,
3744 3755 _words_scanned,
3745 3756 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3746 3757 _refs_reached,
3747 3758 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3748 3759 }
3749 3760 #endif // _MARKING_STATS_
3750 3761
3751 3762 // (4) We check whether we should yield. If we have to, then we abort.
3752 3763 if (SuspendibleThreadSet::should_yield()) {
3753 3764 // We should yield. To do this we abort the task. The caller is
3754 3765 // responsible for yielding.
3755 3766 set_has_aborted();
3756 3767 statsOnly( ++_aborted_yield );
3757 3768 return;
3758 3769 }
3759 3770
3760 3771 // (5) We check whether we've reached our time quota. If we have,
3761 3772 // then we abort.
3762 3773 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3763 3774 if (elapsed_time_ms > _time_target_ms) {
3764 3775 set_has_aborted();
3765 3776 _has_timed_out = true;
3766 3777 statsOnly( ++_aborted_timed_out );
3767 3778 return;
3768 3779 }
3769 3780
3770 3781 // (6) Finally, we check whether there are enough completed SATB
3771 3782 // buffers available for processing. If there are, we abort.
3772 3783 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3773 3784 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3774 3785 if (_cm->verbose_low()) {
3775 3786 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3776 3787 _worker_id);
3777 3788 }
3778 3789 // we do need to process SATB buffers; we'll abort and restart
3779 3790 // the marking task to do so
3780 3791 set_has_aborted();
3781 3792 statsOnly( ++_aborted_satb );
3782 3793 return;
3783 3794 }
3784 3795 }
3785 3796
3786 3797 void CMTask::recalculate_limits() {
3787 3798 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3788 3799 _words_scanned_limit = _real_words_scanned_limit;
3789 3800
3790 3801 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3791 3802 _refs_reached_limit = _real_refs_reached_limit;
3792 3803 }
3793 3804
3794 3805 void CMTask::decrease_limits() {
3795 3806 // This is called when we believe that we're going to do an infrequent
3796 3807 // operation which will increase the per byte scanned cost (i.e. move
3797 3808 // entries to/from the global stack). It basically tries to decrease the
3798 3809 // scanning limit so that the clock is called earlier.
3799 3810
3800 3811 if (_cm->verbose_medium()) {
3801 3812 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3802 3813 }
3803 3814
3804 3815 _words_scanned_limit = _real_words_scanned_limit -
3805 3816 3 * words_scanned_period / 4;
3806 3817 _refs_reached_limit = _real_refs_reached_limit -
3807 3818 3 * refs_reached_period / 4;
3808 3819 }
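// Worked through: the normal budget between clock calls is one full
// words_scanned_period (respectively refs_reached_period). Subtracting
// 3/4 of a period from the real limit leaves at most a quarter period of
// work before the next regular_clock_call(), i.e. after an expensive
// transfer the clock fires roughly four times sooner than usual.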
3809 3820
3810 3821 void CMTask::move_entries_to_global_stack() {
3811 3822 // local array where we'll store the entries that will be popped
3812 3823 // from the local queue
3813 3824 oop buffer[global_stack_transfer_size];
3814 3825
3815 3826 int n = 0;
3816 3827 oop obj;
3817 3828 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3818 3829 buffer[n] = obj;
3819 3830 ++n;
3820 3831 }
3821 3832
3822 3833 if (n > 0) {
3823 3834 // we popped at least one entry from the local queue
3824 3835
3825 3836 statsOnly( ++_global_transfers_to; _local_pops += n );
3826 3837
3827 3838 if (!_cm->mark_stack_push(buffer, n)) {
3828 3839 if (_cm->verbose_low()) {
3829 3840 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3830 3841 _worker_id);
3831 3842 }
3832 3843 set_has_aborted();
3833 3844 } else {
3834 3845 // the transfer was successful
3835 3846
3836 3847 if (_cm->verbose_medium()) {
3837 3848 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3838 3849 _worker_id, n);
3839 3850 }
3840 3851 statsOnly( int tmp_size = _cm->mark_stack_size();
3841 3852 if (tmp_size > _global_max_size) {
3842 3853 _global_max_size = tmp_size;
3843 3854 }
3844 3855 _global_pushes += n );
3845 3856 }
3846 3857 }
3847 3858
3848 3859 // this operation was quite expensive, so decrease the limits
3849 3860 decrease_limits();
3850 3861 }
3851 3862
3852 3863 void CMTask::get_entries_from_global_stack() {
3853 3864 // local array where we'll store the entries that will be popped
3854 3865 // from the global stack.
3855 3866 oop buffer[global_stack_transfer_size];
3856 3867 int n;
3857 3868 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3858 3869 assert(n <= global_stack_transfer_size,
3859 3870 "we should not pop more than the given limit");
3860 3871 if (n > 0) {
3861 3872 // yes, we did actually pop at least one entry
3862 3873
3863 3874 statsOnly( ++_global_transfers_from; _global_pops += n );
3864 3875 if (_cm->verbose_medium()) {
3865 3876 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3866 3877 _worker_id, n);
3867 3878 }
3868 3879 for (int i = 0; i < n; ++i) {
3869 3880 bool success = _task_queue->push(buffer[i]);
3870 3881 // We only call this when the local queue is empty or under a
3871 3882 // given target limit. So, we do not expect this push to fail.
3872 3883 assert(success, "invariant");
3873 3884 }
3874 3885
3875 3886 statsOnly( int tmp_size = _task_queue->size();
3876 3887 if (tmp_size > _local_max_size) {
3877 3888 _local_max_size = tmp_size;
3878 3889 }
3879 3890 _local_pushes += n );
3880 3891 }
3881 3892
3882 3893 // this operation was quite expensive, so decrease the limits
3883 3894 decrease_limits();
3884 3895 }
3885 3896
3886 3897 void CMTask::drain_local_queue(bool partially) {
3887 3898 if (has_aborted()) return;
3888 3899
3889 3900 // Decide what the target size is, depending on whether we're going to
3890 3901 // drain it partially (so that other tasks can steal if they run out
3891 3902 // of things to do) or totally (at the very end).
3892 3903 size_t target_size;
3893 3904 if (partially) {
3894 3905 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3895 3906 } else {
3896 3907 target_size = 0;
3897 3908 }
3898 3909
3899 3910 if (_task_queue->size() > target_size) {
3900 3911 if (_cm->verbose_high()) {
3901 3912 gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3902 3913 _worker_id, target_size);
3903 3914 }
3904 3915
3905 3916 oop obj;
3906 3917 bool ret = _task_queue->pop_local(obj);
3907 3918 while (ret) {
3908 3919 statsOnly( ++_local_pops );
3909 3920
3910 3921 if (_cm->verbose_high()) {
3911 3922 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3912 3923 p2i((void*) obj));
3913 3924 }
3914 3925
3915 3926 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3916 3927 assert(!_g1h->is_on_master_free_list(
3917 3928 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3918 3929
3919 3930 scan_object(obj);
3920 3931
3921 3932 if (_task_queue->size() <= target_size || has_aborted()) {
3922 3933 ret = false;
3923 3934 } else {
3924 3935 ret = _task_queue->pop_local(obj);
3925 3936 }
3926 3937 }
3927 3938
3928 3939 if (_cm->verbose_high()) {
3929 3940 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3930 3941 _worker_id, _task_queue->size());
3931 3942 }
3932 3943 }
3933 3944 }
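// The partial-drain target above is MIN2(max_elems/3, GCDrainStackTargetSize):
// drain down to at most a third of the queue's capacity, but never keep
// more than the flag allows. As a hedged example, assuming the default
// GCDrainStackTargetSize of 64, a queue with max_elems == 16384 is drained
// down to 64 entries, keeping work available for other tasks to steal.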
3934 3945
3935 3946 void CMTask::drain_global_stack(bool partially) {
3936 3947 if (has_aborted()) return;
3937 3948
3938 3949 // We have a policy to drain the local queue before we attempt to
3939 3950 // drain the global stack.
3940 3951 assert(partially || _task_queue->size() == 0, "invariant");
3941 3952
3942 3953 // Decide what the target size is, depending on whether we're going to
3943 3954 // drain it partially (so that other tasks can steal if they run out
3944 3955 // of things to do) or totally (at the very end). Notice that,
3945 3956 // because we move entries from the global stack in chunks or
3946 3957 // because another task might be doing the same, we might in fact
3947 3958 // drop below the target. But this is not a problem.
3948 3959 size_t target_size;
3949 3960 if (partially) {
3950 3961 target_size = _cm->partial_mark_stack_size_target();
3951 3962 } else {
3952 3963 target_size = 0;
3953 3964 }
3954 3965
3955 3966 if (_cm->mark_stack_size() > target_size) {
3956 3967 if (_cm->verbose_low()) {
3957 3968 gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3958 3969 _worker_id, target_size);
3959 3970 }
3960 3971
3961 3972 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3962 3973 get_entries_from_global_stack();
3963 3974 drain_local_queue(partially);
3964 3975 }
3965 3976
3966 3977 if (_cm->verbose_low()) {
3967 3978 gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3968 3979 _worker_id, _cm->mark_stack_size());
3969 3980 }
3970 3981 }
3971 3982 }
3972 3983
3973 3984 // SATB Queue has several assumptions on whether to call the par or
3974 3985 // non-par versions of the methods. This is why some of the code is
3975 3986 // replicated. We should really get rid of the single-threaded version
3976 3987 // of the code to simplify things.
3977 3988 void CMTask::drain_satb_buffers() {
3978 3989 if (has_aborted()) return;
3979 3990
3980 3991 // We set this so that the regular clock knows that we're in the
3981 3992 // middle of draining buffers and doesn't set the abort flag when it
3982 3993 // notices that SATB buffers are available for draining. It'd be
3983 3994 // very counterproductive if it did that. :-)
3984 3995 _draining_satb_buffers = true;
3985 3996
3986 3997 CMObjectClosure oc(this);
3987 3998 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3988 3999 if (G1CollectedHeap::use_parallel_gc_threads()) {
3989 4000 satb_mq_set.set_par_closure(_worker_id, &oc);
3990 4001 } else {
3991 4002 satb_mq_set.set_closure(&oc);
3992 4003 }
3993 4004
3994 4005 // This keeps claiming and applying the closure to completed buffers
3995 4006 // until we run out of buffers or we need to abort.
3996 4007 if (G1CollectedHeap::use_parallel_gc_threads()) {
3997 4008 while (!has_aborted() &&
3998 4009 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3999 4010 if (_cm->verbose_medium()) {
4000 4011 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
4001 4012 }
4002 4013 statsOnly( ++_satb_buffers_processed );
4003 4014 regular_clock_call();
4004 4015 }
4005 4016 } else {
4006 4017 while (!has_aborted() &&
4007 4018 satb_mq_set.apply_closure_to_completed_buffer()) {
4008 4019 if (_cm->verbose_medium()) {
4009 4020 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
4010 4021 }
4011 4022 statsOnly( ++_satb_buffers_processed );
4012 4023 regular_clock_call();
4013 4024 }
4014 4025 }
4015 4026
4016 4027 _draining_satb_buffers = false;
4017 4028
4018 4029 assert(has_aborted() ||
4019 4030 concurrent() ||
4020 4031 satb_mq_set.completed_buffers_num() == 0, "invariant");
4021 4032
4022 4033 if (G1CollectedHeap::use_parallel_gc_threads()) {
4023 4034 satb_mq_set.set_par_closure(_worker_id, NULL);
4024 4035 } else {
4025 4036 satb_mq_set.set_closure(NULL);
4026 4037 }
4027 4038
4028 4039 // again, this was a potentially expensive operation, decrease the
4029 4040 // limits to get the regular clock call early
4030 4041 decrease_limits();
4031 4042 }
4032 4043
4033 4044 void CMTask::print_stats() {
4034 4045 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
4035 4046 _worker_id, _calls);
4036 4047 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4037 4048 _elapsed_time_ms, _termination_time_ms);
4038 4049 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4039 4050 _step_times_ms.num(), _step_times_ms.avg(),
4040 4051 _step_times_ms.sd());
4041 4052 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4042 4053 _step_times_ms.maximum(), _step_times_ms.sum());
4043 4054
4044 4055 #if _MARKING_STATS_
4045 4056 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4046 4057 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4047 4058 _all_clock_intervals_ms.sd());
4048 4059 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4049 4060 _all_clock_intervals_ms.maximum(),
4050 4061 _all_clock_intervals_ms.sum());
4051 4062 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
4052 4063 _clock_due_to_scanning, _clock_due_to_marking);
4053 4064 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
4054 4065 _objs_scanned, _objs_found_on_bitmap);
4055 4066 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
4056 4067 _local_pushes, _local_pops, _local_max_size);
4057 4068 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
4058 4069 _global_pushes, _global_pops, _global_max_size);
4059 4070 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
4060 4071 _global_transfers_to,_global_transfers_from);
4061 4072 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
4062 4073 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
4063 4074 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
4064 4075 _steal_attempts, _steals);
4065 4076 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
4066 4077 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
4067 4078 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4068 4079 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
4069 4080 _aborted_timed_out, _aborted_satb, _aborted_termination);
4070 4081 #endif // _MARKING_STATS_
4071 4082 }
4072 4083
4073 4084 /*****************************************************************************
4074 4085
4075 4086 The do_marking_step(time_target_ms, ...) method is the building
4076 4087 block of the parallel marking framework. It can be called in parallel
4077 4088 with other invocations of do_marking_step() on different tasks
4078 4089 (but only one per task, obviously) and concurrently with the
4079 4090 mutator threads, or during remark, hence it eliminates the need
4080 4091 for two versions of the code. When called during remark, it will
4081 4092 pick up from where the task left off during the concurrent marking
4082 4093 phase. Interestingly, tasks are also claimable during evacuation
4083 4094 pauses, since do_marking_step() ensures that it aborts before
4084 4095 it needs to yield.
4085 4096
4086 4097 The data structures that it uses to do marking work are the
4087 4098 following:
4088 4099
4089 4100 (1) Marking Bitmap. If there are gray objects that appear only
4090 4101 on the bitmap (this happens either when dealing with an overflow
4091 4102 or when the initial marking phase has simply marked the roots
4092 4103 and didn't push them on the stack), then tasks claim heap
4093 4104 regions whose bitmap they then scan to find gray objects. A
4094 4105 global finger indicates where the end of the last claimed region
4095 4106 is. A local finger indicates how far into the region a task has
4096 4107 scanned. The two fingers are used to determine how to gray an
4097 4108 object (i.e. whether simply marking it is OK, as it will be
4098 4109 visited by a task in the future, or whether it needs to be also
4099 4110 pushed on a stack).
4100 4111
4101 4112 (2) Local Queue. The local queue of the task which is accessed
4102 4113 reasonably efficiently by the task. Other tasks can steal from
4103 4114 it when they run out of work. Throughout the marking phase, a
4104 4115 task attempts to keep its local queue short but not totally
4105 4116 empty, so that entries are available for stealing by other
4106 4117 tasks. Only when there is no more work will a task totally
4107 4118 drain its local queue.
4108 4119
4109 4120 (3) Global Mark Stack. This handles local queue overflow. During
4110 4121 marking only sets of entries are moved between it and the local
4111 4122 queues, as access to it requires a mutex and more fine-grained
4112 4123 interaction with it which might cause contention. If it
4113 4124 overflows, then the marking phase should restart and iterate
4114 4125 over the bitmap to identify gray objects. Throughout the marking
4115 4126 phase, tasks attempt to keep the global mark stack at a small
4116 4127 length but not totally empty, so that entries are available for
4117 4128 popping by other tasks. Only when there is no more work will tasks
4118 4129 will totally drain the global mark stack.
4119 4130
4120 4131 (4) SATB Buffer Queue. This is where completed SATB buffers are
4121 4132 made available. Buffers are regularly removed from this queue
4122 4133 and scanned for roots, so that the queue doesn't get too
4123 4134 long. During remark, all completed buffers are processed, as
4124 4135 well as the filled-in parts of any uncompleted buffers.
4125 4136
4126 4137 The do_marking_step() method tries to abort when the time target
4127 4138 has been reached. There are a few other cases when the
4128 4139 do_marking_step() method also aborts:
4129 4140
4130 4141 (1) When the marking phase has been aborted (after a Full GC).
4131 4142
4132 4143 (2) When a global overflow (on the global stack) has been
4133 4144 triggered. Before the task aborts, it will actually sync up with
4134 4145 the other tasks to ensure that all the marking data structures
4135 4146 (local queues, stacks, fingers etc.) are re-initialized so that
4136 4147 when do_marking_step() completes, the marking phase can
4137 4148 immediately restart.
4138 4149
4139 4150 (3) When enough completed SATB buffers are available. The
4140 4151 do_marking_step() method only tries to drain SATB buffers right
4141 4152 at the beginning. So, if enough buffers are available, the
4142 4153 marking step aborts and the SATB buffers are processed at
4143 4154 the beginning of the next invocation.
4144 4155
4145 4156 (4) To yield. When we have to yield, we abort and yield
4146 4157 right at the end of do_marking_step(). This saves us from a lot
4147 4158 of hassle as, by yielding we might allow a Full GC. If this
4148 4159 happens then objects will be compacted underneath our feet, the
4149 4160 heap might shrink, etc. We save checking for this by just
4150 4161 aborting and doing the yield right at the end.
4151 4162
4152 4163 From the above it follows that the do_marking_step() method should
4153 4164 be called in a loop (or, otherwise, regularly) until it completes.
4154 4165
4155 4166 If a marking step completes without its has_aborted() flag being
4156 4167 true, it means it has completed the current marking phase (and
4157 4168 also all other marking tasks have done so and have all synced up).
4158 4169
4159 4170     A method called regular_clock_call() is invoked "regularly" (at
4160 4171     sub-ms intervals) throughout marking. It is this clock method that
4161 4172 checks all the abort conditions which were mentioned above and
4162 4173 decides when the task should abort. A work-based scheme is used to
4163 4174 trigger this clock method: when the number of object words the
4164 4175 marking phase has scanned or the number of references the marking
4165 4176     phase has visited reach a given limit. Additional invocations of
4166 4177     the clock method have been planted in a few other strategic places
4167 4178     too. The initial reason for the clock method was to avoid calling
4168 4179 vtime too regularly, as it is quite expensive. So, once it was in
4169 4180 place, it was natural to piggy-back all the other conditions on it
4170 4181 too and not constantly check them throughout the code.
4171 4182
4172 4183 If do_termination is true then do_marking_step will enter its
4173 4184 termination protocol.
4174 4185
4175 4186 The value of is_serial must be true when do_marking_step is being
4176 4187 called serially (i.e. by the VMThread) and do_marking_step should
4177 4188 skip any synchronization in the termination and overflow code.
4178 4189 Examples include the serial remark code and the serial reference
4179 4190 processing closures.
4180 4191
4181 4192 The value of is_serial must be false when do_marking_step is
4182 4193 being called by any of the worker threads in a work gang.
4183 4194 Examples include the concurrent marking code (CMMarkingTask),
4184 4195 the MT remark code, and the MT reference processing closures.
4185 4196
4186 4197 *****************************************************************************/
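
// The calling contract above, as a minimal illustrative sketch (this
// is NOT HotSpot code: the helper name and the 10.0ms target are made
// up for the example; only the call-until-completed pattern mirrors
// what the real callers elsewhere in this file do):
//
//   static void drive_marking_step_example(CMTask* task) {
//     do {
//       task->do_marking_step(10.0  /* time target in ms        */,
//                             true  /* do_termination           */,
//                             false /* is_serial: worker thread */);
//       // An aborted step (time-out, global overflow, SATB buffer
//       // pressure, or a yield) is simply retried; only a step that
//       // returns with has_aborted() == false ends the marking phase.
//     } while (task->has_aborted());
//   }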
4187 4198
4188 4199 void CMTask::do_marking_step(double time_target_ms,
4189 4200 bool do_termination,
4190 4201 bool is_serial) {
4191 4202 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4192 4203 assert(concurrent() == _cm->concurrent(), "they should be the same");
4193 4204
4194 4205 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4195 4206 assert(_task_queues != NULL, "invariant");
4196 4207 assert(_task_queue != NULL, "invariant");
4197 4208 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4198 4209
4199 4210 assert(!_claimed,
4200 4211 "only one thread should claim this task at any one time");
4201 4212
4202 4213   // OK, this doesn't safeguard against all possible scenarios, as it is
4203 4214 // possible for two threads to set the _claimed flag at the same
4204 4215 // time. But it is only for debugging purposes anyway and it will
4205 4216 // catch most problems.
4206 4217 _claimed = true;
4207 4218
4208 4219 _start_time_ms = os::elapsedVTime() * 1000.0;
4209 4220 statsOnly( _interval_start_time_ms = _start_time_ms );
4210 4221
4211 4222 // If do_stealing is true then do_marking_step will attempt to
4212 4223 // steal work from the other CMTasks. It only makes sense to
4213 4224 // enable stealing when the termination protocol is enabled
4214 4225 // and do_marking_step() is not being called serially.
4215 4226 bool do_stealing = do_termination && !is_serial;
4216 4227
4217 4228 double diff_prediction_ms =
4218 4229 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4219 4230 _time_target_ms = time_target_ms - diff_prediction_ms;
4220 4231
4221 4232 // set up the variables that are used in the work-based scheme to
4222 4233 // call the regular clock method
4223 4234 _words_scanned = 0;
4224 4235 _refs_reached = 0;
4225 4236 recalculate_limits();
4226 4237
4227 4238 // clear all flags
4228 4239 clear_has_aborted();
4229 4240 _has_timed_out = false;
4230 4241 _draining_satb_buffers = false;
4231 4242
4232 4243 ++_calls;
4233 4244
4234 4245 if (_cm->verbose_low()) {
4235 4246 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4236 4247 "target = %1.2lfms >>>>>>>>>>",
4237 4248 _worker_id, _calls, _time_target_ms);
4238 4249 }
4239 4250
4240 4251 // Set up the bitmap and oop closures. Anything that uses them is
4241 4252   // eventually called from this method, so it is OK to allocate them
4242 4253   // locally, on this frame's stack.
4243 4254 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4244 4255 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4245 4256 set_cm_oop_closure(&cm_oop_closure);
4246 4257
4247 4258 if (_cm->has_overflown()) {
4248 4259 // This can happen if the mark stack overflows during a GC pause
4249 4260 // and this task, after a yield point, restarts. We have to abort
4250 4261 // as we need to get into the overflow protocol which happens
4251 4262 // right at the end of this task.
4252 4263 set_has_aborted();
4253 4264 }
4254 4265
4255 4266 // First drain any available SATB buffers. After this, we will not
4256 4267 // look at SATB buffers before the next invocation of this method.
4257 4268 // If enough completed SATB buffers are queued up, the regular clock
4258 4269 // will abort this task so that it restarts.
4259 4270 drain_satb_buffers();
4260 4271 // ...then partially drain the local queue and the global stack
4261 4272 drain_local_queue(true);
4262 4273 drain_global_stack(true);
4263 4274
4264 4275 do {
4265 4276 if (!has_aborted() && _curr_region != NULL) {
4266 4277 // This means that we're already holding on to a region.
4267 4278 assert(_finger != NULL, "if region is not NULL, then the finger "
4268 4279 "should not be NULL either");
4269 4280
4270 4281 // We might have restarted this task after an evacuation pause
4271 4282 // which might have evacuated the region we're holding on to
4272 4283 // underneath our feet. Let's read its limit again to make sure
4273 4284 // that we do not iterate over a region of the heap that
4274 4285 // contains garbage (update_region_limit() will also move
4275 4286 // _finger to the start of the region if it is found empty).
4276 4287 update_region_limit();
4277 4288 // We will start from _finger not from the start of the region,
4278 4289 // as we might be restarting this task after aborting half-way
4279 4290 // through scanning this region. In this case, _finger points to
4280 4291 // the address where we last found a marked object. If this is a
4281 4292 // fresh region, _finger points to start().
4282 4293 MemRegion mr = MemRegion(_finger, _region_limit);
4283 4294
4284 4295 if (_cm->verbose_low()) {
4285 4296 gclog_or_tty->print_cr("[%u] we're scanning part "
4286 4297 "["PTR_FORMAT", "PTR_FORMAT") "
4287 4298 "of region "HR_FORMAT,
4288 4299 _worker_id, p2i(_finger), p2i(_region_limit),
4289 4300 HR_FORMAT_PARAMS(_curr_region));
4290 4301 }
4291 4302
4292 4303 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4293 4304 "humongous regions should go around loop once only");
4294 4305
4295 4306 // Some special cases:
4296 4307 // If the memory region is empty, we can just give up the region.
4297 4308 // If the current region is humongous then we only need to check
4298 4309 // the bitmap for the bit associated with the start of the object,
4299 4310 // scan the object if it's live, and give up the region.
4300 4311 // Otherwise, let's iterate over the bitmap of the part of the region
4301 4312 // that is left.
4302 4313 // If the iteration is successful, give up the region.
4303 4314 if (mr.is_empty()) {
4304 4315 giveup_current_region();
4305 4316 regular_clock_call();
4306 4317 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4307 4318 if (_nextMarkBitMap->isMarked(mr.start())) {
4308 4319 // The object is marked - apply the closure
4309 4320 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4310 4321 bitmap_closure.do_bit(offset);
4311 4322 }
4312 4323 // Even if this task aborted while scanning the humongous object
4313 4324 // we can (and should) give up the current region.
4314 4325 giveup_current_region();
4315 4326 regular_clock_call();
4316 4327 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4317 4328 giveup_current_region();
4318 4329 regular_clock_call();
4319 4330 } else {
4320 4331 assert(has_aborted(), "currently the only way to do so");
4321 4332 // The only way to abort the bitmap iteration is to return
4322 4333 // false from the do_bit() method. However, inside the
4323 4334 // do_bit() method we move the _finger to point to the
4324 4335 // object currently being looked at. So, if we bail out, we
4325 4336 // have definitely set _finger to something non-null.
4326 4337 assert(_finger != NULL, "invariant");
4327 4338
4328 4339 // Region iteration was actually aborted. So now _finger
4329 4340 // points to the address of the object we last scanned. If we
4330 4341 // leave it there, when we restart this task, we will rescan
4331 4342 // the object. It is easy to avoid this. We move the finger by
4332 4343 // enough to point to the next possible object header (the
4333 4344 // bitmap knows by how much we need to move it as it knows its
4334 4345 // granularity).
4335 4346 assert(_finger < _region_limit, "invariant");
4336 4347 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4337 4348 // Check if bitmap iteration was aborted while scanning the last object
4338 4349 if (new_finger >= _region_limit) {
4339 4350 giveup_current_region();
4340 4351 } else {
4341 4352 move_finger_to(new_finger);
4342 4353 }
4343 4354 }
4344 4355 }
4345 4356 // At this point we have either completed iterating over the
4346 4357 // region we were holding on to, or we have aborted.
4347 4358
4348 4359 // We then partially drain the local queue and the global stack.
4349 4360 // (Do we really need this?)
4350 4361 drain_local_queue(true);
4351 4362 drain_global_stack(true);
4352 4363
4353 4364 // Read the note on the claim_region() method on why it might
4354 4365 // return NULL with potentially more regions available for
4355 4366 // claiming and why we have to check out_of_regions() to determine
4356 4367 // whether we're done or not.
4357 4368 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4358 4369 // We are going to try to claim a new region. We should have
4359 4370 // given up on the previous one.
4360 4371 // Separated the asserts so that we know which one fires.
4361 4372 assert(_curr_region == NULL, "invariant");
4362 4373 assert(_finger == NULL, "invariant");
4363 4374 assert(_region_limit == NULL, "invariant");
4364 4375 if (_cm->verbose_low()) {
4365 4376 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4366 4377 }
4367 4378 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4368 4379 if (claimed_region != NULL) {
4369 4380 // Yes, we managed to claim one
4370 4381 statsOnly( ++_regions_claimed );
4371 4382
4372 4383 if (_cm->verbose_low()) {
4373 4384 gclog_or_tty->print_cr("[%u] we successfully claimed "
4374 4385 "region "PTR_FORMAT,
4375 4386 _worker_id, p2i(claimed_region));
4376 4387 }
4377 4388
4378 4389 setup_for_region(claimed_region);
4379 4390 assert(_curr_region == claimed_region, "invariant");
4380 4391 }
4381 4392 // It is important to call the regular clock here. It might take
4382 4393 // a while to claim a region if, for example, we hit a large
4383 4394 // block of empty regions. So we need to call the regular clock
4384 4395 // method once round the loop to make sure it's called
4385 4396 // frequently enough.
4386 4397 regular_clock_call();
4387 4398 }
4388 4399
4389 4400 if (!has_aborted() && _curr_region == NULL) {
4390 4401 assert(_cm->out_of_regions(),
4391 4402 "at this point we should be out of regions");
4392 4403 }
4393 4404 } while ( _curr_region != NULL && !has_aborted());
4394 4405
4395 4406 if (!has_aborted()) {
4396 4407 // We cannot check whether the global stack is empty, since other
4397 4408 // tasks might be pushing objects to it concurrently.
4398 4409 assert(_cm->out_of_regions(),
4399 4410 "at this point we should be out of regions");
4400 4411
4401 4412 if (_cm->verbose_low()) {
4402 4413 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4403 4414 }
4404 4415
4405 4416 // Try to reduce the number of available SATB buffers so that
4406 4417 // remark has less work to do.
4407 4418 drain_satb_buffers();
4408 4419 }
4409 4420
4410 4421 // Since we've done everything else, we can now totally drain the
4411 4422 // local queue and global stack.
4412 4423 drain_local_queue(false);
4413 4424 drain_global_stack(false);
4414 4425
4415 4426   // Attempt at work stealing from other tasks' queues.
4416 4427 if (do_stealing && !has_aborted()) {
4417 4428 // We have not aborted. This means that we have finished all that
4418 4429 // we could. Let's try to do some stealing...
4419 4430
4420 4431 // We cannot check whether the global stack is empty, since other
4421 4432 // tasks might be pushing objects to it concurrently.
4422 4433 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4423 4434 "only way to reach here");
4424 4435
4425 4436 if (_cm->verbose_low()) {
4426 4437 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4427 4438 }
4428 4439
4429 4440 while (!has_aborted()) {
4430 4441 oop obj;
4431 4442 statsOnly( ++_steal_attempts );
4432 4443
4433 4444 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4434 4445 if (_cm->verbose_medium()) {
4435 4446 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4436 4447 _worker_id, p2i((void*) obj));
4437 4448 }
4438 4449
4439 4450 statsOnly( ++_steals );
4440 4451
4441 4452 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4442 4453 "any stolen object should be marked");
4443 4454 scan_object(obj);
4444 4455
4445 4456 // And since we're towards the end, let's totally drain the
4446 4457 // local queue and global stack.
4447 4458 drain_local_queue(false);
4448 4459 drain_global_stack(false);
4449 4460 } else {
4450 4461 break;
4451 4462 }
4452 4463 }
4453 4464 }
4454 4465
4455 4466 // If we are about to wrap up and go into termination, check if we
4456 4467 // should raise the overflow flag.
4457 4468 if (do_termination && !has_aborted()) {
4458 4469 if (_cm->force_overflow()->should_force()) {
4459 4470 _cm->set_has_overflown();
4460 4471 regular_clock_call();
4461 4472 }
4462 4473 }
4463 4474
4464 4475 // We still haven't aborted. Now, let's try to get into the
4465 4476 // termination protocol.
4466 4477 if (do_termination && !has_aborted()) {
4467 4478 // We cannot check whether the global stack is empty, since other
4468 4479 // tasks might be concurrently pushing objects on it.
4469 4480 // Separated the asserts so that we know which one fires.
4470 4481 assert(_cm->out_of_regions(), "only way to reach here");
4471 4482 assert(_task_queue->size() == 0, "only way to reach here");
4472 4483
4473 4484 if (_cm->verbose_low()) {
4474 4485 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4475 4486 }
4476 4487
4477 4488 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4478 4489
4479 4490 // The CMTask class also extends the TerminatorTerminator class,
4480 4491 // hence its should_exit_termination() method will also decide
4481 4492 // whether to exit the termination protocol or not.
4482 4493 bool finished = (is_serial ||
4483 4494 _cm->terminator()->offer_termination(this));
4484 4495 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4485 4496 _termination_time_ms +=
4486 4497 termination_end_time_ms - _termination_start_time_ms;
4487 4498
4488 4499 if (finished) {
4489 4500 // We're all done.
4490 4501
4491 4502 if (_worker_id == 0) {
4492 4503 // let's allow task 0 to do this
4493 4504 if (concurrent()) {
4494 4505 assert(_cm->concurrent_marking_in_progress(), "invariant");
4495 4506 // we need to set this to false before the next
4496 4507 // safepoint. This way we ensure that the marking phase
4497 4508 // doesn't observe any more heap expansions.
4498 4509 _cm->clear_concurrent_marking_in_progress();
4499 4510 }
4500 4511 }
4501 4512
4502 4513 // We can now guarantee that the global stack is empty, since
4503 4514 // all other tasks have finished. We separated the guarantees so
4504 4515 // that, if a condition is false, we can immediately find out
4505 4516 // which one.
4506 4517 guarantee(_cm->out_of_regions(), "only way to reach here");
4507 4518 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4508 4519 guarantee(_task_queue->size() == 0, "only way to reach here");
4509 4520 guarantee(!_cm->has_overflown(), "only way to reach here");
4510 4521 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4511 4522
4512 4523 if (_cm->verbose_low()) {
4513 4524 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4514 4525 }
4515 4526 } else {
4516 4527       // Apparently there's more work to do. Let's abort this task. The
4517 4528       // caller will restart it and we can hopefully find more things to do.
4518 4529
4519 4530 if (_cm->verbose_low()) {
4520 4531 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4521 4532 _worker_id);
4522 4533 }
4523 4534
4524 4535 set_has_aborted();
4525 4536 statsOnly( ++_aborted_termination );
4526 4537 }
4527 4538 }
4528 4539
4529 4540 // Mainly for debugging purposes to make sure that a pointer to the
4530 4541   // closure which was stack-allocated in this frame doesn't
4531 4542 // escape it by accident.
4532 4543 set_cm_oop_closure(NULL);
4533 4544 double end_time_ms = os::elapsedVTime() * 1000.0;
4534 4545 double elapsed_time_ms = end_time_ms - _start_time_ms;
4535 4546 // Update the step history.
4536 4547 _step_times_ms.add(elapsed_time_ms);
4537 4548
4538 4549 if (has_aborted()) {
4539 4550 // The task was aborted for some reason.
4540 4551
4541 4552 statsOnly( ++_aborted );
4542 4553
4543 4554 if (_has_timed_out) {
4544 4555 double diff_ms = elapsed_time_ms - _time_target_ms;
4545 4556 // Keep statistics of how well we did with respect to hitting
4546 4557 // our target only if we actually timed out (if we aborted for
4547 4558 // other reasons, then the results might get skewed).
4548 4559 _marking_step_diffs_ms.add(diff_ms);
4549 4560 }
4550 4561
4551 4562 if (_cm->has_overflown()) {
4552 4563 // This is the interesting one. We aborted because a global
4553 4564 // overflow was raised. This means we have to restart the
4554 4565 // marking phase and start iterating over regions. However, in
4555 4566 // order to do this we have to make sure that all tasks stop
4556 4567 // what they are doing and re-initialise in a safe manner. We
4557 4568 // will achieve this with the use of two barrier sync points.
4558 4569
4559 4570 if (_cm->verbose_low()) {
4560 4571 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4561 4572 }
4562 4573
4563 4574 if (!is_serial) {
4564 4575 // We only need to enter the sync barrier if being called
4565 4576 // from a parallel context
4566 4577 _cm->enter_first_sync_barrier(_worker_id);
4567 4578
4568 4579 // When we exit this sync barrier we know that all tasks have
4569 4580 // stopped doing marking work. So, it's now safe to
4570 4581 // re-initialise our data structures. At the end of this method,
4571 4582 // task 0 will clear the global data structures.
4572 4583 }
4573 4584
4574 4585 statsOnly( ++_aborted_overflow );
4575 4586
4576 4587 // We clear the local state of this task...
4577 4588 clear_region_fields();
4578 4589
4579 4590 if (!is_serial) {
4580 4591 // ...and enter the second barrier.
4581 4592 _cm->enter_second_sync_barrier(_worker_id);
4582 4593 }
4583 4594 // At this point, if we're during the concurrent phase of
4584 4595 // marking, everything has been re-initialized and we're
4585 4596 // ready to restart.
4586 4597 }
4587 4598
4588 4599 if (_cm->verbose_low()) {
4589 4600 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4590 4601 "elapsed = %1.2lfms <<<<<<<<<<",
4591 4602 _worker_id, _time_target_ms, elapsed_time_ms);
4592 4603 if (_cm->has_aborted()) {
4593 4604 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4594 4605 _worker_id);
4595 4606 }
4596 4607 }
4597 4608 } else {
4598 4609 if (_cm->verbose_low()) {
4599 4610 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4600 4611 "elapsed = %1.2lfms <<<<<<<<<<",
4601 4612 _worker_id, _time_target_ms, elapsed_time_ms);
4602 4613 }
4603 4614 }
4604 4615
4605 4616 _claimed = false;
4606 4617 }
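
// Illustrative invocation shapes for the two modes discussed in the
// comment above do_marking_step() (a hedged sketch; the literal time
// targets used at the real call sites may differ):
//
//   // Concurrent/MT case: a worker thread in a work gang.
//   task->do_marking_step(target_ms,
//                         true  /* do_termination */,
//                         false /* is_serial      */);
//
//   // Serial case: remark or reference processing on the VMThread,
//   // with an effectively unbounded time target.
//   task->do_marking_step(1000000000.0 /* ~unbounded */,
//                         true  /* do_termination */,
//                         true  /* is_serial      */);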
4607 4618
4608 4619 CMTask::CMTask(uint worker_id,
4609 4620 ConcurrentMark* cm,
4610 4621 size_t* marked_bytes,
4611 4622 BitMap* card_bm,
4612 4623 CMTaskQueue* task_queue,
4613 4624 CMTaskQueueSet* task_queues)
4614 4625 : _g1h(G1CollectedHeap::heap()),
4615 4626 _worker_id(worker_id), _cm(cm),
4616 4627 _claimed(false),
4617 4628 _nextMarkBitMap(NULL), _hash_seed(17),
4618 4629 _task_queue(task_queue),
4619 4630 _task_queues(task_queues),
4620 4631 _cm_oop_closure(NULL),
4621 4632 _marked_bytes_array(marked_bytes),
4622 4633 _card_bm(card_bm) {
4623 4634 guarantee(task_queue != NULL, "invariant");
4624 4635 guarantee(task_queues != NULL, "invariant");
4625 4636
4626 4637 statsOnly( _clock_due_to_scanning = 0;
4627 4638 _clock_due_to_marking = 0 );
4628 4639
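// Seed the step-time prediction history with a 0.5ms sample so that
// the very first do_marking_step() call already subtracts a small
// safety margin from its time target (see get_new_prediction() there).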
4629 4640 _marking_step_diffs_ms.add(0.5);
4630 4641 }
4631 4642
4632 4643 // These are formatting macros that are used below to ensure
4633 4644 // consistent formatting. The *_H_* versions are used to format the
4634 4645 // header for a particular value and they should be kept consistent
4635 4646 // with the corresponding macro. Also note that most of the macros add
4636 4647 // the necessary white space (as a prefix) which makes them a bit
4637 4648 // easier to compose.
4638 4649
4639 4650 // All the output lines are prefixed with this string to be able to
4640 4651 // identify them easily in a large log file.
4641 4652 #define G1PPRL_LINE_PREFIX "###"
4642 4653
4643 4654 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4644 4655 #ifdef _LP64
4645 4656 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4646 4657 #else // _LP64
4647 4658 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4648 4659 #endif // _LP64
4649 4660
4650 4661 // For per-region info
4651 4662 #define G1PPRL_TYPE_FORMAT " %-4s"
4652 4663 #define G1PPRL_TYPE_H_FORMAT " %4s"
4653 4664 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4654 4665 #define G1PPRL_BYTE_H_FORMAT " %9s"
4655 4666 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4656 4667 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4657 4668
4658 4669 // For summary info
4659 4670 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4660 4671 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4661 4672 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4662 4673 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
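
// For example (an illustrative expansion, not part of the original
// source): G1PPRL_SUM_MB_PERC_FORMAT("used") concatenates, via
// adjacent string literals, into " used: %1.2f MB / %1.2f %%".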
4663 4674
4664 4675 G1PrintRegionLivenessInfoClosure::
4665 4676 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4666 4677 : _out(out),
4667 4678 _total_used_bytes(0), _total_capacity_bytes(0),
4668 4679 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4669 4680 _hum_used_bytes(0), _hum_capacity_bytes(0),
4670 4681 _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4671 4682 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4672 4683 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4673 4684 MemRegion g1_reserved = g1h->g1_reserved();
4674 4685 double now = os::elapsedTime();
4675 4686
4676 4687 // Print the header of the output.
4677 4688 _out->cr();
4678 4689 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4679 4690 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4680 4691 G1PPRL_SUM_ADDR_FORMAT("reserved")
4681 4692 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4682 4693 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4683 4694 HeapRegion::GrainBytes);
4684 4695 _out->print_cr(G1PPRL_LINE_PREFIX);
4685 4696 _out->print_cr(G1PPRL_LINE_PREFIX
4686 4697 G1PPRL_TYPE_H_FORMAT
4687 4698 G1PPRL_ADDR_BASE_H_FORMAT
4688 4699 G1PPRL_BYTE_H_FORMAT
4689 4700 G1PPRL_BYTE_H_FORMAT
4690 4701 G1PPRL_BYTE_H_FORMAT
4691 4702 G1PPRL_DOUBLE_H_FORMAT
4692 4703 G1PPRL_BYTE_H_FORMAT
4693 4704 G1PPRL_BYTE_H_FORMAT,
4694 4705 "type", "address-range",
4695 4706 "used", "prev-live", "next-live", "gc-eff",
4696 4707 "remset", "code-roots");
4697 4708 _out->print_cr(G1PPRL_LINE_PREFIX
4698 4709 G1PPRL_TYPE_H_FORMAT
4699 4710 G1PPRL_ADDR_BASE_H_FORMAT
4700 4711 G1PPRL_BYTE_H_FORMAT
4701 4712 G1PPRL_BYTE_H_FORMAT
4702 4713 G1PPRL_BYTE_H_FORMAT
4703 4714 G1PPRL_DOUBLE_H_FORMAT
4704 4715 G1PPRL_BYTE_H_FORMAT
4705 4716 G1PPRL_BYTE_H_FORMAT,
4706 4717 "", "",
4707 4718 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4708 4719 "(bytes)", "(bytes)");
4709 4720 }
4710 4721
4711 4722 // It takes as a parameter a reference to one of the _hum_* fields,
4712 4723 // deduces the corresponding value for a region in a humongous region
4713 4724 // series (either the region size, or what's left if the _hum_* field
4714 4725 // is < the region size), and updates the _hum_* field accordingly.
4715 4726 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4716 4727 size_t bytes = 0;
4717 4728 // The > 0 check is to deal with the prev and next live bytes which
4718 4729 // could be 0.
4719 4730 if (*hum_bytes > 0) {
4720 4731 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4721 4732 *hum_bytes -= bytes;
4722 4733 }
4723 4734 return bytes;
4724 4735 }
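
// Worked example (illustrative; assumes a 1 MB region size): for a
// humongous series whose "starts humongous" region recorded
// _hum_used_bytes = 2.5 MB, three successive calls return 1 MB, 1 MB
// and 0.5 MB, leaving the field zeroed for the next series' regions.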
4725 4736
4726 4737 // It deduces the values for a region in a humongous region series
4727 4738 // from the _hum_* fields and updates those accordingly. It assumes
4728 4739 // that that _hum_* fields have already been set up from the "starts
4729 4740 // humongous" region and we visit the regions in address order.
4730 4741 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4731 4742 size_t* capacity_bytes,
4732 4743 size_t* prev_live_bytes,
4733 4744 size_t* next_live_bytes) {
4734 4745 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4735 4746 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4736 4747 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4737 4748 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4738 4749 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4739 4750 }
4740 4751
4741 4752 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4742 4753 const char* type = r->get_type_str();
4743 4754 HeapWord* bottom = r->bottom();
4744 4755 HeapWord* end = r->end();
4745 4756 size_t capacity_bytes = r->capacity();
4746 4757 size_t used_bytes = r->used();
4747 4758 size_t prev_live_bytes = r->live_bytes();
4748 4759 size_t next_live_bytes = r->next_live_bytes();
4749 4760 double gc_eff = r->gc_efficiency();
4750 4761 size_t remset_bytes = r->rem_set()->mem_size();
4751 4762 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4752 4763
4753 4764 if (r->startsHumongous()) {
4754 4765 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4755 4766 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4756 4767 "they should have been zeroed after the last time we used them");
4757 4768 // Set up the _hum_* fields.
4758 4769 _hum_capacity_bytes = capacity_bytes;
4759 4770 _hum_used_bytes = used_bytes;
4760 4771 _hum_prev_live_bytes = prev_live_bytes;
4761 4772 _hum_next_live_bytes = next_live_bytes;
4762 4773 get_hum_bytes(&used_bytes, &capacity_bytes,
4763 4774 &prev_live_bytes, &next_live_bytes);
4764 4775 end = bottom + HeapRegion::GrainWords;
4765 4776 } else if (r->continuesHumongous()) {
4766 4777 get_hum_bytes(&used_bytes, &capacity_bytes,
4767 4778 &prev_live_bytes, &next_live_bytes);
4768 4779 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4769 4780 }
4770 4781
4771 4782 _total_used_bytes += used_bytes;
4772 4783 _total_capacity_bytes += capacity_bytes;
4773 4784 _total_prev_live_bytes += prev_live_bytes;
4774 4785 _total_next_live_bytes += next_live_bytes;
4775 4786 _total_remset_bytes += remset_bytes;
4776 4787 _total_strong_code_roots_bytes += strong_code_roots_bytes;
4777 4788
4778 4789 // Print a line for this particular region.
4779 4790 _out->print_cr(G1PPRL_LINE_PREFIX
4780 4791 G1PPRL_TYPE_FORMAT
4781 4792 G1PPRL_ADDR_BASE_FORMAT
4782 4793 G1PPRL_BYTE_FORMAT
4783 4794 G1PPRL_BYTE_FORMAT
4784 4795 G1PPRL_BYTE_FORMAT
4785 4796 G1PPRL_DOUBLE_FORMAT
4786 4797 G1PPRL_BYTE_FORMAT
4787 4798 G1PPRL_BYTE_FORMAT,
4788 4799 type, p2i(bottom), p2i(end),
4789 4800 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4790 4801 remset_bytes, strong_code_roots_bytes);
4791 4802
4792 4803 return false;
4793 4804 }
4794 4805
4795 4806 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4796 4807   // Add static memory usage to the remembered set sizes.
4797 4808 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4798 4809 // Print the footer of the output.
4799 4810 _out->print_cr(G1PPRL_LINE_PREFIX);
4800 4811 _out->print_cr(G1PPRL_LINE_PREFIX
4801 4812 " SUMMARY"
4802 4813 G1PPRL_SUM_MB_FORMAT("capacity")
4803 4814 G1PPRL_SUM_MB_PERC_FORMAT("used")
4804 4815 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4805 4816 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4806 4817 G1PPRL_SUM_MB_FORMAT("remset")
4807 4818 G1PPRL_SUM_MB_FORMAT("code-roots"),
4808 4819 bytes_to_mb(_total_capacity_bytes),
4809 4820 bytes_to_mb(_total_used_bytes),
4810 4821 perc(_total_used_bytes, _total_capacity_bytes),
4811 4822 bytes_to_mb(_total_prev_live_bytes),
4812 4823 perc(_total_prev_live_bytes, _total_capacity_bytes),
4813 4824 bytes_to_mb(_total_next_live_bytes),
4814 4825 perc(_total_next_live_bytes, _total_capacity_bytes),
4815 4826 bytes_to_mb(_total_remset_bytes),
4816 4827 bytes_to_mb(_total_strong_code_roots_bytes));
4817 4828 _out->cr();
4818 4829 }
[ 2193 lines elided ]