rev 3640 : 7200261: G1: Liveness counting inconsistencies during marking verification
Summary: The clipping code in the routine that sets the bits for a range of cards in the liveness accounting verification code was incorrect: it set all the bits in the card bitmap from the given starting index onwards, which would lead to spurious marking verification failures.
Reviewed-by:
* * *
[mq]: code-review-comments
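
Editor's note on the defect, for orientation before the diff: the removed helper CMCountDataClosureBase::set_card_bitmap_range() clipped its exclusive upper bound with MAX2 where, judging from the assert just above it (last_idx < _card_bm->size()), a MIN2-style clamp was evidently intended. A minimal sketch of why this marks every card from the start index onwards:

    // Quoted from the removed lines below:
    assert(last_idx < _card_bm->size(), "sanity");
    // Note BitMap::par_at_put_range() is exclusive.
    BitMap::idx_t max_idx = MAX2(last_idx+1, _card_bm->size());
    _card_bm->par_at_put_range(start_idx, max_idx, true);
    // The assert guarantees last_idx + 1 <= _card_bm->size(), so MAX2
    // always yields _card_bm->size() and the call sets the whole range
    // [start_idx, _card_bm->size()) instead of just [start_idx, last_idx].

The patch replaces this helper with ConcurrentMark::set_card_bitmap_range(), driven by an exclusive end index derived from the (possibly card-unaligned) end address.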
--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
32 32 #include "gc_implementation/g1/g1Log.hpp"
33 33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
34 34 #include "gc_implementation/g1/g1RemSet.hpp"
35 35 #include "gc_implementation/g1/heapRegion.inline.hpp"
36 36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
37 37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
38 38 #include "gc_implementation/shared/vmGCOperations.hpp"
39 39 #include "memory/genOopClosures.inline.hpp"
40 40 #include "memory/referencePolicy.hpp"
41 41 #include "memory/resourceArea.hpp"
42 42 #include "oops/oop.inline.hpp"
43 43 #include "runtime/handles.inline.hpp"
44 44 #include "runtime/java.hpp"
45 45 #include "services/memTracker.hpp"
46 46
47 47 // Concurrent marking bit map wrapper
48 48
49 49 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
50 50 _bm((uintptr_t*)NULL,0),
51 51 _shifter(shifter) {
52 52 _bmStartWord = (HeapWord*)(rs.base());
53 53 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
54 54 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
55 55 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
56 56
57 57 MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
58 58
59 59 guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
60 60 // For now we'll just commit all of the bit map up front.
61 61 // Later on we'll try to be more parsimonious with swap.
62 62 guarantee(_virtual_space.initialize(brs, brs.size()),
63 63 "couldn't reseve backing store for concurrent marking bit map");
64 64 assert(_virtual_space.committed_size() == brs.size(),
65 65 "didn't reserve backing store for all of concurrent marking bit map?");
66 66 _bm.set_map((uintptr_t*)_virtual_space.low());
67 67 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
68 68 _bmWordSize, "inconsistency in bit map sizing");
69 69 _bm.set_size(_bmWordSize >> _shifter);
70 70 }
71 71
72 72 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
73 73 HeapWord* limit) const {
74 74 // First we must round addr *up* to a possible object boundary.
75 75 addr = (HeapWord*)align_size_up((intptr_t)addr,
76 76 HeapWordSize << _shifter);
77 77 size_t addrOffset = heapWordToOffset(addr);
78 78 if (limit == NULL) {
79 79 limit = _bmStartWord + _bmWordSize;
80 80 }
81 81 size_t limitOffset = heapWordToOffset(limit);
82 82 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
83 83 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
84 84 assert(nextAddr >= addr, "get_next_one postcondition");
85 85 assert(nextAddr == limit || isMarked(nextAddr),
86 86 "get_next_one postcondition");
87 87 return nextAddr;
88 88 }
89 89
90 90 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
91 91 HeapWord* limit) const {
92 92 size_t addrOffset = heapWordToOffset(addr);
93 93 if (limit == NULL) {
94 94 limit = _bmStartWord + _bmWordSize;
95 95 }
96 96 size_t limitOffset = heapWordToOffset(limit);
97 97 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
98 98 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
99 99 assert(nextAddr >= addr, "get_next_zero postcondition");
100 100 assert(nextAddr == limit || !isMarked(nextAddr),
101 101 "get_next_zero postcondition");
102 102 return nextAddr;
103 103 }
104 104
105 105 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
106 106 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
107 107 return (int) (diff >> _shifter);
108 108 }
109 109
110 110 #ifndef PRODUCT
111 111 bool CMBitMapRO::covers(ReservedSpace rs) const {
112 112 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
113 113 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
114 114 "size inconsistency");
115 115 return _bmStartWord == (HeapWord*)(rs.base()) &&
116 116 _bmWordSize == rs.size()>>LogHeapWordSize;
117 117 }
118 118 #endif
119 119
120 120 void CMBitMap::clearAll() {
121 121 _bm.clear();
122 122 return;
123 123 }
124 124
125 125 void CMBitMap::markRange(MemRegion mr) {
126 126 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
127 127 assert(!mr.is_empty(), "unexpected empty region");
128 128 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
129 129 ((HeapWord *) mr.end())),
130 130 "markRange memory region end is not card aligned");
131 131 // convert address range into offset range
132 132 _bm.at_put_range(heapWordToOffset(mr.start()),
133 133 heapWordToOffset(mr.end()), true);
134 134 }
135 135
136 136 void CMBitMap::clearRange(MemRegion mr) {
137 137 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
138 138 assert(!mr.is_empty(), "unexpected empty region");
139 139 // convert address range into offset range
140 140 _bm.at_put_range(heapWordToOffset(mr.start()),
141 141 heapWordToOffset(mr.end()), false);
142 142 }
143 143
144 144 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
145 145 HeapWord* end_addr) {
146 146 HeapWord* start = getNextMarkedWordAddress(addr);
147 147 start = MIN2(start, end_addr);
148 148 HeapWord* end = getNextUnmarkedWordAddress(start);
149 149 end = MIN2(end, end_addr);
150 150 assert(start <= end, "Consistency check");
151 151 MemRegion mr(start, end);
152 152 if (!mr.is_empty()) {
153 153 clearRange(mr);
154 154 }
155 155 return mr;
156 156 }
157 157
158 158 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
159 159 _base(NULL), _cm(cm)
160 160 #ifdef ASSERT
161 161 , _drain_in_progress(false)
162 162 , _drain_in_progress_yields(false)
163 163 #endif
164 164 {}
165 165
166 166 void CMMarkStack::allocate(size_t size) {
167 167 _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
168 168 if (_base == NULL) {
169 169 vm_exit_during_initialization("Failed to allocate CM region mark stack");
170 170 }
171 171 _index = 0;
172 172 _capacity = (jint) size;
173 173 _saved_index = -1;
174 174 NOT_PRODUCT(_max_depth = 0);
175 175 }
176 176
177 177 CMMarkStack::~CMMarkStack() {
178 178 if (_base != NULL) {
179 179 FREE_C_HEAP_ARRAY(oop, _base, mtGC);
180 180 }
181 181 }
182 182
183 183 void CMMarkStack::par_push(oop ptr) {
184 184 while (true) {
185 185 if (isFull()) {
186 186 _overflow = true;
187 187 return;
188 188 }
189 189 // Otherwise...
190 190 jint index = _index;
191 191 jint next_index = index+1;
192 192 jint res = Atomic::cmpxchg(next_index, &_index, index);
193 193 if (res == index) {
194 194 _base[index] = ptr;
195 195 // Note that we don't maintain this atomically. We could, but it
196 196 // doesn't seem necessary.
197 197 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
198 198 return;
199 199 }
200 200 // Otherwise, we need to try again.
201 201 }
202 202 }
203 203
204 204 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
205 205 while (true) {
206 206 if (isFull()) {
207 207 _overflow = true;
208 208 return;
209 209 }
210 210 // Otherwise...
211 211 jint index = _index;
212 212 jint next_index = index + n;
213 213 if (next_index > _capacity) {
214 214 _overflow = true;
215 215 return;
216 216 }
217 217 jint res = Atomic::cmpxchg(next_index, &_index, index);
218 218 if (res == index) {
219 219 for (int i = 0; i < n; i++) {
220 220 int ind = index + i;
221 221 assert(ind < _capacity, "By overflow test above.");
222 222 _base[ind] = ptr_arr[i];
223 223 }
224 224 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
225 225 return;
226 226 }
227 227 // Otherwise, we need to try again.
228 228 }
229 229 }
230 230
231 231
232 232 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
233 233 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
234 234 jint start = _index;
235 235 jint next_index = start + n;
236 236 if (next_index > _capacity) {
237 237 _overflow = true;
238 238 return;
239 239 }
240 240 // Otherwise.
241 241 _index = next_index;
242 242 for (int i = 0; i < n; i++) {
243 243 int ind = start + i;
244 244 assert(ind < _capacity, "By overflow test above.");
245 245 _base[ind] = ptr_arr[i];
246 246 }
247 247 }
248 248
249 249
250 250 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
251 251 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
252 252 jint index = _index;
253 253 if (index == 0) {
254 254 *n = 0;
255 255 return false;
256 256 } else {
257 257 int k = MIN2(max, index);
258 258 jint new_ind = index - k;
259 259 for (int j = 0; j < k; j++) {
260 260 ptr_arr[j] = _base[new_ind + j];
261 261 }
262 262 _index = new_ind;
263 263 *n = k;
264 264 return true;
265 265 }
266 266 }
267 267
268 268 template<class OopClosureClass>
269 269 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
270 270 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
271 271 || SafepointSynchronize::is_at_safepoint(),
272 272 "Drain recursion must be yield-safe.");
273 273 bool res = true;
274 274 debug_only(_drain_in_progress = true);
275 275 debug_only(_drain_in_progress_yields = yield_after);
276 276 while (!isEmpty()) {
277 277 oop newOop = pop();
278 278 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
279 279 assert(newOop->is_oop(), "Expected an oop");
280 280 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
281 281 "only grey objects on this stack");
282 282 newOop->oop_iterate(cl);
283 283 if (yield_after && _cm->do_yield_check()) {
284 284 res = false;
285 285 break;
286 286 }
287 287 }
288 288 debug_only(_drain_in_progress = false);
289 289 return res;
290 290 }
291 291
292 292 void CMMarkStack::note_start_of_gc() {
293 293 assert(_saved_index == -1,
294 294 "note_start_of_gc()/end_of_gc() bracketed incorrectly");
295 295 _saved_index = _index;
296 296 }
297 297
298 298 void CMMarkStack::note_end_of_gc() {
299 299 // This is intentionally a guarantee, instead of an assert. If we
300 300 // accidentally add something to the mark stack during GC, it
301 301 // will be a correctness issue so it's better if we crash. we'll
302 302 // only check this once per GC anyway, so it won't be a performance
303 303 // issue in any way.
304 304 guarantee(_saved_index == _index,
305 305 err_msg("saved index: %d index: %d", _saved_index, _index));
306 306 _saved_index = -1;
307 307 }
308 308
309 309 void CMMarkStack::oops_do(OopClosure* f) {
310 310 assert(_saved_index == _index,
311 311 err_msg("saved index: %d index: %d", _saved_index, _index));
312 312 for (int i = 0; i < _index; i += 1) {
313 313 f->do_oop(&_base[i]);
314 314 }
315 315 }
316 316
317 317 bool ConcurrentMark::not_yet_marked(oop obj) const {
318 318 return _g1h->is_obj_ill(obj);
319 319 }
320 320
321 321 CMRootRegions::CMRootRegions() :
322 322 _young_list(NULL), _cm(NULL), _scan_in_progress(false),
323 323 _should_abort(false), _next_survivor(NULL) { }
324 324
325 325 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
326 326 _young_list = g1h->young_list();
327 327 _cm = cm;
328 328 }
329 329
330 330 void CMRootRegions::prepare_for_scan() {
331 331 assert(!scan_in_progress(), "pre-condition");
332 332
333 333 // Currently, only survivors can be root regions.
334 334 assert(_next_survivor == NULL, "pre-condition");
335 335 _next_survivor = _young_list->first_survivor_region();
336 336 _scan_in_progress = (_next_survivor != NULL);
337 337 _should_abort = false;
338 338 }
339 339
340 340 HeapRegion* CMRootRegions::claim_next() {
341 341 if (_should_abort) {
342 342 // If someone has set the should_abort flag, we return NULL to
343 343 // force the caller to bail out of their loop.
344 344 return NULL;
345 345 }
346 346
347 347 // Currently, only survivors can be root regions.
348 348 HeapRegion* res = _next_survivor;
349 349 if (res != NULL) {
350 350 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
351 351 // Read it again in case it changed while we were waiting for the lock.
352 352 res = _next_survivor;
353 353 if (res != NULL) {
354 354 if (res == _young_list->last_survivor_region()) {
355 355 // We just claimed the last survivor so store NULL to indicate
356 356 // that we're done.
357 357 _next_survivor = NULL;
358 358 } else {
359 359 _next_survivor = res->get_next_young_region();
360 360 }
361 361 } else {
362 362 // Someone else claimed the last survivor while we were trying
363 363 // to take the lock so nothing else to do.
364 364 }
365 365 }
366 366 assert(res == NULL || res->is_survivor(), "post-condition");
367 367
368 368 return res;
369 369 }
370 370
371 371 void CMRootRegions::scan_finished() {
372 372 assert(scan_in_progress(), "pre-condition");
373 373
374 374 // Currently, only survivors can be root regions.
375 375 if (!_should_abort) {
376 376 assert(_next_survivor == NULL, "we should have claimed all survivors");
377 377 }
378 378 _next_survivor = NULL;
379 379
380 380 {
381 381 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
382 382 _scan_in_progress = false;
383 383 RootRegionScan_lock->notify_all();
384 384 }
385 385 }
386 386
387 387 bool CMRootRegions::wait_until_scan_finished() {
388 388 if (!scan_in_progress()) return false;
389 389
390 390 {
391 391 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
392 392 while (scan_in_progress()) {
393 393 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
394 394 }
395 395 }
396 396 return true;
397 397 }
398 398
399 399 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
400 400 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
401 401 #endif // _MSC_VER
402 402
403 403 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
404 404 return MAX2((n_par_threads + 2) / 4, 1U);
405 405 }
406 406
407 407 ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
408 408 _markBitMap1(rs, MinObjAlignment - 1),
409 409 _markBitMap2(rs, MinObjAlignment - 1),
410 410
411 411 _parallel_marking_threads(0),
412 412 _max_parallel_marking_threads(0),
413 413 _sleep_factor(0.0),
414 414 _marking_task_overhead(1.0),
415 415 _cleanup_sleep_factor(0.0),
416 416 _cleanup_task_overhead(1.0),
417 417 _cleanup_list("Cleanup List"),
418 418 _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
419 419 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
420 420 CardTableModRefBS::card_shift,
421 421 false /* in_resource_area*/),
422 422
423 423 _prevMarkBitMap(&_markBitMap1),
424 424 _nextMarkBitMap(&_markBitMap2),
425 425
426 426 _markStack(this),
427 427 // _finger set in set_non_marking_state
428 428
429 429 _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
430 430 // _active_tasks set in set_non_marking_state
431 431 // _tasks set inside the constructor
432 432 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
433 433 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
434 434
435 435 _has_overflown(false),
436 436 _concurrent(false),
437 437 _has_aborted(false),
438 438 _restart_for_overflow(false),
439 439 _concurrent_marking_in_progress(false),
440 440
441 441 // _verbose_level set below
442 442
443 443 _init_times(),
444 444 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
445 445 _cleanup_times(),
446 446 _total_counting_time(0.0),
447 447 _total_rs_scrub_time(0.0),
448 448
449 449 _parallel_workers(NULL),
450 450
451 451 _count_card_bitmaps(NULL),
452 452 _count_marked_bytes(NULL) {
453 453 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
454 454 if (verbose_level < no_verbose) {
455 455 verbose_level = no_verbose;
456 456 }
457 457 if (verbose_level > high_verbose) {
458 458 verbose_level = high_verbose;
459 459 }
460 460 _verbose_level = verbose_level;
461 461
462 462 if (verbose_low()) {
463 463 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
464 464 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
465 465 }
466 466
467 467 _markStack.allocate(MarkStackSize);
468 468
469 469 // Create & start a ConcurrentMark thread.
470 470 _cmThread = new ConcurrentMarkThread(this);
471 471 assert(cmThread() != NULL, "CM Thread should have been created");
472 472 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
473 473
474 474 _g1h = G1CollectedHeap::heap();
475 475 assert(CGC_lock != NULL, "Where's the CGC_lock?");
476 476 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
477 477 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
478 478
479 479 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
480 480 satb_qs.set_buffer_size(G1SATBBufferSize);
481 481
482 482 _root_regions.init(_g1h, this);
483 483
484 484 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
485 485 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);
486 486
487 487 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num, mtGC);
488 488 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);
489 489
490 490 BitMap::idx_t card_bm_size = _card_bm.size();
491 491
492 492 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
493 493 _active_tasks = _max_task_num;
494 494 for (int i = 0; i < (int) _max_task_num; ++i) {
495 495 CMTaskQueue* task_queue = new CMTaskQueue();
496 496 task_queue->initialize();
497 497 _task_queues->register_queue(i, task_queue);
498 498
499 499 _count_card_bitmaps[i] = BitMap(card_bm_size, false);
500 500 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
501 501
502 502 _tasks[i] = new CMTask(i, this,
503 503 _count_marked_bytes[i],
504 504 &_count_card_bitmaps[i],
505 505 task_queue, _task_queues);
506 506
507 507 _accum_task_vtime[i] = 0.0;
508 508 }
509 509
510 510 // Calculate the card number for the bottom of the heap. Used
511 511 // in biasing indexes into the accounting card bitmaps.
512 512 _heap_bottom_card_num =
513 513 intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
514 514 CardTableModRefBS::card_shift);
515 515
516 516 // Clear all the liveness counting data
517 517 clear_all_count_data();
518 518
519 519 if (ConcGCThreads > ParallelGCThreads) {
520 520 vm_exit_during_initialization("Can't have more ConcGCThreads "
521 521 "than ParallelGCThreads.");
522 522 }
523 523 if (ParallelGCThreads == 0) {
524 524 // if we are not running with any parallel GC threads we will not
525 525 // spawn any marking threads either
526 526 _parallel_marking_threads = 0;
527 527 _max_parallel_marking_threads = 0;
528 528 _sleep_factor = 0.0;
529 529 _marking_task_overhead = 1.0;
530 530 } else {
531 531 if (ConcGCThreads > 0) {
532 532 // note that ConcGCThreads overrides G1MarkingOverheadPercent
533 533 // if both are set
534 534
535 535 _parallel_marking_threads = (uint) ConcGCThreads;
536 536 _max_parallel_marking_threads = _parallel_marking_threads;
537 537 _sleep_factor = 0.0;
538 538 _marking_task_overhead = 1.0;
539 539 } else if (G1MarkingOverheadPercent > 0) {
540 540 // we will calculate the number of parallel marking threads
541 541 // based on a target overhead with respect to the soft real-time
542 542 // goal
543 543
544 544 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
545 545 double overall_cm_overhead =
546 546 (double) MaxGCPauseMillis * marking_overhead /
547 547 (double) GCPauseIntervalMillis;
548 548 double cpu_ratio = 1.0 / (double) os::processor_count();
549 549 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
550 550 double marking_task_overhead =
551 551 overall_cm_overhead / marking_thread_num *
552 552 (double) os::processor_count();
553 553 double sleep_factor =
554 554 (1.0 - marking_task_overhead) / marking_task_overhead;
555 555
556 556 _parallel_marking_threads = (uint) marking_thread_num;
557 557 _max_parallel_marking_threads = _parallel_marking_threads;
558 558 _sleep_factor = sleep_factor;
559 559 _marking_task_overhead = marking_task_overhead;
560 560 } else {
561 561 _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
562 562 _max_parallel_marking_threads = _parallel_marking_threads;
563 563 _sleep_factor = 0.0;
564 564 _marking_task_overhead = 1.0;
565 565 }
566 566
567 567 if (parallel_marking_threads() > 1) {
568 568 _cleanup_task_overhead = 1.0;
569 569 } else {
570 570 _cleanup_task_overhead = marking_task_overhead();
571 571 }
572 572 _cleanup_sleep_factor =
573 573 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
574 574
575 575 #if 0
576 576 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
577 577 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
578 578 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
579 579 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
580 580 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
581 581 #endif
582 582
583 583 guarantee(parallel_marking_threads() > 0, "peace of mind");
584 584 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
585 585 _max_parallel_marking_threads, false, true);
586 586 if (_parallel_workers == NULL) {
587 587 vm_exit_during_initialization("Failed necessary allocation.");
588 588 } else {
589 589 _parallel_workers->initialize_workers();
590 590 }
591 591 }
592 592
593 593 // so that the call below can read a sensible value
594 594 _heap_start = (HeapWord*) rs.base();
595 595 set_non_marking_state();
596 596 }
597 597
598 598 void ConcurrentMark::update_g1_committed(bool force) {
599 599 // If concurrent marking is not in progress, then we do not need to
600 600 // update _heap_end.
601 601 if (!concurrent_marking_in_progress() && !force) return;
602 602
603 603 MemRegion committed = _g1h->g1_committed();
604 604 assert(committed.start() == _heap_start, "start shouldn't change");
605 605 HeapWord* new_end = committed.end();
606 606 if (new_end > _heap_end) {
607 607 // The heap has been expanded.
608 608
609 609 _heap_end = new_end;
610 610 }
611 611 // Notice that the heap can also shrink. However, this only happens
612 612 // during a Full GC (at least currently) and the entire marking
613 613 // phase will bail out and the task will not be restarted. So, let's
614 614 // do nothing.
615 615 }
616 616
617 617 void ConcurrentMark::reset() {
618 618 // Starting values for these two. This should be called in a STW
619 619 // phase. CM will be notified of any future g1_committed expansions,
620 620 // which will happen at the end of evacuation pauses, when tasks are
621 621 // inactive.
622 622 MemRegion committed = _g1h->g1_committed();
623 623 _heap_start = committed.start();
624 624 _heap_end = committed.end();
625 625
626 626 // Separated the asserts so that we know which one fires.
627 627 assert(_heap_start != NULL, "heap bounds should look ok");
628 628 assert(_heap_end != NULL, "heap bounds should look ok");
629 629 assert(_heap_start < _heap_end, "heap bounds should look ok");
630 630
631 631 // reset all the marking data structures and any necessary flags
632 632 clear_marking_state();
633 633
634 634 if (verbose_low()) {
635 635 gclog_or_tty->print_cr("[global] resetting");
636 636 }
637 637
638 638 // We do reset all of them, since different phases will use
639 639 // different number of active threads. So, it's easiest to have all
640 640 // of them ready.
641 641 for (int i = 0; i < (int) _max_task_num; ++i) {
642 642 _tasks[i]->reset(_nextMarkBitMap);
643 643 }
644 644
645 645 // we need this to make sure that the flag is on during the evac
646 646 // pause with initial mark piggy-backed
647 647 set_concurrent_marking_in_progress();
648 648 }
649 649
650 650 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
651 651 assert(active_tasks <= _max_task_num, "we should not have more");
652 652
653 653 _active_tasks = active_tasks;
654 654 // Need to update the three data structures below according to the
655 655 // number of active threads for this phase.
656 656 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
657 657 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
658 658 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
659 659
660 660 _concurrent = concurrent;
661 661 // We propagate this to all tasks, not just the active ones.
662 662 for (int i = 0; i < (int) _max_task_num; ++i)
663 663 _tasks[i]->set_concurrent(concurrent);
664 664
665 665 if (concurrent) {
666 666 set_concurrent_marking_in_progress();
667 667 } else {
668 668 // We currently assume that the concurrent flag has been set to
669 669 // false before we start remark. At this point we should also be
670 670 // in a STW phase.
671 671 assert(!concurrent_marking_in_progress(), "invariant");
672 672 assert(_finger == _heap_end, "only way to get here");
673 673 update_g1_committed(true);
674 674 }
675 675 }
676 676
677 677 void ConcurrentMark::set_non_marking_state() {
678 678 // We set the global marking state to some default values when we're
679 679 // not doing marking.
680 680 clear_marking_state();
681 681 _active_tasks = 0;
682 682 clear_concurrent_marking_in_progress();
683 683 }
684 684
685 685 ConcurrentMark::~ConcurrentMark() {
686 686 // The ConcurrentMark instance is never freed.
687 687 ShouldNotReachHere();
688 688 }
689 689
690 690 void ConcurrentMark::clearNextBitmap() {
691 691 G1CollectedHeap* g1h = G1CollectedHeap::heap();
692 692 G1CollectorPolicy* g1p = g1h->g1_policy();
693 693
694 694 // Make sure that the concurrent mark thread looks to still be in
695 695 // the current cycle.
696 696 guarantee(cmThread()->during_cycle(), "invariant");
697 697
698 698 // We are finishing up the current cycle by clearing the next
699 699 // marking bitmap and getting it ready for the next cycle. During
700 700 // this time no other cycle can start. So, let's make sure that this
701 701 // is the case.
702 702 guarantee(!g1h->mark_in_progress(), "invariant");
703 703
704 704 // clear the mark bitmap (no grey objects to start with).
705 705 // We need to do this in chunks and offer to yield in between
706 706 // each chunk.
707 707 HeapWord* start = _nextMarkBitMap->startWord();
708 708 HeapWord* end = _nextMarkBitMap->endWord();
709 709 HeapWord* cur = start;
710 710 size_t chunkSize = M;
711 711 while (cur < end) {
712 712 HeapWord* next = cur + chunkSize;
713 713 if (next > end) {
714 714 next = end;
715 715 }
716 716 MemRegion mr(cur,next);
717 717 _nextMarkBitMap->clearRange(mr);
718 718 cur = next;
719 719 do_yield_check();
720 720
721 721 // Repeat the asserts from above. We'll do them as asserts here to
722 722 // minimize their overhead on the product. However, we'll have
723 723 // them as guarantees at the beginning / end of the bitmap
724 724 // clearing to get some checking in the product.
725 725 assert(cmThread()->during_cycle(), "invariant");
726 726 assert(!g1h->mark_in_progress(), "invariant");
727 727 }
728 728
729 729 // Clear the liveness counting data
730 730 clear_all_count_data();
731 731
732 732 // Repeat the asserts from above.
733 733 guarantee(cmThread()->during_cycle(), "invariant");
734 734 guarantee(!g1h->mark_in_progress(), "invariant");
735 735 }
736 736
737 737 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
738 738 public:
739 739 bool doHeapRegion(HeapRegion* r) {
740 740 if (!r->continuesHumongous()) {
741 741 r->note_start_of_marking();
742 742 }
743 743 return false;
744 744 }
745 745 };
746 746
747 747 void ConcurrentMark::checkpointRootsInitialPre() {
748 748 G1CollectedHeap* g1h = G1CollectedHeap::heap();
749 749 G1CollectorPolicy* g1p = g1h->g1_policy();
750 750
751 751 _has_aborted = false;
752 752
753 753 #ifndef PRODUCT
754 754 if (G1PrintReachableAtInitialMark) {
755 755 print_reachable("at-cycle-start",
756 756 VerifyOption_G1UsePrevMarking, true /* all */);
757 757 }
758 758 #endif
759 759
760 760 // Initialise marking structures. This has to be done in a STW phase.
761 761 reset();
762 762
763 763 // For each region note start of marking.
764 764 NoteStartOfMarkHRClosure startcl;
765 765 g1h->heap_region_iterate(&startcl);
766 766 }
767 767
768 768
769 769 void ConcurrentMark::checkpointRootsInitialPost() {
770 770 G1CollectedHeap* g1h = G1CollectedHeap::heap();
771 771
772 772 // If we force an overflow during remark, the remark operation will
773 773 // actually abort and we'll restart concurrent marking. If we always
774 774 // force an overflow during remark we'll never actually complete the
775 775 // marking phase. So, we initialize this here, at the start of the
776 776 // cycle, so that the remaining overflow count will decrease at
777 777 // every remark and we'll eventually not need to force one.
778 778 force_overflow_stw()->init();
779 779
780 780 // Start Concurrent Marking weak-reference discovery.
781 781 ReferenceProcessor* rp = g1h->ref_processor_cm();
782 782 // enable ("weak") refs discovery
783 783 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
784 784 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
785 785
786 786 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
787 787 // This is the start of the marking cycle; we expect all
788 788 // threads to have SATB queues with active set to false.
789 789 satb_mq_set.set_active_all_threads(true, /* new active value */
790 790 false /* expected_active */);
791 791
792 792 _root_regions.prepare_for_scan();
793 793
794 794 // update_g1_committed() will be called at the end of an evac pause
795 795 // when marking is on. So, it's also called at the end of the
796 796 // initial-mark pause to update the heap end, if the heap expands
797 797 // during it. No need to call it here.
798 798 }
799 799
800 800 /*
801 801 * Notice that in the next two methods, we actually leave the STS
802 802 * during the barrier sync and join it immediately afterwards. If we
803 803 * do not do this, the following deadlock can occur: one thread could
804 804 * be in the barrier sync code, waiting for the other thread to also
805 805 * sync up, whereas another one could be trying to yield, while also
806 806 * waiting for the other threads to sync up too.
807 807 *
808 808 * Note, however, that this code is also used during remark and in
809 809 * this case we should not attempt to leave / enter the STS, otherwise
810 810 * we'll either hit an assert (debug / fastdebug) or deadlock
811 811 * (product). So we should only leave / enter the STS if we are
812 812 * operating concurrently.
813 813 *
814 814 * Because the thread that does the sync barrier has left the STS, it
815 815 * is possible that it gets suspended for a Full GC or that an
816 816 * evacuation pause occurs. This is actually safe, since entering the
817 817 * sync barrier is one of the last things do_marking_step() does, and
818 818 * it doesn't manipulate any data structures afterwards.
819 819 */
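// Editorial sketch of the deadlock described above (an illustrative
// interleaving, not part of the original source). Suppose T1 and T2 are
// the only two marking threads and the barrier waits for both:
//
//   T1: _first_overflow_barrier_sync.enter()  // waits for T2, still in STS
//   VM: requests an evacuation pause          // waits for all STS threads
//                                             // to yield or leave
//   T2: do_yield_check()                      // yields for the pause and
//                                             // never reaches enter()
//
// T1 cannot leave the barrier until T2 enters it, T2 cannot resume until
// the pause finishes, and the pause cannot start while T1 sits in the
// STS - hence the stsLeave() / stsJoin() pair around enter().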
820 820
821 821 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
822 822 if (verbose_low()) {
823 823 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
824 824 }
825 825
826 826 if (concurrent()) {
827 827 ConcurrentGCThread::stsLeave();
828 828 }
829 829 _first_overflow_barrier_sync.enter();
830 830 if (concurrent()) {
831 831 ConcurrentGCThread::stsJoin();
832 832 }
833 833 // at this point everyone should have synced up and not be doing any
834 834 // more work
835 835
836 836 if (verbose_low()) {
837 837 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
838 838 }
839 839
840 840 // let task 0 do this
841 841 if (task_num == 0) {
842 842 // task 0 is responsible for clearing the global data structures
843 843 // We should be here because of an overflow. During STW we should
844 844 // not clear the overflow flag since we rely on it being true when
845 845 // we exit this method to abort the pause and restart concurrent
846 846 // marking.
847 847 clear_marking_state(concurrent() /* clear_overflow */);
848 848 force_overflow()->update();
849 849
850 850 if (G1Log::fine()) {
851 851 gclog_or_tty->date_stamp(PrintGCDateStamps);
852 852 gclog_or_tty->stamp(PrintGCTimeStamps);
853 853 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
854 854 }
855 855 }
856 856
857 857 // after this, each task should reset its own data structures and
858 858 // then go into the second barrier
859 859 }
860 860
861 861 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
862 862 if (verbose_low()) {
863 863 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
864 864 }
865 865
866 866 if (concurrent()) {
867 867 ConcurrentGCThread::stsLeave();
868 868 }
869 869 _second_overflow_barrier_sync.enter();
870 870 if (concurrent()) {
871 871 ConcurrentGCThread::stsJoin();
872 872 }
873 873 // at this point everything should be re-initialised and ready to go
874 874
875 875 if (verbose_low()) {
876 876 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
877 877 }
878 878 }
879 879
880 880 #ifndef PRODUCT
881 881 void ForceOverflowSettings::init() {
882 882 _num_remaining = G1ConcMarkForceOverflow;
883 883 _force = false;
884 884 update();
885 885 }
886 886
887 887 void ForceOverflowSettings::update() {
888 888 if (_num_remaining > 0) {
889 889 _num_remaining -= 1;
890 890 _force = true;
891 891 } else {
892 892 _force = false;
893 893 }
894 894 }
895 895
896 896 bool ForceOverflowSettings::should_force() {
897 897 if (_force) {
898 898 _force = false;
899 899 return true;
900 900 } else {
901 901 return false;
902 902 }
903 903 }
904 904 #endif // !PRODUCT
905 905
906 906 class CMConcurrentMarkingTask: public AbstractGangTask {
907 907 private:
908 908 ConcurrentMark* _cm;
909 909 ConcurrentMarkThread* _cmt;
910 910
911 911 public:
912 912 void work(uint worker_id) {
913 913 assert(Thread::current()->is_ConcurrentGC_thread(),
914 914 "this should only be done by a conc GC thread");
915 915 ResourceMark rm;
916 916
917 917 double start_vtime = os::elapsedVTime();
918 918
919 919 ConcurrentGCThread::stsJoin();
920 920
921 921 assert(worker_id < _cm->active_tasks(), "invariant");
922 922 CMTask* the_task = _cm->task(worker_id);
923 923 the_task->record_start_time();
924 924 if (!_cm->has_aborted()) {
925 925 do {
926 926 double start_vtime_sec = os::elapsedVTime();
927 927 double start_time_sec = os::elapsedTime();
928 928 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
929 929
930 930 the_task->do_marking_step(mark_step_duration_ms,
931 931 true /* do_stealing */,
932 932 true /* do_termination */);
933 933
934 934 double end_time_sec = os::elapsedTime();
935 935 double end_vtime_sec = os::elapsedVTime();
936 936 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
937 937 double elapsed_time_sec = end_time_sec - start_time_sec;
938 938 _cm->clear_has_overflown();
939 939
940 940 bool ret = _cm->do_yield_check(worker_id);
941 941
942 942 jlong sleep_time_ms;
943 943 if (!_cm->has_aborted() && the_task->has_aborted()) {
944 944 sleep_time_ms =
945 945 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
946 946 ConcurrentGCThread::stsLeave();
947 947 os::sleep(Thread::current(), sleep_time_ms, false);
948 948 ConcurrentGCThread::stsJoin();
949 949 }
950 950 double end_time2_sec = os::elapsedTime();
951 951 double elapsed_time2_sec = end_time2_sec - start_time_sec;
952 952
953 953 #if 0
954 954 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
955 955 "overhead %1.4lf",
956 956 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
957 957 the_task->conc_overhead(os::elapsedTime()) * 8.0);
958 958 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
959 959 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
960 960 #endif
961 961 } while (!_cm->has_aborted() && the_task->has_aborted());
962 962 }
963 963 the_task->record_end_time();
964 964 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
965 965
966 966 ConcurrentGCThread::stsLeave();
967 967
968 968 double end_vtime = os::elapsedVTime();
969 969 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
970 970 }
971 971
972 972 CMConcurrentMarkingTask(ConcurrentMark* cm,
973 973 ConcurrentMarkThread* cmt) :
974 974 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
975 975
976 976 ~CMConcurrentMarkingTask() { }
977 977 };
978 978
979 979 // Calculates the number of active workers for a concurrent
980 980 // phase.
981 981 uint ConcurrentMark::calc_parallel_marking_threads() {
982 982 if (G1CollectedHeap::use_parallel_gc_threads()) {
983 983 uint n_conc_workers = 0;
984 984 if (!UseDynamicNumberOfGCThreads ||
985 985 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
986 986 !ForceDynamicNumberOfGCThreads)) {
987 987 n_conc_workers = max_parallel_marking_threads();
988 988 } else {
989 989 n_conc_workers =
990 990 AdaptiveSizePolicy::calc_default_active_workers(
991 991 max_parallel_marking_threads(),
992 992 1, /* Minimum workers */
993 993 parallel_marking_threads(),
994 994 Threads::number_of_non_daemon_threads());
995 995 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
996 996 // that scaling has already gone into "_max_parallel_marking_threads".
997 997 }
998 998 assert(n_conc_workers > 0, "Always need at least 1");
999 999 return n_conc_workers;
1000 1000 }
1001 1001 // If we are not running with any parallel GC threads we will not
1002 1002 // have spawned any marking threads either. Hence the number of
1003 1003 // concurrent workers should be 0.
1004 1004 return 0;
1005 1005 }
1006 1006
1007 1007 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1008 1008 // Currently, only survivors can be root regions.
1009 1009 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1010 1010 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1011 1011
1012 1012 const uintx interval = PrefetchScanIntervalInBytes;
1013 1013 HeapWord* curr = hr->bottom();
1014 1014 const HeapWord* end = hr->top();
1015 1015 while (curr < end) {
1016 1016 Prefetch::read(curr, interval);
1017 1017 oop obj = oop(curr);
1018 1018 int size = obj->oop_iterate(&cl);
1019 1019 assert(size == obj->size(), "sanity");
1020 1020 curr += size;
1021 1021 }
1022 1022 }
1023 1023
1024 1024 class CMRootRegionScanTask : public AbstractGangTask {
1025 1025 private:
1026 1026 ConcurrentMark* _cm;
1027 1027
1028 1028 public:
1029 1029 CMRootRegionScanTask(ConcurrentMark* cm) :
1030 1030 AbstractGangTask("Root Region Scan"), _cm(cm) { }
1031 1031
1032 1032 void work(uint worker_id) {
1033 1033 assert(Thread::current()->is_ConcurrentGC_thread(),
1034 1034 "this should only be done by a conc GC thread");
1035 1035
1036 1036 CMRootRegions* root_regions = _cm->root_regions();
1037 1037 HeapRegion* hr = root_regions->claim_next();
1038 1038 while (hr != NULL) {
1039 1039 _cm->scanRootRegion(hr, worker_id);
1040 1040 hr = root_regions->claim_next();
1041 1041 }
1042 1042 }
1043 1043 };
1044 1044
1045 1045 void ConcurrentMark::scanRootRegions() {
1046 1046 // scan_in_progress() will have been set to true only if there was
1047 1047 // at least one root region to scan. So, if it's false, we
1048 1048 // should not attempt to do any further work.
1049 1049 if (root_regions()->scan_in_progress()) {
1050 1050 _parallel_marking_threads = calc_parallel_marking_threads();
1051 1051 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1052 1052 "Maximum number of marking threads exceeded");
1053 1053 uint active_workers = MAX2(1U, parallel_marking_threads());
1054 1054
1055 1055 CMRootRegionScanTask task(this);
1056 1056 if (parallel_marking_threads() > 0) {
1057 1057 _parallel_workers->set_active_workers((int) active_workers);
1058 1058 _parallel_workers->run_task(&task);
1059 1059 } else {
1060 1060 task.work(0);
1061 1061 }
1062 1062
1063 1063 // It's possible that has_aborted() is true here without actually
1064 1064 // aborting the survivor scan earlier. This is OK as it's
1065 1065 // mainly used for sanity checking.
1066 1066 root_regions()->scan_finished();
1067 1067 }
1068 1068 }
1069 1069
1070 1070 void ConcurrentMark::markFromRoots() {
1071 1071 // we might be tempted to assert that:
1072 1072 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1073 1073 // "inconsistent argument?");
1074 1074 // However that wouldn't be right, because it's possible that
1075 1075 // a safepoint is indeed in progress as a younger generation
1076 1076 // stop-the-world GC happens even as we mark in this generation.
1077 1077
1078 1078 _restart_for_overflow = false;
1079 1079 force_overflow_conc()->init();
1080 1080
1081 1081 // _g1h has _n_par_threads
1082 1082 _parallel_marking_threads = calc_parallel_marking_threads();
1083 1083 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1084 1084 "Maximum number of marking threads exceeded");
1085 1085
1086 1086 uint active_workers = MAX2(1U, parallel_marking_threads());
1087 1087
1088 1088 // Parallel task terminator is set in "set_phase()"
1089 1089 set_phase(active_workers, true /* concurrent */);
1090 1090
1091 1091 CMConcurrentMarkingTask markingTask(this, cmThread());
1092 1092 if (parallel_marking_threads() > 0) {
1093 1093 _parallel_workers->set_active_workers((int)active_workers);
1094 1094 // Don't set _n_par_threads because it affects MT in process_strong_roots()
1095 1095 // and the decisions on that MT processing are made elsewhere.
1096 1096 assert(_parallel_workers->active_workers() > 0, "Should have been set");
1097 1097 _parallel_workers->run_task(&markingTask);
1098 1098 } else {
1099 1099 markingTask.work(0);
1100 1100 }
1101 1101 print_stats();
1102 1102 }
1103 1103
1104 1104 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1105 1105 // world is stopped at this checkpoint
1106 1106 assert(SafepointSynchronize::is_at_safepoint(),
1107 1107 "world should be stopped");
1108 1108
1109 1109 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1110 1110
1111 1111 // If a full collection has happened, we shouldn't do this.
1112 1112 if (has_aborted()) {
1113 1113 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1114 1114 return;
1115 1115 }
1116 1116
1117 1117 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1118 1118
1119 1119 if (VerifyDuringGC) {
1120 1120 HandleMark hm; // handle scope
1121 1121 gclog_or_tty->print(" VerifyDuringGC:(before)");
1122 1122 Universe::heap()->prepare_for_verify();
1123 1123 Universe::verify(/* silent */ false,
1124 1124 /* option */ VerifyOption_G1UsePrevMarking);
1125 1125 }
1126 1126
1127 1127 G1CollectorPolicy* g1p = g1h->g1_policy();
1128 1128 g1p->record_concurrent_mark_remark_start();
1129 1129
1130 1130 double start = os::elapsedTime();
1131 1131
1132 1132 checkpointRootsFinalWork();
1133 1133
1134 1134 double mark_work_end = os::elapsedTime();
1135 1135
1136 1136 weakRefsWork(clear_all_soft_refs);
1137 1137
1138 1138 if (has_overflown()) {
1139 1139 // Oops. We overflowed. Restart concurrent marking.
1140 1140 _restart_for_overflow = true;
1141 1141 // Clear the flag. We do not need it any more.
1142 1142 clear_has_overflown();
1143 1143 if (G1TraceMarkStackOverflow) {
1144 1144 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1145 1145 }
1146 1146 } else {
1147 1147 // Aggregate the per-task counting data that we have accumulated
1148 1148 // while marking.
1149 1149 aggregate_count_data();
1150 1150
1151 1151 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1152 1152 // We're done with marking.
1153 1153 // This is the end of the marking cycle; we expect all
1154 1154 // threads to have SATB queues with active set to true.
1155 1155 satb_mq_set.set_active_all_threads(false, /* new active value */
1156 1156 true /* expected_active */);
1157 1157
1158 1158 if (VerifyDuringGC) {
1159 1159 HandleMark hm; // handle scope
1160 1160 gclog_or_tty->print(" VerifyDuringGC:(after)");
1161 1161 Universe::heap()->prepare_for_verify();
1162 1162 Universe::verify(/* silent */ false,
1163 1163 /* option */ VerifyOption_G1UseNextMarking);
1164 1164 }
1165 1165 assert(!restart_for_overflow(), "sanity");
1166 1166 }
1167 1167
1168 1168 // Reset the marking state if marking completed
1169 1169 if (!restart_for_overflow()) {
1170 1170 set_non_marking_state();
1171 1171 }
1172 1172
1173 1173 #if VERIFY_OBJS_PROCESSED
1174 1174 _scan_obj_cl.objs_processed = 0;
1175 1175 ThreadLocalObjQueue::objs_enqueued = 0;
1176 1176 #endif
1177 1177
1178 1178 // Statistics
1179 1179 double now = os::elapsedTime();
1180 1180 _remark_mark_times.add((mark_work_end - start) * 1000.0);
[... 1180 lines elided ...]
1181 1181 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1182 1182 _remark_times.add((now - start) * 1000.0);
1183 1183
1184 1184 g1p->record_concurrent_mark_remark_end();
1185 1185 }
1186 1186
1187 1187 // Base class of the closures that finalize and verify the
1188 1188 // liveness counting data.
1189 1189 class CMCountDataClosureBase: public HeapRegionClosure {
1190 1190 protected:
1191 + G1CollectedHeap* _g1h;
1191 1192 ConcurrentMark* _cm;
1193 + CardTableModRefBS* _ct_bs;
1194 +
1192 1195 BitMap* _region_bm;
1193 1196 BitMap* _card_bm;
1194 1197
1195 - void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
1196 - assert(start_idx <= last_idx, "sanity");
1197 -
1198 - // Set the inclusive bit range [start_idx, last_idx].
1199 - // For small ranges (up to 8 cards) use a simple loop; otherwise
1200 - // use par_at_put_range.
1201 - if ((last_idx - start_idx) < 8) {
1202 - for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1203 - _card_bm->par_set_bit(i);
1204 - }
1205 - } else {
1206 - assert(last_idx < _card_bm->size(), "sanity");
1207 - // Note BitMap::par_at_put_range() is exclusive.
1208 - BitMap::idx_t max_idx = MAX2(last_idx+1, _card_bm->size());
1209 - _card_bm->par_at_put_range(start_idx, max_idx, true);
1210 - }
1211 - }
1212 -
1213 - // It takes a region that's not empty (i.e., it has at least one
1198 + // Takes a region that's not empty (i.e., it has at least one
1214 1199 // live object in it and sets its corresponding bit on the region
1215 1200 // bitmap to 1. If the region is "starts humongous" it will also set
1216 1201 // to 1 the bits on the region bitmap that correspond to its
1217 1202 // associated "continues humongous" regions.
1218 1203 void set_bit_for_region(HeapRegion* hr) {
1219 1204 assert(!hr->continuesHumongous(), "should have filtered those out");
1220 1205
1221 1206 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1222 1207 if (!hr->startsHumongous()) {
1223 1208 // Normal (non-humongous) case: just set the bit.
1224 1209 _region_bm->par_at_put(index, true);
1225 1210 } else {
1226 1211 // Starts humongous case: calculate how many regions are part of
1227 1212 // this humongous region and then set the bit range.
1228 1213 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1229 1214 _region_bm->par_at_put_range(index, end_index, true);
1230 1215 }
1231 1216 }
1232 1217
1233 1218 public:
1234 - CMCountDataClosureBase(ConcurrentMark *cm,
1219 + CMCountDataClosureBase(G1CollectedHeap* g1h,
1235 1220 BitMap* region_bm, BitMap* card_bm):
1236 - _cm(cm), _region_bm(region_bm), _card_bm(card_bm) { }
1221 + _g1h(g1h), _cm(g1h->concurrent_mark()),
1222 + _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1223 + _region_bm(region_bm), _card_bm(card_bm) { }
1237 1224 };
1238 1225
1239 1226 // Closure that calculates the # live objects per region. Used
1240 1227 // for verification purposes during the cleanup pause.
1241 1228 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1242 1229 CMBitMapRO* _bm;
1243 1230 size_t _region_marked_bytes;
1244 1231
1245 1232 public:
1246 - CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
1233 + CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1247 1234 BitMap* region_bm, BitMap* card_bm) :
1248 - CMCountDataClosureBase(cm, region_bm, card_bm),
1235 + CMCountDataClosureBase(g1h, region_bm, card_bm),
1249 1236 _bm(bm), _region_marked_bytes(0) { }
1250 1237
1251 1238 bool doHeapRegion(HeapRegion* hr) {
1252 1239
1253 1240 if (hr->continuesHumongous()) {
1254 1241 // We will ignore these here and process them when their
1255 1242 // associated "starts humongous" region is processed (see
1256 1243 // set_bit_for_heap_region()). Note that we cannot rely on their
1257 1244 // associated "starts humongous" region to have their bit set to
1258 1245 // 1 since, due to the region chunking in the parallel region
1259 1246 // iteration, a "continues humongous" region might be visited
1260 1247 // before its associated "starts humongous".
1261 1248 return false;
1262 1249 }
1263 1250
1264 - HeapWord* nextTop = hr->next_top_at_mark_start();
1265 - HeapWord* start = hr->bottom();
1251 + HeapWord* ntams = hr->next_top_at_mark_start();
1252 + HeapWord* start = hr->bottom();
1266 1253
1267 - assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
1254 + assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1268 1255 err_msg("Preconditions not met - "
1269 - "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
1270 - start, nextTop, hr->end()));
1256 + "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1257 + start, ntams, hr->end()));
1271 1258
1272 1259 // Find the first marked object at or after "start".
1273 - start = _bm->getNextMarkedWordAddress(start, nextTop);
1260 + start = _bm->getNextMarkedWordAddress(start, ntams);
1274 1261
1275 1262 size_t marked_bytes = 0;
1276 1263
1277 - while (start < nextTop) {
1264 + while (start < ntams) {
1278 1265 oop obj = oop(start);
1279 1266 int obj_sz = obj->size();
1280 - HeapWord* obj_last = start + obj_sz - 1;
1267 + HeapWord* obj_end = start + obj_sz;
1281 1268
1282 1269 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1283 - BitMap::idx_t last_idx = _cm->card_bitmap_index_for(obj_last);
1270 + BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1271 +
1272 + // Note: if we're looking at the last region in the heap, obj_end
1273 + // could actually be outside the heap, and end_idx could then
1274 + // correspond to a card that is also outside the heap.
1275 + if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1276 + // end of object is not card aligned - increment to cover
1277 + // all the cards spanned by the object
1278 + end_idx += 1;
1279 + }
1284 1280
1285 - // Set the bits in the card BM for this object (inclusive).
1286 - set_card_bitmap_range(start_idx, last_idx);
1281 + // Set the bits in the card BM for the cards spanned by this object.
1282 + _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1287 1283
1288 1284 // Add the size of this object to the number of marked bytes.
1289 1285 marked_bytes += (size_t)obj_sz * HeapWordSize;
1290 1286
1291 1287 // Find the next marked object after this one.
1292 - start = _bm->getNextMarkedWordAddress(obj_last + 1, nextTop);
1288 + start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1293 1289 }
1294 1290
1295 1291 // Mark the allocated-since-marking portion...
1296 1292 HeapWord* top = hr->top();
1297 - if (nextTop < top) {
1298 - BitMap::idx_t start_idx = _cm->card_bitmap_index_for(nextTop);
1299 - BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top - 1);
1293 + if (ntams < top) {
1294 + BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1295 + BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1300 1296
1301 - set_card_bitmap_range(start_idx, last_idx);
1297 + // Note: if we're looking at the last region in heap - top
1298 + // could actually be outside the heap and end_idx correspond
1299 + // to a card that is also outside the heap.
1300 + if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1301 + // end of object is not card aligned - increment to cover
1302 + // all the cards spanned by the object
1303 + end_idx += 1;
1304 + }
1305 + _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1302 1306
1303 1307 // This definitely means the region has live objects.
1304 1308 set_bit_for_region(hr);
1305 1309 }
1306 1310
1307 1311 // Update the live region bitmap.
1308 1312 if (marked_bytes > 0) {
1309 1313 set_bit_for_region(hr);
1310 1314 }
1311 1315
1312 1316 // Set the marked bytes for the current region so that
1313 1317 // it can be queried by a calling verificiation routine
1314 1318 _region_marked_bytes = marked_bytes;
1315 1319
1316 1320 return false;
1317 1321 }
[... 6 lines elided ...]
1318 1322
1319 1323 size_t region_marked_bytes() const { return _region_marked_bytes; }
1320 1324 };
1321 1325
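A worked example of the inclusive-start / exclusive-end card range convention used by CalcLiveObjectsClosure above (editorial illustration: the addresses are made up, the usual 512-byte cards, i.e. card_shift == 9, are assumed, and the heap-bottom bias applied by card_bitmap_index_for() is ignored):

    // An object spanning [0x1000, 0x1278):
    //   start_idx = 0x1000 >> 9 = 8     // card holding the first word
    //   end_idx   = 0x1278 >> 9 = 9     // card holding obj_end
    // obj_end is not card aligned, so end_idx += 1 gives 10, and
    // set_card_bitmap_range(bm, 8, 10) marks cards 8 and 9 - exactly
    // the cards the object spans. Had obj_end been card aligned at
    // 0x1400 (0x1400 >> 9 == 10), no increment would be needed and the
    // same exclusive range [8, 10) would result.
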
1322 1326 // Heap region closure used for verifying the counting data
1323 1327 // that was accumulated concurrently and aggregated during
1324 1328 // the remark pause. This closure is applied to the heap
1325 1329 // regions during the STW cleanup pause.
1326 1330
1327 1331 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1332 + G1CollectedHeap* _g1h;
1328 1333 ConcurrentMark* _cm;
1329 1334 CalcLiveObjectsClosure _calc_cl;
1330 1335 BitMap* _region_bm; // Region BM to be verified
1331 1336 BitMap* _card_bm; // Card BM to be verified
1332 1337 bool _verbose; // verbose output?
1333 1338
1334 1339 BitMap* _exp_region_bm; // Expected Region BM values
1335 1340 BitMap* _exp_card_bm; // Expected card BM values
1336 1341
1337 1342 int _failures;
1338 1343
1339 1344 public:
1340 - VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
1345 + VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1341 1346 BitMap* region_bm,
1342 1347 BitMap* card_bm,
1343 1348 BitMap* exp_region_bm,
1344 1349 BitMap* exp_card_bm,
1345 1350 bool verbose) :
1346 - _cm(cm),
1347 - _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
1351 + _g1h(g1h), _cm(g1h->concurrent_mark()),
1352 + _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1348 1353 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1349 1354 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1350 1355 _failures(0) { }
1351 1356
1352 1357 int failures() const { return _failures; }
1353 1358
1354 1359 bool doHeapRegion(HeapRegion* hr) {
1355 1360 if (hr->continuesHumongous()) {
1356 1361 // We will ignore these here and process them when their
1357 1362 // associated "starts humongous" region is processed (see
1358 1363 // set_bit_for_heap_region()). Note that we cannot rely on their
1359 1364 // associated "starts humongous" region to have their bit set to
1360 1365 // 1 since, due to the region chunking in the parallel region
1361 1366 // iteration, a "continues humongous" region might be visited
1362 1367 // before its associated "starts humongous".
1363 1368 return false;
1364 1369 }
1365 1370
1366 1371 int failures = 0;
1367 1372
1368 1373 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1369 1374 // this region and set the corresponding bits in the expected region
1370 1375 // and card bitmaps.
1371 1376 bool res = _calc_cl.doHeapRegion(hr);
1372 1377 assert(res == false, "should be continuing");
1373 1378
1374 1379 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1375 1380 Mutex::_no_safepoint_check_flag);
1376 1381
1377 1382 // Verify the marked bytes for this region.
1378 1383 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1379 1384 size_t act_marked_bytes = hr->next_marked_bytes();
1380 1385
1381 1386 // We're not OK if expected marked bytes > actual marked bytes. It means
1382 1387 // we have missed accounting some objects during the actual marking.
1383 1388 if (exp_marked_bytes > act_marked_bytes) {
1384 1389 if (_verbose) {
1385 1390 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1386 1391 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1387 1392 hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1388 1393 }
1389 1394 failures += 1;
1390 1395 }
1391 1396
1392 1397 // Verify the bit, for this region, in the actual and expected
1393 1398 // (which was just calculated) region bit maps.
1394 1399 // We're not OK if the bit in the calculated expected region
1395 1400 // bitmap is set and the bit in the actual region bitmap is not.
1396 1401 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1397 1402
1398 1403 bool expected = _exp_region_bm->at(index);
1399 1404 bool actual = _region_bm->at(index);
1400 1405 if (expected && !actual) {
1401 1406 if (_verbose) {
1402 1407 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1403 1408 "expected: %s, actual: %s",
1404 1409 hr->hrs_index(),
1405 1410 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1406 1411 }
1407 1412 failures += 1;
1408 1413 }
1409 1414
1410 1415 // Verify that the card bit maps for the cards spanned by the current
1411 1416 // region match. We have an error if we have a set bit in the expected
1412 1417 // bit map and the corresponding bit in the actual bitmap is not set.
1413 1418
1414 1419 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1415 1420 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1416 1421
1417 1422 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1418 1423 expected = _exp_card_bm->at(i);
1419 1424 actual = _card_bm->at(i);
1420 1425
1421 1426 if (expected && !actual) {
1422 1427 if (_verbose) {
1423 1428 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1424 1429 "expected: %s, actual: %s",
1425 1430 hr->hrs_index(), i,
1426 1431 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1427 1432 }
1428 1433 failures += 1;
1429 1434 }
1430 1435 }
1431 1436
1432 1437 if (failures > 0 && _verbose) {
1433 1438 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1434 1439 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1435 1440 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1436 1441 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1437 1442 }
1438 1443
1439 1444 _failures += failures;
1440 1445
1441 1446 // We could stop iteration over the heap when we
1442 1447 // find the first violating region by returning true.
1443 1448 return false;
1444 1449 }
1445 1450 };
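
Note that the verification is deliberately one-sided: a bit set in the actual data but not in the expected data is tolerated (the concurrent accounting may be conservative), while an expected bit missing from the actual data counts as a failure. A small sketch of that subset check, using standard containers in place of HotSpot's BitMap:

#include <cstddef>
#include <vector>

// Count positions where the expected bitmap has a set bit that the
// actual bitmap lacks; extra bits in `actual` are not failures.
int count_missing_bits(const std::vector<bool>& expected,
                       const std::vector<bool>& actual) {
  int failures = 0;
  for (size_t i = 0; i < expected.size(); ++i) {
    if (expected[i] && !actual[i]) {
      failures += 1;
    }
  }
  return failures;
}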
1446 1451
1447 1452
1448 1453 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1449 1454 protected:
1450 1455 G1CollectedHeap* _g1h;
1451 1456 ConcurrentMark* _cm;
1452 1457 BitMap* _actual_region_bm;
1453 1458 BitMap* _actual_card_bm;
1454 1459
1455 1460 uint _n_workers;
1456 1461
1457 1462 BitMap* _expected_region_bm;
1458 1463 BitMap* _expected_card_bm;
1459 1464
1460 1465 int _failures;
1461 1466 bool _verbose;
1462 1467
1463 1468 public:
1464 1469 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1465 1470 BitMap* region_bm, BitMap* card_bm,
1466 1471 BitMap* expected_region_bm, BitMap* expected_card_bm)
1467 1472 : AbstractGangTask("G1 verify final counting"),
1468 1473 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1469 1474 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1470 1475 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1471 1476 _failures(0), _verbose(false),
1472 1477 _n_workers(0) {
1473 1478 assert(VerifyDuringGC, "don't call this otherwise");
1474 1479
1475 1480 // Use the value already set as the number of active threads
1476 1481 // in the call to run_task().
1477 1482 if (G1CollectedHeap::use_parallel_gc_threads()) {
1478 1483 assert( _g1h->workers()->active_workers() > 0,
1479 1484 "Should have been previously set");
1480 1485 _n_workers = _g1h->workers()->active_workers();
1481 1486 } else {
1482 1487 _n_workers = 1;
1483 1488 }
... 126 lines elided ...
1484 1489
1485 1490 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1486 1491 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1487 1492
1488 1493 _verbose = _cm->verbose_medium();
1489 1494 }
1490 1495
1491 1496 void work(uint worker_id) {
1492 1497 assert(worker_id < _n_workers, "invariant");
1493 1498
1494 - VerifyLiveObjectDataHRClosure verify_cl(_cm,
1499 + VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1495 1500 _actual_region_bm, _actual_card_bm,
1496 1501 _expected_region_bm,
1497 1502 _expected_card_bm,
1498 1503 _verbose);
1499 1504
1500 1505 if (G1CollectedHeap::use_parallel_gc_threads()) {
1501 1506 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1502 1507 worker_id,
1503 1508 _n_workers,
1504 1509 HeapRegion::VerifyCountClaimValue);
1505 1510 } else {
1506 1511 _g1h->heap_region_iterate(&verify_cl);
1507 1512 }
1508 1513
1509 1514 Atomic::add(verify_cl.failures(), &_failures);
1510 1515 }
1511 1516
1512 1517 int failures() const { return _failures; }
1513 1518 };
... 9 lines elided ...
1514 1519
1515 1520 // Closure that finalizes the liveness counting data.
1516 1521 // Used during the cleanup pause.
1517 1522 // Sets the bits corresponding to the interval [NTAMS, top]
1518 1523 // (which contains the implicitly live objects) in the
1519 1524 // card liveness bitmap. Also sets the bit for each region,
1520 1525 // containing live data, in the region liveness bitmap.
1521 1526
1522 1527 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1523 1528 public:
1524 - FinalCountDataUpdateClosure(ConcurrentMark* cm,
1529 + FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1525 1530 BitMap* region_bm,
1526 1531 BitMap* card_bm) :
1527 - CMCountDataClosureBase(cm, region_bm, card_bm) { }
1532 + CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1528 1533
1529 1534 bool doHeapRegion(HeapRegion* hr) {
1530 1535
1531 1536 if (hr->continuesHumongous()) {
1532 1537 // We will ignore these here and process them when their
1533 1538 // associated "starts humongous" region is processed (see
1534 1539 // set_bit_for_heap_region()). Note that we cannot rely on their
1535 1540 // associated "starts humongous" region to have their bit set to
1536 1541 // 1 since, due to the region chunking in the parallel region
1537 1542 // iteration, a "continues humongous" region might be visited
1538 1543 // before its associated "starts humongous".
1539 1544 return false;
1540 1545 }
... 3 lines elided ...
1541 1546
1542 1547 HeapWord* ntams = hr->next_top_at_mark_start();
1543 1548 HeapWord* top = hr->top();
1544 1549
1545 1550 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1546 1551
1547 1552 // Mark the allocated-since-marking portion...
1548 1553 if (ntams < top) {
1549 1554 // This definitely means the region has live objects.
1550 1555 set_bit_for_region(hr);
1551 - }
1552 -
1553 - // Now set the bits for [ntams, top]
1554 - BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1555 - // set_card_bitmap_range() expects the last_idx to be with
1556 - // the range of the bit map (see assertion in set_card_bitmap_range()),
1557 - // so limit it to that range with this application of MIN2.
1558 - BitMap::idx_t last_idx = MIN2(_cm->card_bitmap_index_for(top),
1559 - _card_bm->size()-1);
1560 - if (start_idx < _card_bm->size()) {
1561 - set_card_bitmap_range(start_idx, last_idx);
1562 - } else {
1563 - // To reach here start_idx must be beyond the end of
1564 - // the bit map and last_idx must have been limited by
1565 - // the MIN2().
1566 - assert(start_idx == last_idx + 1,
1567 - err_msg("Not beyond end start_idx " SIZE_FORMAT " last_idx "
1568 - SIZE_FORMAT, start_idx, last_idx));
1556 +
1557 + // Now set the bits in the card bitmap for [ntams, top)
1558 + BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1559 + BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1560 +
1561 + // Note: if we're looking at the last region in the heap, top
1562 + // could actually be outside the heap and end_idx could then
1563 + // correspond to a card that is also outside the heap.
1564 + if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1565 + // end of object is not card aligned - increment to cover
1566 + // all the cards spanned by the object
1567 + end_idx += 1;
1568 + }
1569 +
1570 + assert(end_idx <= _card_bm->size(),
1571 + err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1572 + end_idx, _card_bm->size()));
1573 + assert(start_idx < _card_bm->size(),
1574 + err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1575 + start_idx, _card_bm->size()));
1576 +
1577 + _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1569 1578 }
1570 1579
1571 1580 // Set the bit for the region if it contains live data
1572 1581 if (hr->next_marked_bytes() > 0) {
1573 1582 set_bit_for_region(hr);
1574 1583 }
1575 1584
1576 1585 return false;
1577 1586 }
1578 1587 };
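
Both call sites now hand ConcurrentMark::set_card_bitmap_range() the target bitmap, a half-open [start_idx, end_idx) range, and an is_par flag. The routine itself is outside this hunk; the sketch below only illustrates the assumed semantics, with std::atomic words standing in for BitMap's parallel update path:

#include <atomic>
#include <cstdint>
#include <vector>

// Set bits in the half-open range [start, end) of a word-packed bitmap.
// With is_par, fetch_or makes each update safe against concurrent
// workers touching the same word.
void set_bitmap_range(std::vector<std::atomic<uint64_t> >& bm,
                      size_t start, size_t end, bool is_par) {
  for (size_t i = start; i < end; ++i) {
    uint64_t mask = uint64_t(1) << (i % 64);
    if (is_par) {
      bm[i / 64].fetch_or(mask);  // atomic read-modify-write
    } else {
      uint64_t old = bm[i / 64].load(std::memory_order_relaxed);
      bm[i / 64].store(old | mask, std::memory_order_relaxed);  // serial path
    }
  }
}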
1579 1588
1580 1589 class G1ParFinalCountTask: public AbstractGangTask {
1581 1590 protected:
1582 1591 G1CollectedHeap* _g1h;
1583 1592 ConcurrentMark* _cm;
1584 1593 BitMap* _actual_region_bm;
1585 1594 BitMap* _actual_card_bm;
1586 1595
1587 1596 uint _n_workers;
1588 1597
1589 1598 public:
1590 1599 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1591 1600 : AbstractGangTask("G1 final counting"),
1592 1601 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1593 1602 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1594 1603 _n_workers(0) {
1595 1604 // Use the value already set as the number of active threads
1596 1605 // in the call to run_task().
1597 1606 if (G1CollectedHeap::use_parallel_gc_threads()) {
1598 1607 assert( _g1h->workers()->active_workers() > 0,
... 20 lines elided ...
1599 1608 "Should have been previously set");
1600 1609 _n_workers = _g1h->workers()->active_workers();
1601 1610 } else {
1602 1611 _n_workers = 1;
1603 1612 }
1604 1613 }
1605 1614
1606 1615 void work(uint worker_id) {
1607 1616 assert(worker_id < _n_workers, "invariant");
1608 1617
1609 - FinalCountDataUpdateClosure final_update_cl(_cm,
1618 + FinalCountDataUpdateClosure final_update_cl(_g1h,
1610 1619 _actual_region_bm,
1611 1620 _actual_card_bm);
1612 1621
1613 1622 if (G1CollectedHeap::use_parallel_gc_threads()) {
1614 1623 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1615 1624 worker_id,
1616 1625 _n_workers,
1617 1626 HeapRegion::FinalCountClaimValue);
1618 1627 } else {
1619 1628 _g1h->heap_region_iterate(&final_update_cl);
1620 1629 }
1621 1630 }
1622 1631 };
1623 1632
1624 1633 class G1ParNoteEndTask;
1625 1634
1626 1635 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1627 1636 G1CollectedHeap* _g1;
1628 1637 int _worker_num;
1629 1638 size_t _max_live_bytes;
1630 1639 uint _regions_claimed;
1631 1640 size_t _freed_bytes;
1632 1641 FreeRegionList* _local_cleanup_list;
1633 1642 OldRegionSet* _old_proxy_set;
1634 1643 HumongousRegionSet* _humongous_proxy_set;
1635 1644 HRRSCleanupTask* _hrrs_cleanup_task;
1636 1645 double _claimed_region_time;
1637 1646 double _max_region_time;
1638 1647
1639 1648 public:
1640 1649 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1641 1650 int worker_num,
1642 1651 FreeRegionList* local_cleanup_list,
1643 1652 OldRegionSet* old_proxy_set,
1644 1653 HumongousRegionSet* humongous_proxy_set,
1645 1654 HRRSCleanupTask* hrrs_cleanup_task) :
1646 1655 _g1(g1), _worker_num(worker_num),
1647 1656 _max_live_bytes(0), _regions_claimed(0),
1648 1657 _freed_bytes(0),
1649 1658 _claimed_region_time(0.0), _max_region_time(0.0),
1650 1659 _local_cleanup_list(local_cleanup_list),
1651 1660 _old_proxy_set(old_proxy_set),
1652 1661 _humongous_proxy_set(humongous_proxy_set),
1653 1662 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1654 1663
1655 1664 size_t freed_bytes() { return _freed_bytes; }
1656 1665
1657 1666 bool doHeapRegion(HeapRegion *hr) {
1658 1667 if (hr->continuesHumongous()) {
1659 1668 return false;
1660 1669 }
1661 1670 // We use a claim value of zero here because all regions
1662 1671 // were claimed with value 1 in the FinalCount task.
1663 1672 _g1->reset_gc_time_stamps(hr);
1664 1673 double start = os::elapsedTime();
1665 1674 _regions_claimed++;
1666 1675 hr->note_end_of_marking();
1667 1676 _max_live_bytes += hr->max_live_bytes();
1668 1677 _g1->free_region_if_empty(hr,
1669 1678 &_freed_bytes,
1670 1679 _local_cleanup_list,
1671 1680 _old_proxy_set,
1672 1681 _humongous_proxy_set,
1673 1682 _hrrs_cleanup_task,
1674 1683 true /* par */);
1675 1684 double region_time = (os::elapsedTime() - start);
1676 1685 _claimed_region_time += region_time;
1677 1686 if (region_time > _max_region_time) {
1678 1687 _max_region_time = region_time;
1679 1688 }
1680 1689 return false;
1681 1690 }
1682 1691
1683 1692 size_t max_live_bytes() { return _max_live_bytes; }
1684 1693 uint regions_claimed() { return _regions_claimed; }
1685 1694 double claimed_region_time_sec() { return _claimed_region_time; }
1686 1695 double max_region_time_sec() { return _max_region_time; }
1687 1696 };
1688 1697
1689 1698 class G1ParNoteEndTask: public AbstractGangTask {
1690 1699 friend class G1NoteEndOfConcMarkClosure;
1691 1700
1692 1701 protected:
1693 1702 G1CollectedHeap* _g1h;
1694 1703 size_t _max_live_bytes;
1695 1704 size_t _freed_bytes;
1696 1705 FreeRegionList* _cleanup_list;
1697 1706
1698 1707 public:
1699 1708 G1ParNoteEndTask(G1CollectedHeap* g1h,
1700 1709 FreeRegionList* cleanup_list) :
1701 1710 AbstractGangTask("G1 note end"), _g1h(g1h),
1702 1711 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1703 1712
1704 1713 void work(uint worker_id) {
1705 1714 double start = os::elapsedTime();
1706 1715 FreeRegionList local_cleanup_list("Local Cleanup List");
1707 1716 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1708 1717 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1709 1718 HRRSCleanupTask hrrs_cleanup_task;
1710 1719 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1711 1720 &old_proxy_set,
1712 1721 &humongous_proxy_set,
1713 1722 &hrrs_cleanup_task);
1714 1723 if (G1CollectedHeap::use_parallel_gc_threads()) {
1715 1724 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1716 1725 _g1h->workers()->active_workers(),
1717 1726 HeapRegion::NoteEndClaimValue);
1718 1727 } else {
1719 1728 _g1h->heap_region_iterate(&g1_note_end);
1720 1729 }
1721 1730 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1722 1731
1723 1732 // Now update the lists
1724 1733 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1725 1734 NULL /* free_list */,
1726 1735 &old_proxy_set,
1727 1736 &humongous_proxy_set,
1728 1737 true /* par */);
1729 1738 {
1730 1739 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1731 1740 _max_live_bytes += g1_note_end.max_live_bytes();
1732 1741 _freed_bytes += g1_note_end.freed_bytes();
1733 1742
1734 1743 // If we iterate over the global cleanup list at the end of
1735 1744 // cleanup to do this printing we will not guarantee to only
1736 1745 // generate output for the newly-reclaimed regions (the list
1737 1746 // might not be empty at the beginning of cleanup; we might
1738 1747 // still be working on its previous contents). So we do the
1739 1748 // printing here, before we append the new regions to the global
1740 1749 // cleanup list.
1741 1750
1742 1751 G1HRPrinter* hr_printer = _g1h->hr_printer();
1743 1752 if (hr_printer->is_active()) {
1744 1753 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1745 1754 while (iter.more_available()) {
1746 1755 HeapRegion* hr = iter.get_next();
1747 1756 hr_printer->cleanup(hr);
1748 1757 }
1749 1758 }
1750 1759
1751 1760 _cleanup_list->add_as_tail(&local_cleanup_list);
1752 1761 assert(local_cleanup_list.is_empty(), "post-condition");
1753 1762
1754 1763 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1755 1764 }
1756 1765 }
1757 1766 size_t max_live_bytes() { return _max_live_bytes; }
1758 1767 size_t freed_bytes() { return _freed_bytes; }
1759 1768 };
1760 1769
1761 1770 class G1ParScrubRemSetTask: public AbstractGangTask {
1762 1771 protected:
1763 1772 G1RemSet* _g1rs;
1764 1773 BitMap* _region_bm;
1765 1774 BitMap* _card_bm;
1766 1775 public:
1767 1776 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1768 1777 BitMap* region_bm, BitMap* card_bm) :
1769 1778 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1770 1779 _region_bm(region_bm), _card_bm(card_bm) { }
1771 1780
1772 1781 void work(uint worker_id) {
1773 1782 if (G1CollectedHeap::use_parallel_gc_threads()) {
1774 1783 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1775 1784 HeapRegion::ScrubRemSetClaimValue);
1776 1785 } else {
1777 1786 _g1rs->scrub(_region_bm, _card_bm);
1778 1787 }
1779 1788 }
1780 1789
1781 1790 };
1782 1791
1783 1792 void ConcurrentMark::cleanup() {
1784 1793 // world is stopped at this checkpoint
1785 1794 assert(SafepointSynchronize::is_at_safepoint(),
1786 1795 "world should be stopped");
1787 1796 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1788 1797
1789 1798 // If a full collection has happened, we shouldn't do this.
1790 1799 if (has_aborted()) {
1791 1800 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1792 1801 return;
1793 1802 }
1794 1803
1795 1804 HRSPhaseSetter x(HRSPhaseCleanup);
1796 1805 g1h->verify_region_sets_optional();
1797 1806
1798 1807 if (VerifyDuringGC) {
1799 1808 HandleMark hm; // handle scope
1800 1809 gclog_or_tty->print(" VerifyDuringGC:(before)");
1801 1810 Universe::heap()->prepare_for_verify();
1802 1811 Universe::verify(/* silent */ false,
1803 1812 /* option */ VerifyOption_G1UsePrevMarking);
1804 1813 }
1805 1814
1806 1815 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1807 1816 g1p->record_concurrent_mark_cleanup_start();
1808 1817
1809 1818 double start = os::elapsedTime();
1810 1819
1811 1820 HeapRegionRemSet::reset_for_cleanup_tasks();
1812 1821
1813 1822 uint n_workers;
1814 1823
1815 1824 // Do counting once more with the world stopped for good measure.
1816 1825 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1817 1826
1818 1827 if (G1CollectedHeap::use_parallel_gc_threads()) {
1819 1828 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1820 1829 "sanity check");
1821 1830
1822 1831 g1h->set_par_threads();
1823 1832 n_workers = g1h->n_par_threads();
1824 1833 assert(g1h->n_par_threads() == n_workers,
1825 1834 "Should not have been reset");
1826 1835 g1h->workers()->run_task(&g1_par_count_task);
1827 1836 // Done with the parallel phase so reset to 0.
1828 1837 g1h->set_par_threads(0);
1829 1838
1830 1839 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1831 1840 "sanity check");
1832 1841 } else {
1833 1842 n_workers = 1;
1834 1843 g1_par_count_task.work(0);
1835 1844 }
1836 1845
1837 1846 if (VerifyDuringGC) {
1838 1847 // Verify that the counting data accumulated during marking matches
1839 1848 // that calculated by walking the marking bitmap.
1840 1849
1841 1850 // Bitmaps to hold expected values
1842 1851 BitMap expected_region_bm(_region_bm.size(), false);
1843 1852 BitMap expected_card_bm(_card_bm.size(), false);
1844 1853
1845 1854 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1846 1855 &_region_bm,
1847 1856 &_card_bm,
1848 1857 &expected_region_bm,
1849 1858 &expected_card_bm);
1850 1859
1851 1860 if (G1CollectedHeap::use_parallel_gc_threads()) {
1852 1861 g1h->set_par_threads((int)n_workers);
1853 1862 g1h->workers()->run_task(&g1_par_verify_task);
1854 1863 // Done with the parallel phase so reset to 0.
1855 1864 g1h->set_par_threads(0);
1856 1865
1857 1866 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1858 1867 "sanity check");
1859 1868 } else {
1860 1869 g1_par_verify_task.work(0);
1861 1870 }
1862 1871
1863 1872 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1864 1873 }
1865 1874
1866 1875 size_t start_used_bytes = g1h->used();
1867 1876 g1h->set_marking_complete();
1868 1877
1869 1878 double count_end = os::elapsedTime();
1870 1879 double this_final_counting_time = (count_end - start);
1871 1880 _total_counting_time += this_final_counting_time;
1872 1881
1873 1882 if (G1PrintRegionLivenessInfo) {
1874 1883 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1875 1884 _g1h->heap_region_iterate(&cl);
1876 1885 }
1877 1886
1878 1887 // Install newly created mark bitMap as "prev".
1879 1888 swapMarkBitMaps();
1880 1889
1881 1890 g1h->reset_gc_time_stamp();
1882 1891
1883 1892 // Note end of marking in all heap regions.
1884 1893 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1885 1894 if (G1CollectedHeap::use_parallel_gc_threads()) {
1886 1895 g1h->set_par_threads((int)n_workers);
1887 1896 g1h->workers()->run_task(&g1_par_note_end_task);
1888 1897 g1h->set_par_threads(0);
1889 1898
1890 1899 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1891 1900 "sanity check");
1892 1901 } else {
1893 1902 g1_par_note_end_task.work(0);
1894 1903 }
1895 1904 g1h->check_gc_time_stamps();
1896 1905
1897 1906 if (!cleanup_list_is_empty()) {
1898 1907 // The cleanup list is not empty, so we'll have to process it
1899 1908 // concurrently. Notify anyone else that might be wanting free
1900 1909 // regions that there will be more free regions coming soon.
1901 1910 g1h->set_free_regions_coming();
1902 1911 }
1903 1912
1904 1913 // call below, since it affects the metric by which we sort the heap
1905 1914 // regions.
1906 1915 if (G1ScrubRemSets) {
1907 1916 double rs_scrub_start = os::elapsedTime();
1908 1917 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1909 1918 if (G1CollectedHeap::use_parallel_gc_threads()) {
1910 1919 g1h->set_par_threads((int)n_workers);
1911 1920 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1912 1921 g1h->set_par_threads(0);
1913 1922
1914 1923 assert(g1h->check_heap_region_claim_values(
1915 1924 HeapRegion::ScrubRemSetClaimValue),
1916 1925 "sanity check");
1917 1926 } else {
1918 1927 g1_par_scrub_rs_task.work(0);
1919 1928 }
1920 1929
1921 1930 double rs_scrub_end = os::elapsedTime();
1922 1931 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1923 1932 _total_rs_scrub_time += this_rs_scrub_time;
1924 1933 }
1925 1934
1926 1935 // this will also free any regions totally full of garbage objects,
1927 1936 // and sort the regions.
1928 1937 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1929 1938
1930 1939 // Statistics.
1931 1940 double end = os::elapsedTime();
1932 1941 _cleanup_times.add((end - start) * 1000.0);
1933 1942
1934 1943 if (G1Log::fine()) {
1935 1944 g1h->print_size_transition(gclog_or_tty,
1936 1945 start_used_bytes,
1937 1946 g1h->used(),
1938 1947 g1h->capacity());
1939 1948 }
1940 1949
1941 1950 // Clean up will have freed any regions completely full of garbage.
1942 1951 // Update the soft reference policy with the new heap occupancy.
1943 1952 Universe::update_heap_info_at_gc();
1944 1953
1945 1954 // We need to make this be a "collection" so any collection pause that
1946 1955 // races with it goes around and waits for completeCleanup to finish.
1947 1956 g1h->increment_total_collections();
1948 1957
1949 1958 // We reclaimed old regions so we should calculate the sizes to make
1950 1959 // sure we update the old gen/space data.
1951 1960 g1h->g1mm()->update_sizes();
1952 1961
1953 1962 if (VerifyDuringGC) {
1954 1963 HandleMark hm; // handle scope
1955 1964 gclog_or_tty->print(" VerifyDuringGC:(after)");
1956 1965 Universe::heap()->prepare_for_verify();
1957 1966 Universe::verify(/* silent */ false,
1958 1967 /* option */ VerifyOption_G1UsePrevMarking);
1959 1968 }
1960 1969
1961 1970 g1h->verify_region_sets_optional();
1962 1971 }
1963 1972
1964 1973 void ConcurrentMark::completeCleanup() {
1965 1974 if (has_aborted()) return;
1966 1975
1967 1976 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1968 1977
1969 1978 _cleanup_list.verify_optional();
1970 1979 FreeRegionList tmp_free_list("Tmp Free List");
1971 1980
1972 1981 if (G1ConcRegionFreeingVerbose) {
1973 1982 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1974 1983 "cleanup list has %u entries",
1975 1984 _cleanup_list.length());
1976 1985 }
1977 1986
1978 1987 // No one else should be accessing the _cleanup_list at this point,
1979 1988 // so it's not necessary to take any locks
1980 1989 while (!_cleanup_list.is_empty()) {
1981 1990 HeapRegion* hr = _cleanup_list.remove_head();
1982 1991 assert(hr != NULL, "the list was not empty");
1983 1992 hr->par_clear();
1984 1993 tmp_free_list.add_as_tail(hr);
1985 1994
1986 1995 // Instead of adding one region at a time to the secondary_free_list,
1987 1996 // we accumulate them in the local list and move them a few at a
1988 1997 // time. This also cuts down on the number of notify_all() calls
1989 1998 // we do during this process. We'll also append the local list when
1990 1999 // _cleanup_list is empty (which means we just removed the last
1991 2000 // region from the _cleanup_list).
1992 2001 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1993 2002 _cleanup_list.is_empty()) {
1994 2003 if (G1ConcRegionFreeingVerbose) {
1995 2004 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1996 2005 "appending %u entries to the secondary_free_list, "
1997 2006 "cleanup list still has %u entries",
1998 2007 tmp_free_list.length(),
1999 2008 _cleanup_list.length());
2000 2009 }
2001 2010
2002 2011 {
2003 2012 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2004 2013 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2005 2014 SecondaryFreeList_lock->notify_all();
2006 2015 }
2007 2016
2008 2017 if (G1StressConcRegionFreeing) {
2009 2018 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2010 2019 os::sleep(Thread::current(), (jlong) 1, false);
2011 2020 }
2012 2021 }
2013 2022 }
2014 2023 }
2015 2024 assert(tmp_free_list.is_empty(), "post-condition");
2016 2025 }
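
The batching here trades a little latency for far less lock and notify_all() traffic: regions accumulate in the local list and are appended to the shared secondary free list every G1SecondaryFreeListAppendLength entries, plus once more when the cleanup list empties. A compact sketch of the pattern with standard primitives (all names are placeholders):

#include <list>
#include <mutex>
#include <condition_variable>

std::list<int>          g_shared_list;      // stands in for the secondary free list
std::mutex              g_lock;             // stands in for SecondaryFreeList_lock
std::condition_variable g_cv;
const size_t            kAppendLength = 5;  // stands in for G1SecondaryFreeListAppendLength

void drain_in_batches(std::list<int>& cleanup_list) {
  std::list<int> local;
  while (!cleanup_list.empty()) {
    local.splice(local.end(), cleanup_list, cleanup_list.begin());
    // Flush periodically, and always once the source list is empty.
    if (local.size() % kAppendLength == 0 || cleanup_list.empty()) {
      std::lock_guard<std::mutex> x(g_lock);
      g_shared_list.splice(g_shared_list.end(), local);
      g_cv.notify_all();  // wake anyone waiting for free regions
    }
  }
}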
2017 2026
2018 2027 // Support closures for reference processing in G1
2019 2028
2020 2029 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2021 2030 HeapWord* addr = (HeapWord*)obj;
2022 2031 return addr != NULL &&
2023 2032 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2024 2033 }
2025 2034
2026 2035 class G1CMKeepAliveClosure: public ExtendedOopClosure {
2027 2036 G1CollectedHeap* _g1;
2028 2037 ConcurrentMark* _cm;
2029 2038 public:
2030 2039 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2031 2040 _g1(g1), _cm(cm) {
2032 2041 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2033 2042 }
2034 2043
2035 2044 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2036 2045 virtual void do_oop( oop* p) { do_oop_work(p); }
2037 2046
2038 2047 template <class T> void do_oop_work(T* p) {
2039 2048 oop obj = oopDesc::load_decode_heap_oop(p);
2040 2049 HeapWord* addr = (HeapWord*)obj;
2041 2050
2042 2051 if (_cm->verbose_high()) {
2043 2052 gclog_or_tty->print_cr("\t[0] we're looking at location "
2044 2053 "*"PTR_FORMAT" = "PTR_FORMAT,
2045 2054 p, (void*) obj);
2046 2055 }
2047 2056
2048 2057 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2049 2058 _cm->mark_and_count(obj);
2050 2059 _cm->mark_stack_push(obj);
2051 2060 }
2052 2061 }
2053 2062 };
2054 2063
2055 2064 class G1CMDrainMarkingStackClosure: public VoidClosure {
2056 2065 ConcurrentMark* _cm;
2057 2066 CMMarkStack* _markStack;
2058 2067 G1CMKeepAliveClosure* _oopClosure;
2059 2068 public:
2060 2069 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2061 2070 G1CMKeepAliveClosure* oopClosure) :
2062 2071 _cm(cm),
2063 2072 _markStack(markStack),
2064 2073 _oopClosure(oopClosure) { }
2065 2074
2066 2075 void do_void() {
2067 2076 _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false);
2068 2077 }
2069 2078 };
2070 2079
2071 2080 // 'Keep Alive' closure used by parallel reference processing.
2072 2081 // An instance of this closure is used in the parallel reference processing
2073 2082 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2074 2083 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2075 2084 // placed on to discovered ref lists once so we can mark and push with no
2076 2085 // need to check whether the object has already been marked. Using the
2077 2086 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2078 2087 // operating on the global mark stack. This means that an individual
2079 2088 // worker would be doing lock-free pushes while it processes its own
2080 2089 // discovered ref list followed by drain call. If the discovered ref lists
2081 2090 // are unbalanced then this could cause interference with the other
2082 2091 // workers. Using a CMTask (and its embedded local data structures)
2083 2092 // avoids that potential interference.
2084 2093 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2085 2094 ConcurrentMark* _cm;
2086 2095 CMTask* _task;
2087 2096 int _ref_counter_limit;
2088 2097 int _ref_counter;
2089 2098 public:
2090 2099 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2091 2100 _cm(cm), _task(task),
2092 2101 _ref_counter_limit(G1RefProcDrainInterval) {
2093 2102 assert(_ref_counter_limit > 0, "sanity");
2094 2103 _ref_counter = _ref_counter_limit;
2095 2104 }
2096 2105
2097 2106 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2098 2107 virtual void do_oop( oop* p) { do_oop_work(p); }
2099 2108
2100 2109 template <class T> void do_oop_work(T* p) {
2101 2110 if (!_cm->has_overflown()) {
2102 2111 oop obj = oopDesc::load_decode_heap_oop(p);
2103 2112 if (_cm->verbose_high()) {
2104 2113 gclog_or_tty->print_cr("\t[%d] we're looking at location "
2105 2114 "*"PTR_FORMAT" = "PTR_FORMAT,
2106 2115 _task->task_id(), p, (void*) obj);
2107 2116 }
2108 2117
2109 2118 _task->deal_with_reference(obj);
2110 2119 _ref_counter--;
2111 2120
2112 2121 if (_ref_counter == 0) {
2113 2122 // We have dealt with _ref_counter_limit references, pushing them and objects
2114 2123 // reachable from them on to the local stack (and possibly the global stack).
2115 2124 // Call do_marking_step() to process these entries. We call the routine in a
2116 2125 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2117 2126 // with the entries that we've pushed as a result of the deal_with_reference
2118 2127 // calls above) or we overflow.
2119 2128 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2120 2129 // while there may still be some work to do. (See the comment at the
2121 2130 // beginning of CMTask::do_marking_step() for those conditions - one of which
2122 2131 // is reaching the specified time target.) It is only when
2123 2132 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2124 2133 // that the marking has completed.
2125 2134 do {
2126 2135 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2127 2136 _task->do_marking_step(mark_step_duration_ms,
2128 2137 false /* do_stealing */,
2129 2138 false /* do_termination */);
2130 2139 } while (_task->has_aborted() && !_cm->has_overflown());
2131 2140 _ref_counter = _ref_counter_limit;
2132 2141 }
2133 2142 } else {
2134 2143 if (_cm->verbose_high()) {
2135 2144 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2136 2145 }
2137 2146 }
2138 2147 }
2139 2148 };
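
The closure amortizes draining: it hands G1RefProcDrainInterval references to the CMTask before looping on do_marking_step() until the step completes (or the global stack overflows), then resets its counter. A schematic of that drain-every-N control flow (the names are placeholders, not HotSpot APIs):

#include <deque>

// Drain-every-N sketch: work items stand in for marked objects.
struct RefDrainer {
  std::deque<int> work;   // stands in for the task-local queues
  int limit;              // stands in for G1RefProcDrainInterval
  int counter;

  explicit RefDrainer(int n) : limit(n), counter(n) {}

  void process_reference(int ref) {
    work.push_back(ref);  // deal_with_reference(): push new work
    if (--counter == 0) {
      drain();            // periodically drain what we pushed
      counter = limit;
    }
  }

  void drain() {
    // Stands in for looping on do_marking_step() until it no longer aborts.
    while (!work.empty()) {
      work.pop_front();
    }
  }
};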
2140 2149
2141 2150 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2142 2151 ConcurrentMark* _cm;
2143 2152 CMTask* _task;
2144 2153 public:
2145 2154 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2146 2155 _cm(cm), _task(task) { }
2147 2156
2148 2157 void do_void() {
2149 2158 do {
2150 2159 if (_cm->verbose_high()) {
2151 2160 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2152 2161 _task->task_id());
2153 2162 }
2154 2163
2155 2164 // We call CMTask::do_marking_step() to completely drain the local and
2156 2165 // global marking stacks. The routine is called in a loop, which we'll
2157 2166 // exit if there's nothing more to do (i.e. we've completely drained the
2158 2167 // entries that were pushed as a result of applying the
2159 2168 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2160 2169 // lists above) or we overflow the global marking stack.
2161 2170 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2162 2171 // while there may still be some work to do. (See the comment at the
2163 2172 // beginning of CMTask::do_marking_step() for those conditions - one of which
2164 2173 // is reaching the specified time target.) It is only when
2165 2174 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2166 2175 // that the marking has completed.
2167 2176
2168 2177 _task->do_marking_step(1000000000.0 /* something very large */,
2169 2178 true /* do_stealing */,
2170 2179 true /* do_termination */);
2171 2180 } while (_task->has_aborted() && !_cm->has_overflown());
2172 2181 }
2173 2182 };
2174 2183
2175 2184 // Implementation of AbstractRefProcTaskExecutor for parallel
2176 2185 // reference processing at the end of G1 concurrent marking
2177 2186
2178 2187 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2179 2188 private:
2180 2189 G1CollectedHeap* _g1h;
2181 2190 ConcurrentMark* _cm;
2182 2191 WorkGang* _workers;
2183 2192 int _active_workers;
2184 2193
2185 2194 public:
2186 2195 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2187 2196 ConcurrentMark* cm,
2188 2197 WorkGang* workers,
2189 2198 int n_workers) :
2190 2199 _g1h(g1h), _cm(cm),
2191 2200 _workers(workers), _active_workers(n_workers) { }
2192 2201
2193 2202 // Executes the given task using concurrent marking worker threads.
2194 2203 virtual void execute(ProcessTask& task);
2195 2204 virtual void execute(EnqueueTask& task);
2196 2205 };
2197 2206
2198 2207 class G1CMRefProcTaskProxy: public AbstractGangTask {
2199 2208 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2200 2209 ProcessTask& _proc_task;
2201 2210 G1CollectedHeap* _g1h;
2202 2211 ConcurrentMark* _cm;
2203 2212
2204 2213 public:
2205 2214 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2206 2215 G1CollectedHeap* g1h,
2207 2216 ConcurrentMark* cm) :
2208 2217 AbstractGangTask("Process reference objects in parallel"),
2209 2218 _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2210 2219
2211 2220 virtual void work(uint worker_id) {
2212 2221 CMTask* marking_task = _cm->task(worker_id);
2213 2222 G1CMIsAliveClosure g1_is_alive(_g1h);
2214 2223 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2215 2224 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2216 2225
2217 2226 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2218 2227 }
2219 2228 };
2220 2229
2221 2230 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2222 2231 assert(_workers != NULL, "Need parallel worker threads.");
2223 2232
2224 2233 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2225 2234
2226 2235 // We need to reset the phase for each task execution so that
2227 2236 // the termination protocol of CMTask::do_marking_step works.
2228 2237 _cm->set_phase(_active_workers, false /* concurrent */);
2229 2238 _g1h->set_par_threads(_active_workers);
2230 2239 _workers->run_task(&proc_task_proxy);
2231 2240 _g1h->set_par_threads(0);
2232 2241 }
2233 2242
2234 2243 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2235 2244 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2236 2245 EnqueueTask& _enq_task;
2237 2246
2238 2247 public:
2239 2248 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2240 2249 AbstractGangTask("Enqueue reference objects in parallel"),
2241 2250 _enq_task(enq_task) { }
2242 2251
2243 2252 virtual void work(uint worker_id) {
2244 2253 _enq_task.work(worker_id);
2245 2254 }
2246 2255 };
2247 2256
2248 2257 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2249 2258 assert(_workers != NULL, "Need parallel worker threads.");
2250 2259
2251 2260 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2252 2261
2253 2262 _g1h->set_par_threads(_active_workers);
2254 2263 _workers->run_task(&enq_task_proxy);
2255 2264 _g1h->set_par_threads(0);
2256 2265 }
2257 2266
2258 2267 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2259 2268 ResourceMark rm;
2260 2269 HandleMark hm;
2261 2270
2262 2271 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2263 2272
2264 2273 // Is alive closure.
2265 2274 G1CMIsAliveClosure g1_is_alive(g1h);
2266 2275
2267 2276 // Inner scope to exclude the cleaning of the string and symbol
2268 2277 // tables from the displayed time.
2269 2278 {
2270 2279 if (G1Log::finer()) {
2271 2280 gclog_or_tty->put(' ');
2272 2281 }
2273 2282 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2274 2283
2275 2284 ReferenceProcessor* rp = g1h->ref_processor_cm();
2276 2285
2277 2286 // See the comment in G1CollectedHeap::ref_processing_init()
2278 2287 // about how reference processing currently works in G1.
2279 2288
2280 2289 // Process weak references.
2281 2290 rp->setup_policy(clear_all_soft_refs);
2282 2291 assert(_markStack.isEmpty(), "mark stack should be empty");
2283 2292
2284 2293 G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2285 2294 G1CMDrainMarkingStackClosure
2286 2295 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2287 2296
2288 2297 // We use the work gang from the G1CollectedHeap and we utilize all
2289 2298 // the worker threads.
2290 2299 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2291 2300 active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2292 2301
2293 2302 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2294 2303 g1h->workers(), active_workers);
2295 2304
2296 2305 if (rp->processing_is_mt()) {
2297 2306 // Set the degree of MT here. If the discovery is done MT, there
2298 2307 // may have been a different number of threads doing the discovery
2299 2308 // and a different number of discovered lists may have Ref objects.
2300 2309 // That is OK as long as the Reference lists are balanced (see
2301 2310 // balance_all_queues() and balance_queues()).
2302 2311 rp->set_active_mt_degree(active_workers);
2303 2312
2304 2313 rp->process_discovered_references(&g1_is_alive,
2305 2314 &g1_keep_alive,
2306 2315 &g1_drain_mark_stack,
2307 2316 &par_task_executor);
2308 2317
2309 2318 // The work routines of the parallel keep_alive and drain_marking_stack
2310 2319 // will set the has_overflown flag if we overflow the global marking
2311 2320 // stack.
2312 2321 } else {
2313 2322 rp->process_discovered_references(&g1_is_alive,
2314 2323 &g1_keep_alive,
2315 2324 &g1_drain_mark_stack,
2316 2325 NULL);
2317 2326 }
2318 2327
2319 2328 assert(_markStack.overflow() || _markStack.isEmpty(),
2320 2329 "mark stack should be empty (unless it overflowed)");
2321 2330 if (_markStack.overflow()) {
2322 2331 // Should have been done already when we tried to push an
2323 2332 // entry on to the global mark stack. But let's do it again.
2324 2333 set_has_overflown();
2325 2334 }
2326 2335
2327 2336 if (rp->processing_is_mt()) {
2328 2337 assert(rp->num_q() == active_workers, "why not");
2329 2338 rp->enqueue_discovered_references(&par_task_executor);
2330 2339 } else {
2331 2340 rp->enqueue_discovered_references();
2332 2341 }
2333 2342
2334 2343 rp->verify_no_references_recorded();
2335 2344 assert(!rp->discovery_enabled(), "Post condition");
2336 2345 }
2337 2346
2338 2347 // Now clean up stale oops in StringTable
2339 2348 StringTable::unlink(&g1_is_alive);
2340 2349 // Clean up unreferenced symbols in symbol table.
2341 2350 SymbolTable::unlink();
2342 2351 }
2343 2352
2344 2353 void ConcurrentMark::swapMarkBitMaps() {
2345 2354 CMBitMapRO* temp = _prevMarkBitMap;
2346 2355 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2347 2356 _nextMarkBitMap = (CMBitMap*) temp;
2348 2357 }
2349 2358
2350 2359 class CMRemarkTask: public AbstractGangTask {
2351 2360 private:
2352 2361 ConcurrentMark *_cm;
2353 2362
2354 2363 public:
2355 2364 void work(uint worker_id) {
2356 2365 // Since all available tasks are actually started, we should
2357 2366 // only proceed if we're supposed to be active.
2358 2367 if (worker_id < _cm->active_tasks()) {
2359 2368 CMTask* task = _cm->task(worker_id);
2360 2369 task->record_start_time();
2361 2370 do {
2362 2371 task->do_marking_step(1000000000.0 /* something very large */,
2363 2372 true /* do_stealing */,
2364 2373 true /* do_termination */);
2365 2374 } while (task->has_aborted() && !_cm->has_overflown());
2366 2375 // If we overflow, then we do not want to restart. We instead
2367 2376 // want to abort remark and do concurrent marking again.
2368 2377 task->record_end_time();
2369 2378 }
2370 2379 }
2371 2380
2372 2381 CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2373 2382 AbstractGangTask("Par Remark"), _cm(cm) {
2374 2383 _cm->terminator()->reset_for_reuse(active_workers);
2375 2384 }
2376 2385 };
2377 2386
2378 2387 void ConcurrentMark::checkpointRootsFinalWork() {
2379 2388 ResourceMark rm;
2380 2389 HandleMark hm;
2381 2390 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2382 2391
2383 2392 g1h->ensure_parsability(false);
2384 2393
2385 2394 if (G1CollectedHeap::use_parallel_gc_threads()) {
2386 2395 G1CollectedHeap::StrongRootsScope srs(g1h);
2387 2396 // this is remark, so we'll use up all active threads
2388 2397 uint active_workers = g1h->workers()->active_workers();
2389 2398 if (active_workers == 0) {
2390 2399 assert(active_workers > 0, "Should have been set earlier");
2391 2400 active_workers = (uint) ParallelGCThreads;
2392 2401 g1h->workers()->set_active_workers(active_workers);
2393 2402 }
2394 2403 set_phase(active_workers, false /* concurrent */);
2395 2404 // Leave _parallel_marking_threads at its
2396 2405 // value originally calculated in the ConcurrentMark
2397 2406 // constructor and pass values of the active workers
2398 2407 // through the gang in the task.
2399 2408
2400 2409 CMRemarkTask remarkTask(this, active_workers);
2401 2410 g1h->set_par_threads(active_workers);
2402 2411 g1h->workers()->run_task(&remarkTask);
2403 2412 g1h->set_par_threads(0);
2404 2413 } else {
2405 2414 G1CollectedHeap::StrongRootsScope srs(g1h);
2406 2415 // this is remark, so we'll use up all available threads
2407 2416 uint active_workers = 1;
2408 2417 set_phase(active_workers, false /* concurrent */);
2409 2418
2410 2419 CMRemarkTask remarkTask(this, active_workers);
2411 2420 // We will start all available threads, even if we decide that the
2412 2421 // active_workers will be fewer. The extra ones will just bail out
2413 2422 // immediately.
2414 2423 remarkTask.work(0);
2415 2424 }
2416 2425 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2417 2426 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2418 2427
2419 2428 print_stats();
2420 2429
2421 2430 #if VERIFY_OBJS_PROCESSED
2422 2431 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2423 2432 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2424 2433 _scan_obj_cl.objs_processed,
2425 2434 ThreadLocalObjQueue::objs_enqueued);
2426 2435 guarantee(_scan_obj_cl.objs_processed ==
2427 2436 ThreadLocalObjQueue::objs_enqueued,
2428 2437 "Different number of objs processed and enqueued.");
2429 2438 }
2430 2439 #endif
2431 2440 }
2432 2441
2433 2442 #ifndef PRODUCT
2434 2443
2435 2444 class PrintReachableOopClosure: public OopClosure {
2436 2445 private:
2437 2446 G1CollectedHeap* _g1h;
2438 2447 outputStream* _out;
2439 2448 VerifyOption _vo;
2440 2449 bool _all;
2441 2450
2442 2451 public:
2443 2452 PrintReachableOopClosure(outputStream* out,
2444 2453 VerifyOption vo,
2445 2454 bool all) :
2446 2455 _g1h(G1CollectedHeap::heap()),
2447 2456 _out(out), _vo(vo), _all(all) { }
2448 2457
2449 2458 void do_oop(narrowOop* p) { do_oop_work(p); }
2450 2459 void do_oop( oop* p) { do_oop_work(p); }
2451 2460
2452 2461 template <class T> void do_oop_work(T* p) {
2453 2462 oop obj = oopDesc::load_decode_heap_oop(p);
2454 2463 const char* str = NULL;
2455 2464 const char* str2 = "";
2456 2465
2457 2466 if (obj == NULL) {
2458 2467 str = "";
2459 2468 } else if (!_g1h->is_in_g1_reserved(obj)) {
2460 2469 str = " O";
2461 2470 } else {
2462 2471 HeapRegion* hr = _g1h->heap_region_containing(obj);
2463 2472 guarantee(hr != NULL, "invariant");
2464 2473 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2465 2474 bool marked = _g1h->is_marked(obj, _vo);
2466 2475
2467 2476 if (over_tams) {
2468 2477 str = " >";
2469 2478 if (marked) {
2470 2479 str2 = " AND MARKED";
2471 2480 }
2472 2481 } else if (marked) {
2473 2482 str = " M";
2474 2483 } else {
2475 2484 str = " NOT";
2476 2485 }
2477 2486 }
2478 2487
2479 2488 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2480 2489 p, (void*) obj, str, str2);
2481 2490 }
2482 2491 };
2483 2492
2484 2493 class PrintReachableObjectClosure : public ObjectClosure {
2485 2494 private:
2486 2495 G1CollectedHeap* _g1h;
2487 2496 outputStream* _out;
2488 2497 VerifyOption _vo;
2489 2498 bool _all;
2490 2499 HeapRegion* _hr;
2491 2500
2492 2501 public:
2493 2502 PrintReachableObjectClosure(outputStream* out,
2494 2503 VerifyOption vo,
2495 2504 bool all,
2496 2505 HeapRegion* hr) :
2497 2506 _g1h(G1CollectedHeap::heap()),
2498 2507 _out(out), _vo(vo), _all(all), _hr(hr) { }
2499 2508
2500 2509 void do_object(oop o) {
2501 2510 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2502 2511 bool marked = _g1h->is_marked(o, _vo);
2503 2512 bool print_it = _all || over_tams || marked;
2504 2513
2505 2514 if (print_it) {
2506 2515 _out->print_cr(" "PTR_FORMAT"%s",
2507 2516 o, (over_tams) ? " >" : (marked) ? " M" : "");
2508 2517 PrintReachableOopClosure oopCl(_out, _vo, _all);
2509 2518 o->oop_iterate_no_header(&oopCl);
2510 2519 }
2511 2520 }
2512 2521 };
2513 2522
2514 2523 class PrintReachableRegionClosure : public HeapRegionClosure {
2515 2524 private:
2516 2525 G1CollectedHeap* _g1h;
2517 2526 outputStream* _out;
2518 2527 VerifyOption _vo;
2519 2528 bool _all;
2520 2529
2521 2530 public:
2522 2531 bool doHeapRegion(HeapRegion* hr) {
2523 2532 HeapWord* b = hr->bottom();
2524 2533 HeapWord* e = hr->end();
2525 2534 HeapWord* t = hr->top();
2526 2535 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2527 2536 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2528 2537 "TAMS: "PTR_FORMAT, b, e, t, p);
2529 2538 _out->cr();
2530 2539
2531 2540 HeapWord* from = b;
2532 2541 HeapWord* to = t;
2533 2542
2534 2543 if (to > from) {
2535 2544 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2536 2545 _out->cr();
2537 2546 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2538 2547 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2539 2548 _out->cr();
2540 2549 }
2541 2550
2542 2551 return false;
2543 2552 }
2544 2553
2545 2554 PrintReachableRegionClosure(outputStream* out,
2546 2555 VerifyOption vo,
2547 2556 bool all) :
2548 2557 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2549 2558 };
2550 2559
2551 2560 void ConcurrentMark::print_reachable(const char* str,
2552 2561 VerifyOption vo,
2553 2562 bool all) {
2554 2563 gclog_or_tty->cr();
2555 2564 gclog_or_tty->print_cr("== Doing heap dump... ");
2556 2565
2557 2566 if (G1PrintReachableBaseFile == NULL) {
2558 2567 gclog_or_tty->print_cr(" #### error: no base file defined");
2559 2568 return;
2560 2569 }
2561 2570
2562 2571 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2563 2572 (JVM_MAXPATHLEN - 1)) {
2564 2573 gclog_or_tty->print_cr(" #### error: file name too long");
2565 2574 return;
2566 2575 }
2567 2576
2568 2577 char file_name[JVM_MAXPATHLEN];
2569 2578 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2570 2579 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2571 2580
2572 2581 fileStream fout(file_name);
2573 2582 if (!fout.is_open()) {
2574 2583 gclog_or_tty->print_cr(" #### error: could not open file");
2575 2584 return;
2576 2585 }
2577 2586
2578 2587 outputStream* out = &fout;
2579 2588 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2580 2589 out->cr();
2581 2590
2582 2591 out->print_cr("--- ITERATING OVER REGIONS");
2583 2592 out->cr();
2584 2593 PrintReachableRegionClosure rcl(out, vo, all);
2585 2594 _g1h->heap_region_iterate(&rcl);
2586 2595 out->cr();
2587 2596
2588 2597 gclog_or_tty->print_cr(" done");
2589 2598 gclog_or_tty->flush();
2590 2599 }
2591 2600
2592 2601 #endif // PRODUCT
2593 2602
2594 2603 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2595 2604 // Note we are overriding the read-only view of the prev map here, via
2596 2605 // the cast.
2597 2606 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2598 2607 }
2599 2608
2600 2609 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2601 2610 _nextMarkBitMap->clearRange(mr);
2602 2611 }
2603 2612
2604 2613 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2605 2614 clearRangePrevBitmap(mr);
2606 2615 clearRangeNextBitmap(mr);
2607 2616 }
2608 2617
2609 2618 HeapRegion*
2610 2619 ConcurrentMark::claim_region(int task_num) {
2611 2620 // "checkpoint" the finger
2612 2621 HeapWord* finger = _finger;
2613 2622
2614 2623 // _heap_end will not change underneath our feet; it only changes at
2615 2624 // yield points.
2616 2625 while (finger < _heap_end) {
2617 2626 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2618 2627
2619 2628 // Note on how this code handles humongous regions. In the
2620 2629 // normal case the finger will reach the start of a "starts
2621 2630 // humongous" (SH) region. Its end will either be the end of the
2622 2631 // last "continues humongous" (CH) region in the sequence, or the
2623 2632 // standard end of the SH region (if the SH is the only region in
2624 2633 // the sequence). That way claim_region() will skip over the CH
2625 2634 // regions. However, there is a subtle race between a CM thread
2626 2635 // executing this method and a mutator thread doing a humongous
2627 2636 // object allocation. The two are not mutually exclusive as the CM
2628 2637 // thread does not need to hold the Heap_lock when it gets
2629 2638 // here. So there is a chance that claim_region() will come across
2630 2639 // a free region that's in the progress of becoming a SH or a CH
2631 2640 // region. In the former case, it will either
2632 2641 // a) Miss the update to the region's end, in which case it will
2633 2642 // visit every subsequent CH region, will find their bitmaps
2634 2643 // empty, and do nothing, or
2635 2644 // b) Will observe the update of the region's end (in which case
2636 2645 // it will skip the subsequent CH regions).
2637 2646 // If it comes across a region that suddenly becomes CH, the
2638 2647 // scenario will be similar to b). So, the race between
2639 2648 // claim_region() and a humongous object allocation might force us
2640 2649 // to do a bit of unnecessary work (due to some unnecessary bitmap
2641 2650 // iterations) but it should not introduce any correctness issues.
2642 2651 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2643 2652 HeapWord* bottom = curr_region->bottom();
2644 2653 HeapWord* end = curr_region->end();
2645 2654 HeapWord* limit = curr_region->next_top_at_mark_start();
2646 2655
2647 2656 if (verbose_low()) {
2648 2657 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2649 2658 "["PTR_FORMAT", "PTR_FORMAT"), "
2650 2659 "limit = "PTR_FORMAT,
2651 2660 task_num, curr_region, bottom, end, limit);
2652 2661 }
2653 2662
2654 2663 // Is the gap between reading the finger and doing the CAS too long?
2655 2664 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2656 2665 if (res == finger) {
2657 2666 // we succeeded
2658 2667
2659 2668 // notice that _finger == end cannot be guaranteed here since
2660 2669 // someone else might have moved the finger even further
2661 2670 assert(_finger >= end, "the finger should have moved forward");
2662 2671
2663 2672 if (verbose_low()) {
2664 2673 gclog_or_tty->print_cr("[%d] we were successful with region = "
2665 2674 PTR_FORMAT, task_num, curr_region);
2666 2675 }
2667 2676
2668 2677 if (limit > bottom) {
2669 2678 if (verbose_low()) {
2670 2679 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2671 2680 "returning it ", task_num, curr_region);
2672 2681 }
2673 2682 return curr_region;
2674 2683 } else {
2675 2684 assert(limit == bottom,
2676 2685 "the region limit should be at bottom");
2677 2686 if (verbose_low()) {
2678 2687 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2679 2688 "returning NULL", task_num, curr_region);
2680 2689 }
2681 2690 // we return NULL and the caller should try calling
2682 2691 // claim_region() again.
2683 2692 return NULL;
2684 2693 }
2685 2694 } else {
2686 2695 assert(_finger > finger, "the finger should have moved forward");
2687 2696 if (verbose_low()) {
2688 2697 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2689 2698 "global finger = "PTR_FORMAT", "
2690 2699 "our finger = "PTR_FORMAT,
2691 2700 task_num, _finger, finger);
2692 2701 }
2693 2702
2694 2703 // read it again
2695 2704 finger = _finger;
2696 2705 }
2697 2706 }
2698 2707
2699 2708 return NULL;
2700 2709 }
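
claim_region() is thus a lock-free protocol over the shared finger: read it, compute the end of the region it points into, and CAS the finger forward; success claims the region, failure means another worker advanced it first and the loop retries from the new value. A minimal sketch with std::atomic in place of Atomic::cmpxchg_ptr, assuming fixed-size regions for brevity:

#include <atomic>
#include <cstddef>

std::atomic<char*> g_finger;  // shared claim pointer, starts at heap bottom

// Claim the region containing the current finger, or return nullptr
// once the finger has reached the end of the heap.
char* claim_region(char* heap_end, size_t region_size) {
  char* finger = g_finger.load();
  while (finger < heap_end) {
    char* end = finger + region_size;  // end of the region holding finger
    // Success claims [finger, end); on failure compare_exchange_strong
    // reloads `finger` with the value another worker installed.
    if (g_finger.compare_exchange_strong(finger, end)) {
      return finger;
    }
  }
  return nullptr;
}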
2701 2710
2702 2711 #ifndef PRODUCT
2703 2712 enum VerifyNoCSetOopsPhase {
2704 2713 VerifyNoCSetOopsStack,
2705 2714 VerifyNoCSetOopsQueues,
2706 2715 VerifyNoCSetOopsSATBCompleted,
2707 2716 VerifyNoCSetOopsSATBThread
2708 2717 };
2709 2718
2710 2719 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
2711 2720 private:
2712 2721 G1CollectedHeap* _g1h;
2713 2722 VerifyNoCSetOopsPhase _phase;
2714 2723 int _info;
2715 2724
2716 2725 const char* phase_str() {
2717 2726 switch (_phase) {
2718 2727 case VerifyNoCSetOopsStack: return "Stack";
2719 2728 case VerifyNoCSetOopsQueues: return "Queue";
2720 2729 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2721 2730 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
2722 2731 default: ShouldNotReachHere();
2723 2732 }
2724 2733 return NULL;
2725 2734 }
2726 2735
2727 2736 void do_object_work(oop obj) {
2728 2737 guarantee(!_g1h->obj_in_cs(obj),
2729 2738 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2730 2739 (void*) obj, phase_str(), _info));
2731 2740 }
2732 2741
2733 2742 public:
2734 2743 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2735 2744
2736 2745 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2737 2746 _phase = phase;
2738 2747 _info = info;
2739 2748 }
2740 2749
2741 2750 virtual void do_oop(oop* p) {
2742 2751 oop obj = oopDesc::load_decode_heap_oop(p);
2743 2752 do_object_work(obj);
2744 2753 }
2745 2754
2746 2755 virtual void do_oop(narrowOop* p) {
2747 2756 // We should not come across narrow oops while scanning marking
2748 2757 // stacks and SATB buffers.
2749 2758 ShouldNotReachHere();
2750 2759 }
2751 2760
2752 2761 virtual void do_object(oop obj) {
2753 2762 do_object_work(obj);
2754 2763 }
2755 2764 };
2756 2765
2757 2766 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2758 2767 bool verify_enqueued_buffers,
2759 2768 bool verify_thread_buffers,
2760 2769 bool verify_fingers) {
2761 2770 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2762 2771 if (!G1CollectedHeap::heap()->mark_in_progress()) {
2763 2772 return;
2764 2773 }
2765 2774
2766 2775 VerifyNoCSetOopsClosure cl;
2767 2776
2768 2777 if (verify_stacks) {
2769 2778 // Verify entries on the global mark stack
2770 2779 cl.set_phase(VerifyNoCSetOopsStack);
2771 2780 _markStack.oops_do(&cl);
2772 2781
2773 2782 // Verify entries on the task queues
2774 2783 for (int i = 0; i < (int) _max_task_num; i += 1) {
2775 2784 cl.set_phase(VerifyNoCSetOopsQueues, i);
2776 2785 OopTaskQueue* queue = _task_queues->queue(i);
2777 2786 queue->oops_do(&cl);
2778 2787 }
2779 2788 }
2780 2789
2781 2790 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2782 2791
2783 2792 // Verify entries on the enqueued SATB buffers
2784 2793 if (verify_enqueued_buffers) {
2785 2794 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2786 2795 satb_qs.iterate_completed_buffers_read_only(&cl);
2787 2796 }
2788 2797
2789 2798 // Verify entries on the per-thread SATB buffers
2790 2799 if (verify_thread_buffers) {
2791 2800 cl.set_phase(VerifyNoCSetOopsSATBThread);
2792 2801 satb_qs.iterate_thread_buffers_read_only(&cl);
2793 2802 }
2794 2803
2795 2804 if (verify_fingers) {
2796 2805 // Verify the global finger
2797 2806 HeapWord* global_finger = finger();
2798 2807 if (global_finger != NULL && global_finger < _heap_end) {
2799 2808 // The global finger always points to a heap region boundary. We
2800 2809 // use heap_region_containing_raw() to get the containing region
2801 2810 // given that the global finger could be pointing to a free region
2802 2811 // which subsequently becomes "continues humongous". If that
2803 2812 // happens, heap_region_containing() will return the bottom of the
2804 2813 // corresponding starts humongous region and the check below will
2805 2814 // not hold any more.
2806 2815 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2807 2816 guarantee(global_finger == global_hr->bottom(),
2808 2817 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2809 2818 global_finger, HR_FORMAT_PARAMS(global_hr)));
2810 2819 }
2811 2820
2812 2821 // Verify the task fingers
2813 2822 assert(parallel_marking_threads() <= _max_task_num, "sanity");
2814 2823 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2815 2824 CMTask* task = _tasks[i];
2816 2825 HeapWord* task_finger = task->finger();
2817 2826 if (task_finger != NULL && task_finger < _heap_end) {
2818 2827 // See above note on the global finger verification.
2819 2828 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2820 2829 guarantee(task_finger == task_hr->bottom() ||
2821 2830 !task_hr->in_collection_set(),
2822 2831 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2823 2832 task_finger, HR_FORMAT_PARAMS(task_hr)));
2824 2833 }
2825 2834 }
2826 2835 }
2827 2836 }
2828 2837 #endif // PRODUCT
2829 2838
2830 2839 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2831 2840 _markStack.setEmpty();
2832 2841 _markStack.clear_overflow();
2833 2842 if (clear_overflow) {
2834 2843 clear_has_overflown();
2835 2844 } else {
2836 2845 assert(has_overflown(), "pre-condition");
2837 2846 }
2838 2847 _finger = _heap_start;
2839 2848
2840 2849 for (int i = 0; i < (int)_max_task_num; ++i) {
2841 2850 OopTaskQueue* queue = _task_queues->queue(i);
2842 2851 queue->set_empty();
2843 2852 }
2844 2853 }
2845 2854
2846 2855 // Aggregate the counting data that was constructed concurrently
2847 2856 // with marking.
2848 2857 class AggregateCountDataHRClosure: public HeapRegionClosure {
2858 + G1CollectedHeap* _g1h;
2849 2859 ConcurrentMark* _cm;
2860 + CardTableModRefBS* _ct_bs;
2850 2861 BitMap* _cm_card_bm;
2851 2862 size_t _max_task_num;
2852 2863
2853 2864 public:
2854 - AggregateCountDataHRClosure(ConcurrentMark *cm,
2865 + AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2855 2866 BitMap* cm_card_bm,
2856 2867 size_t max_task_num) :
2857 - _cm(cm), _cm_card_bm(cm_card_bm),
2858 - _max_task_num(max_task_num) { }
2859 -
2860 - bool is_card_aligned(HeapWord* p) {
2861 - return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
2862 - }
2868 + _g1h(g1h), _cm(g1h->concurrent_mark()),
2869 + _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
2870 + _cm_card_bm(cm_card_bm), _max_task_num(max_task_num) { }
2863 2871
2864 2872 bool doHeapRegion(HeapRegion* hr) {
2865 2873 if (hr->continuesHumongous()) {
2866 2874 // We will ignore these here and process them when their
2867 2875 // associated "starts humongous" region is processed.
2868 2876 // Note that we cannot rely on their associated
2869 2877 // "starts humongous" region to have their bit set to 1
2870 2878 // since, due to the region chunking in the parallel region
2871 2879 // iteration, a "continues humongous" region might be visited
2872 2880 // before its associated "starts humongous".
2873 2881 return false;
2874 2882 }
2875 2883
2876 2884 HeapWord* start = hr->bottom();
2877 2885 HeapWord* limit = hr->next_top_at_mark_start();
2878 2886 HeapWord* end = hr->end();
2879 2887
2880 2888 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2881 2889 err_msg("Preconditions not met - "
2882 2890 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2883 2891 "top: "PTR_FORMAT", end: "PTR_FORMAT,
2884 2892 start, limit, hr->top(), hr->end()));
2885 2893
2886 2894 assert(hr->next_marked_bytes() == 0, "Precondition");
2887 2895
2888 2896 if (start == limit) {
2889 2897 // NTAMS of this region has not been set so nothing to do.
2890 2898 return false;
2891 2899 }
2892 2900
2893 - assert(is_card_aligned(start), "sanity");
2894 - assert(is_card_aligned(end), "sanity");
2901 + // 'start' should be in the heap.
2902 + assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2903 + // 'end' *may* be outside the heap (if hr is the last region)
2904 + assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2895 2905
2896 2906 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2897 2907 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2898 2908 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2899 2909
2900 - // If ntams is not card aligned then we bump the index for
2901 - // limit so that we get the card spanning ntams.
2902 - if (!is_card_aligned(limit)) {
2910 + // If ntams is not card aligned then we bump card bitmap index
2911 + // for limit so that we get all the cards spanned by
2912 + // the object ending at ntams.
2913 + // Note: if this is the last region in the heap then ntams
2914 + // *may* be outside the heap and limit_idx will correspond
2915 + // to a card that is also outside the heap.
2916 + if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2903 2917 limit_idx += 1;
2904 2918 }
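// A worked example of the clipping above (illustrative numbers only,
// assuming the usual 512-byte cards): if this region spans cards
// [100, 164) and ntams falls 40 bytes into card 130, then
// start_idx == 100 and limit_idx is computed as 130; the bump makes
// limit_idx == 131 so that the card containing ntams is included.
// Only when ntams sits exactly on a card boundary (or outside the
// heap, for the last region) is limit_idx left as computed.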
2905 2919
2906 2920 assert(limit_idx <= end_idx, "or else use atomics");
2907 2921
2908 2922 // Aggregate the "stripe" in the count data associated with hr.
2909 2923 uint hrs_index = hr->hrs_index();
2910 2924 size_t marked_bytes = 0;
2911 2925
2912 2926 for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2913 2927 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2914 2928 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2915 2929
2916 2930 // Fetch the marked_bytes in this region for task i and
2917 2931 // add it to the running total for this region.
2918 2932 marked_bytes += marked_bytes_array[hrs_index];
2919 2933
2920 2934 // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2921 2935 // into the global card bitmap.
2922 2936 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2923 2937
2924 2938 while (scan_idx < limit_idx) {
2925 2939 assert(task_card_bm->at(scan_idx) == true, "should be");
2926 2940 _cm_card_bm->set_bit(scan_idx);
2927 2941 assert(_cm_card_bm->at(scan_idx) == true, "should be");
2928 2942
2929 2943 // BitMap::get_next_one_offset() can handle the case when
2930 2944 // its left_offset parameter is greater than its right_offset
2931 - // parameter. If does, however, have an early exit if
2945 + // parameter. It does, however, have an early exit if
2932 2946 // left_offset == right_offset. So let's limit the value
2933 2947 // passed in for left offset here.
2934 2948 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2935 2949 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2936 2950 }
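// The while loop above is the usual "visit every set bit" idiom; a
// minimal standalone sketch of the same pattern (hypothetical
// bitmaps src and dst of equal size, indices lo <= hi):
//
//   BitMap::idx_t idx = src->get_next_one_offset(lo, hi);
//   while (idx < hi) {
//     dst->set_bit(idx);
//     idx = src->get_next_one_offset(MIN2(idx + 1, hi), hi);
//   }
//
// Clamping the left offset with MIN2 guarantees that, once the
// range is exhausted, get_next_one_offset() takes its
// left_offset == right_offset early exit.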
2937 2951 }
2938 2952
2939 2953 // Update the marked bytes for this region.
2940 2954 hr->add_to_marked_bytes(marked_bytes);
2941 2955
2942 2956 // Next heap region
2943 2957 return false;
2944 2958 }
2945 2959 };
2946 2960
2947 2961 class G1AggregateCountDataTask: public AbstractGangTask {
2948 2962 protected:
2949 2963 G1CollectedHeap* _g1h;
2950 2964 ConcurrentMark* _cm;
2951 2965 BitMap* _cm_card_bm;
2952 2966 size_t _max_task_num;
2953 2967 int _active_workers;
2954 2968
2955 2969 public:
2956 2970 G1AggregateCountDataTask(G1CollectedHeap* g1h,
2957 2971 ConcurrentMark* cm,
2958 2972 BitMap* cm_card_bm,
2959 2973 size_t max_task_num,
2960 2974 int n_workers) :
2961 2975 AbstractGangTask("Count Aggregation"),
2962 2976 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2963 2977 _max_task_num(max_task_num),
2964 2978 _active_workers(n_workers) { }
2965 2979
2966 2980 void work(uint worker_id) {
2967 - AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);
2981 + AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_task_num);
2968 2982
2969 2983 if (G1CollectedHeap::use_parallel_gc_threads()) {
2970 2984 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
2971 2985 _active_workers,
2972 2986 HeapRegion::AggregateCountClaimValue);
2973 2987 } else {
2974 2988 _g1h->heap_region_iterate(&cl);
2975 2989 }
2976 2990 }
2977 2991 };
2978 2992
2979 2993
2980 2994 void ConcurrentMark::aggregate_count_data() {
2981 2995 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
2982 2996 _g1h->workers()->active_workers() :
2983 2997 1);
2984 2998
2985 2999 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2986 3000 _max_task_num, n_workers);
2987 3001
2988 3002 if (G1CollectedHeap::use_parallel_gc_threads()) {
2989 3003 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2990 3004 "sanity check");
2991 3005 _g1h->set_par_threads(n_workers);
2992 3006 _g1h->workers()->run_task(&g1_par_agg_task);
2993 3007 _g1h->set_par_threads(0);
2994 3008
2995 3009 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
2996 3010 "sanity check");
2997 3011 _g1h->reset_heap_region_claim_values();
2998 3012 } else {
2999 3013 g1_par_agg_task.work(0);
3000 3014 }
3001 3015 }
3002 3016
3003 3017 // Clear the per-worker arrays used to store the per-region counting data
3004 3018 void ConcurrentMark::clear_all_count_data() {
3005 3019 // Clear the global card bitmap - it will be filled during
3006 3020 // liveness count aggregation (during remark) and the
3007 3021 // final counting task.
3008 3022 _card_bm.clear();
3009 3023
3010 3024 // Clear the global region bitmap - it will be filled as part
3011 3025 // of the final counting task.
3012 3026 _region_bm.clear();
3013 3027
3014 3028 uint max_regions = _g1h->max_regions();
3015 3029 assert(_max_task_num != 0, "uninitialized");
3016 3030
3017 3031 for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3018 3032 BitMap* task_card_bm = count_card_bitmap_for(i);
3019 3033 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3020 3034
3021 3035 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3022 3036 assert(marked_bytes_array != NULL, "uninitialized");
3023 3037
3024 3038 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3025 3039 task_card_bm->clear();
3026 3040 }
3027 3041 }
3028 3042
3029 3043 void ConcurrentMark::print_stats() {
3030 3044 if (verbose_stats()) {
3031 3045 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3032 3046 for (size_t i = 0; i < _active_tasks; ++i) {
3033 3047 _tasks[i]->print_stats();
3034 3048 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3035 3049 }
3036 3050 }
3037 3051 }
3038 3052
3039 3053 // abandon current marking iteration due to a Full GC
3040 3054 void ConcurrentMark::abort() {
3041 3055 // Clear all marks to force marking thread to do nothing
3042 3056 _nextMarkBitMap->clearAll();
3043 3057 // Clear the liveness counting data
3044 3058 clear_all_count_data();
3045 3059 // Empty mark stack
3046 3060 clear_marking_state();
3047 3061 for (int i = 0; i < (int)_max_task_num; ++i) {
3048 3062 _tasks[i]->clear_region_fields();
3049 3063 }
3050 3064 _has_aborted = true;
3051 3065
3052 3066 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3053 3067 satb_mq_set.abandon_partial_marking();
3054 3068 // This can be called either during or outside marking, we'll read
3055 3069 // the expected_active value from the SATB queue set.
3056 3070 satb_mq_set.set_active_all_threads(
3057 3071 false, /* new active value */
3058 3072 satb_mq_set.is_active() /* expected_active */);
3059 3073 }
3060 3074
3061 3075 static void print_ms_time_info(const char* prefix, const char* name,
3062 3076 NumberSeq& ns) {
3063 3077 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3064 3078 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3065 3079 if (ns.num() > 0) {
3066 3080 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3067 3081 prefix, ns.sd(), ns.maximum());
3068 3082 }
3069 3083 }
3070 3084
3071 3085 void ConcurrentMark::print_summary_info() {
3072 3086 gclog_or_tty->print_cr(" Concurrent marking:");
3073 3087 print_ms_time_info(" ", "init marks", _init_times);
3074 3088 print_ms_time_info(" ", "remarks", _remark_times);
3075 3089 {
3076 3090 print_ms_time_info(" ", "final marks", _remark_mark_times);
3077 3091 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3078 3092
3079 3093 }
3080 3094 print_ms_time_info(" ", "cleanups", _cleanup_times);
3081 3095 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3082 3096 _total_counting_time,
3083 3097 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3084 3098 (double)_cleanup_times.num()
3085 3099 : 0.0));
3086 3100 if (G1ScrubRemSets) {
3087 3101 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3088 3102 _total_rs_scrub_time,
3089 3103 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3090 3104 (double)_cleanup_times.num()
3091 3105 : 0.0));
3092 3106 }
3093 3107 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3094 3108 (_init_times.sum() + _remark_times.sum() +
3095 3109 _cleanup_times.sum())/1000.0);
3096 3110 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3097 3111 "(%8.2f s marking).",
3098 3112 cmThread()->vtime_accum(),
3099 3113 cmThread()->vtime_mark_accum());
3100 3114 }
3101 3115
3102 3116 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3103 3117 _parallel_workers->print_worker_threads_on(st);
3104 3118 }
3105 3119
3106 3120 // We take a break if someone is trying to stop the world.
3107 3121 bool ConcurrentMark::do_yield_check(uint worker_id) {
3108 3122 if (should_yield()) {
3109 3123 if (worker_id == 0) {
3110 3124 _g1h->g1_policy()->record_concurrent_pause();
3111 3125 }
3112 3126 cmThread()->yield();
3113 3127 return true;
3114 3128 } else {
3115 3129 return false;
3116 3130 }
3117 3131 }
3118 3132
3119 3133 bool ConcurrentMark::should_yield() {
3120 3134 return cmThread()->should_yield();
3121 3135 }
3122 3136
3123 3137 bool ConcurrentMark::containing_card_is_marked(void* p) {
3124 3138 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3125 3139 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3126 3140 }
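// The arithmetic above, concretely (illustrative address only): with
// card_shift == 9, i.e. 512-byte cards, a pointer 5120 bytes past
// the start of the reserved region yields offset 5120, and
// 5120 >> 9 == 10, so the query reads bit 10 of _card_bm.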
3127 3141
3128 3142 bool ConcurrentMark::containing_cards_are_marked(void* start,
3129 3143 void* last) {
3130 3144 return containing_card_is_marked(start) &&
3131 3145 containing_card_is_marked(last);
3132 3146 }
3133 3147
3134 3148 #ifndef PRODUCT
3135 3149 // for debugging purposes
3136 3150 void ConcurrentMark::print_finger() {
3137 3151 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3138 3152 _heap_start, _heap_end, _finger);
3139 3153 for (int i = 0; i < (int) _max_task_num; ++i) {
3140 3154 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
3141 3155 }
3142 3156 gclog_or_tty->print_cr("");
3143 3157 }
3144 3158 #endif
3145 3159
3146 3160 void CMTask::scan_object(oop obj) {
3147 3161 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3148 3162
3149 3163 if (_cm->verbose_high()) {
3150 3164 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3151 3165 _task_id, (void*) obj);
3152 3166 }
3153 3167
3154 3168 size_t obj_size = obj->size();
3155 3169 _words_scanned += obj_size;
3156 3170
3157 3171 obj->oop_iterate(_cm_oop_closure);
3158 3172 statsOnly( ++_objs_scanned );
3159 3173 check_limits();
3160 3174 }
3161 3175
3162 3176 // Closure for iteration over bitmaps
3163 3177 class CMBitMapClosure : public BitMapClosure {
3164 3178 private:
3165 3179 // the bitmap that is being iterated over
3166 3180 CMBitMap* _nextMarkBitMap;
3167 3181 ConcurrentMark* _cm;
3168 3182 CMTask* _task;
3169 3183
3170 3184 public:
3171 3185 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3172 3186 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3173 3187
3174 3188 bool do_bit(size_t offset) {
3175 3189 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3176 3190 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3177 3191 assert( addr < _cm->finger(), "invariant");
3178 3192
3179 3193 statsOnly( _task->increase_objs_found_on_bitmap() );
3180 3194 assert(addr >= _task->finger(), "invariant");
3181 3195
3182 3196 // We move that task's local finger along.
3183 3197 _task->move_finger_to(addr);
3184 3198
3185 3199 _task->scan_object(oop(addr));
3186 3200 // we only partially drain the local queue and global stack
3187 3201 _task->drain_local_queue(true);
3188 3202 _task->drain_global_stack(true);
3189 3203
3190 3204 // if the has_aborted flag has been raised, we need to bail out of
3191 3205 // the iteration
3192 3206 return !_task->has_aborted();
3193 3207 }
3194 3208 };
3195 3209
3196 3210 // Closure for iterating over objects, currently only used for
3197 3211 // processing SATB buffers.
3198 3212 class CMObjectClosure : public ObjectClosure {
3199 3213 private:
3200 3214 CMTask* _task;
3201 3215
3202 3216 public:
3203 3217 void do_object(oop obj) {
3204 3218 _task->deal_with_reference(obj);
3205 3219 }
3206 3220
3207 3221 CMObjectClosure(CMTask* task) : _task(task) { }
3208 3222 };
3209 3223
3210 3224 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3211 3225 ConcurrentMark* cm,
3212 3226 CMTask* task)
3213 3227 : _g1h(g1h), _cm(cm), _task(task) {
3214 3228 assert(_ref_processor == NULL, "should be initialized to NULL");
3215 3229
3216 3230 if (G1UseConcMarkReferenceProcessing) {
3217 3231 _ref_processor = g1h->ref_processor_cm();
3218 3232 assert(_ref_processor != NULL, "should not be NULL");
3219 3233 }
3220 3234 }
3221 3235
3222 3236 void CMTask::setup_for_region(HeapRegion* hr) {
3223 3237 // Separated the asserts so that we know which one fires.
3224 3238 assert(hr != NULL,
3225 3239 "claim_region() should have filtered out continues humongous regions");
3226 3240 assert(!hr->continuesHumongous(),
3227 3241 "claim_region() should have filtered out continues humongous regions");
3228 3242
3229 3243 if (_cm->verbose_low()) {
3230 3244 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3231 3245 _task_id, hr);
3232 3246 }
3233 3247
3234 3248 _curr_region = hr;
3235 3249 _finger = hr->bottom();
3236 3250 update_region_limit();
3237 3251 }
3238 3252
3239 3253 void CMTask::update_region_limit() {
3240 3254 HeapRegion* hr = _curr_region;
3241 3255 HeapWord* bottom = hr->bottom();
3242 3256 HeapWord* limit = hr->next_top_at_mark_start();
3243 3257
3244 3258 if (limit == bottom) {
3245 3259 if (_cm->verbose_low()) {
3246 3260 gclog_or_tty->print_cr("[%d] found an empty region "
3247 3261 "["PTR_FORMAT", "PTR_FORMAT")",
3248 3262 _task_id, bottom, limit);
3249 3263 }
3250 3264 // The region was collected underneath our feet.
3251 3265 // We set the finger to bottom to ensure that the bitmap
3252 3266 // iteration that will follow this will not do anything.
3253 3267 // (this is not a condition that holds when we set the region up,
3254 3268 // as the region is not supposed to be empty in the first place)
3255 3269 _finger = bottom;
3256 3270 } else if (limit >= _region_limit) {
3257 3271 assert(limit >= _finger, "peace of mind");
3258 3272 } else {
3259 3273 assert(limit < _region_limit, "only way to get here");
3260 3274 // This can happen under some pretty unusual circumstances. An
3261 3275 // evacuation pause empties the region underneath our feet (NTAMS
3262 3276 // at bottom). We then do some allocation in the region (NTAMS
3263 3277 // stays at bottom), followed by the region being used as a GC
3264 3278 // alloc region (NTAMS will move to top() and the objects
3265 3279 // originally below it will be grayed). All objects now marked in
3266 3280 // the region are explicitly grayed, if below the global finger,
3267 3281 // and we do not in fact need to scan anything else. So, we simply
3268 3282 // set _finger to be limit to ensure that the bitmap iteration
3269 3283 // doesn't do anything.
3270 3284 _finger = limit;
3271 3285 }
3272 3286
3273 3287 _region_limit = limit;
3274 3288 }
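// To summarize the three cases update_region_limit() distinguishes
// (a sketch of the logic above, with B = bottom, N = the freshly
// read ntams and L = the previously recorded _region_limit):
//
//   N == B      : the region was emptied under us; _finger is parked
//                 at B so the bitmap iteration is a no-op.
//   N >= L      : the common case; scanning proceeds up to N.
//   B < N < L   : the region was emptied and then refilled as a GC
//                 alloc region; all live objects are already
//                 explicitly grayed, so _finger is parked at N.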
3275 3289
3276 3290 void CMTask::giveup_current_region() {
3277 3291 assert(_curr_region != NULL, "invariant");
3278 3292 if (_cm->verbose_low()) {
3279 3293 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3280 3294 _task_id, _curr_region);
3281 3295 }
3282 3296 clear_region_fields();
3283 3297 }
3284 3298
3285 3299 void CMTask::clear_region_fields() {
3286 3300 // Values for these three fields that indicate that we're not
3287 3301 // holding on to a region.
3288 3302 _curr_region = NULL;
3289 3303 _finger = NULL;
3290 3304 _region_limit = NULL;
3291 3305 }
3292 3306
3293 3307 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3294 3308 if (cm_oop_closure == NULL) {
3295 3309 assert(_cm_oop_closure != NULL, "invariant");
3296 3310 } else {
3297 3311 assert(_cm_oop_closure == NULL, "invariant");
3298 3312 }
3299 3313 _cm_oop_closure = cm_oop_closure;
3300 3314 }
3301 3315
3302 3316 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3303 3317 guarantee(nextMarkBitMap != NULL, "invariant");
3304 3318
3305 3319 if (_cm->verbose_low()) {
3306 3320 gclog_or_tty->print_cr("[%d] resetting", _task_id);
3307 3321 }
3308 3322
3309 3323 _nextMarkBitMap = nextMarkBitMap;
3310 3324 clear_region_fields();
3311 3325
3312 3326 _calls = 0;
3313 3327 _elapsed_time_ms = 0.0;
3314 3328 _termination_time_ms = 0.0;
3315 3329 _termination_start_time_ms = 0.0;
3316 3330
3317 3331 #if _MARKING_STATS_
3318 3332 _local_pushes = 0;
3319 3333 _local_pops = 0;
3320 3334 _local_max_size = 0;
3321 3335 _objs_scanned = 0;
3322 3336 _global_pushes = 0;
3323 3337 _global_pops = 0;
3324 3338 _global_max_size = 0;
3325 3339 _global_transfers_to = 0;
3326 3340 _global_transfers_from = 0;
3327 3341 _regions_claimed = 0;
3328 3342 _objs_found_on_bitmap = 0;
3329 3343 _satb_buffers_processed = 0;
3330 3344 _steal_attempts = 0;
3331 3345 _steals = 0;
3332 3346 _aborted = 0;
3333 3347 _aborted_overflow = 0;
3334 3348 _aborted_cm_aborted = 0;
3335 3349 _aborted_yield = 0;
3336 3350 _aborted_timed_out = 0;
3337 3351 _aborted_satb = 0;
3338 3352 _aborted_termination = 0;
3339 3353 #endif // _MARKING_STATS_
3340 3354 }
3341 3355
3342 3356 bool CMTask::should_exit_termination() {
3343 3357 regular_clock_call();
3344 3358 // This is called when we are in the termination protocol. We should
3345 3359 // quit if, for some reason, this task wants to abort or the global
3346 3360 // stack is not empty (this means that we can get work from it).
3347 3361 return !_cm->mark_stack_empty() || has_aborted();
3348 3362 }
3349 3363
3350 3364 void CMTask::reached_limit() {
3351 3365 assert(_words_scanned >= _words_scanned_limit ||
3352 3366 _refs_reached >= _refs_reached_limit ,
3353 3367 "shouldn't have been called otherwise");
3354 3368 regular_clock_call();
3355 3369 }
3356 3370
3357 3371 void CMTask::regular_clock_call() {
3358 3372 if (has_aborted()) return;
3359 3373
3360 3374 // First, we need to recalculate the words scanned and refs reached
3361 3375 // limits for the next clock call.
3362 3376 recalculate_limits();
3363 3377
3364 3378 // During the regular clock call we do the following
3365 3379
3366 3380 // (1) If an overflow has been flagged, then we abort.
3367 3381 if (_cm->has_overflown()) {
3368 3382 set_has_aborted();
3369 3383 return;
3370 3384 }
3371 3385
3372 3386 // If we are not concurrent (i.e. we're doing remark) we don't need
3373 3387 // to check anything else. The other steps are only needed during
3374 3388 // the concurrent marking phase.
3375 3389 if (!concurrent()) return;
3376 3390
3377 3391 // (2) If marking has been aborted for Full GC, then we also abort.
3378 3392 if (_cm->has_aborted()) {
3379 3393 set_has_aborted();
3380 3394 statsOnly( ++_aborted_cm_aborted );
3381 3395 return;
3382 3396 }
3383 3397
3384 3398 double curr_time_ms = os::elapsedVTime() * 1000.0;
3385 3399
3386 3400 // (3) If marking stats are enabled, then we update the step history.
3387 3401 #if _MARKING_STATS_
3388 3402 if (_words_scanned >= _words_scanned_limit) {
3389 3403 ++_clock_due_to_scanning;
3390 3404 }
3391 3405 if (_refs_reached >= _refs_reached_limit) {
3392 3406 ++_clock_due_to_marking;
3393 3407 }
3394 3408
3395 3409 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3396 3410 _interval_start_time_ms = curr_time_ms;
3397 3411 _all_clock_intervals_ms.add(last_interval_ms);
3398 3412
3399 3413 if (_cm->verbose_medium()) {
3400 3414 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3401 3415 "scanned = %d%s, refs reached = %d%s",
3402 3416 _task_id, last_interval_ms,
3403 3417 _words_scanned,
3404 3418 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3405 3419 _refs_reached,
3406 3420 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3407 3421 }
3408 3422 #endif // _MARKING_STATS_
3409 3423
3410 3424 // (4) We check whether we should yield. If we have to, then we abort.
3411 3425 if (_cm->should_yield()) {
3412 3426 // We should yield. To do this we abort the task. The caller is
3413 3427 // responsible for yielding.
3414 3428 set_has_aborted();
3415 3429 statsOnly( ++_aborted_yield );
3416 3430 return;
3417 3431 }
3418 3432
3419 3433 // (5) We check whether we've reached our time quota. If we have,
3420 3434 // then we abort.
3421 3435 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3422 3436 if (elapsed_time_ms > _time_target_ms) {
3423 3437 set_has_aborted();
3424 3438 _has_timed_out = true;
3425 3439 statsOnly( ++_aborted_timed_out );
3426 3440 return;
3427 3441 }
3428 3442
3429 3443 // (6) Finally, we check whether there are enough completed SATB
3430 3444 // buffers available for processing. If there are, we abort.
3431 3445 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3432 3446 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3433 3447 if (_cm->verbose_low()) {
3434 3448 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3435 3449 _task_id);
3436 3450 }
3437 3451 // we do need to process SATB buffers, we'll abort and restart
3438 3452 // the marking task to do so
3439 3453 set_has_aborted();
3440 3454 statsOnly( ++_aborted_satb );
3441 3455 return;
3442 3456 }
3443 3457 }
3444 3458
3445 3459 void CMTask::recalculate_limits() {
3446 3460 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3447 3461 _words_scanned_limit = _real_words_scanned_limit;
3448 3462
3449 3463 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3450 3464 _refs_reached_limit = _real_refs_reached_limit;
3451 3465 }
3452 3466
3453 3467 void CMTask::decrease_limits() {
3454 3468 // This is called when we believe that we're going to do an infrequent
3455 3469 // operation which will increase the per byte scanned cost (i.e. move
3456 3470 // entries to/from the global stack). It basically tries to decrease the
3457 3471 // scanning limit so that the clock is called earlier.
3458 3472
3459 3473 if (_cm->verbose_medium()) {
3460 3474 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3461 3475 }
3462 3476
3463 3477 _words_scanned_limit = _real_words_scanned_limit -
3464 3478 3 * words_scanned_period / 4;
3465 3479 _refs_reached_limit = _real_refs_reached_limit -
3466 3480 3 * refs_reached_period / 4;
3467 3481 }
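// Numerically (with a hypothetical words_scanned_period of 12*1024,
// purely for illustration): immediately after recalculate_limits()
// the limit is _words_scanned + 12*1024; after decrease_limits() it
// becomes _words_scanned + 3*1024, so at most a quarter of the usual
// scanning period remains before the next clock call.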
3468 3482
3469 3483 void CMTask::move_entries_to_global_stack() {
3470 3484 // local array where we'll store the entries that will be popped
3471 3485 // from the local queue
3472 3486 oop buffer[global_stack_transfer_size];
3473 3487
3474 3488 int n = 0;
3475 3489 oop obj;
3476 3490 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3477 3491 buffer[n] = obj;
3478 3492 ++n;
3479 3493 }
3480 3494
3481 3495 if (n > 0) {
3482 3496 // we popped at least one entry from the local queue
3483 3497
3484 3498 statsOnly( ++_global_transfers_to; _local_pops += n );
3485 3499
3486 3500 if (!_cm->mark_stack_push(buffer, n)) {
3487 3501 if (_cm->verbose_low()) {
3488 3502 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3489 3503 _task_id);
3490 3504 }
3491 3505 set_has_aborted();
3492 3506 } else {
3493 3507 // the transfer was successful
3494 3508
3495 3509 if (_cm->verbose_medium()) {
3496 3510 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3497 3511 _task_id, n);
3498 3512 }
3499 3513 statsOnly( int tmp_size = _cm->mark_stack_size();
3500 3514 if (tmp_size > _global_max_size) {
3501 3515 _global_max_size = tmp_size;
3502 3516 }
3503 3517 _global_pushes += n );
3504 3518 }
3505 3519 }
3506 3520
3507 3521 // this operation was quite expensive, so decrease the limits
3508 3522 decrease_limits();
3509 3523 }
3510 3524
3511 3525 void CMTask::get_entries_from_global_stack() {
3512 3526 // local array where we'll store the entries that will be popped
3513 3527 // from the global stack.
3514 3528 oop buffer[global_stack_transfer_size];
3515 3529 int n;
3516 3530 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3517 3531 assert(n <= global_stack_transfer_size,
3518 3532 "we should not pop more than the given limit");
3519 3533 if (n > 0) {
3520 3534 // yes, we did actually pop at least one entry
3521 3535
3522 3536 statsOnly( ++_global_transfers_from; _global_pops += n );
3523 3537 if (_cm->verbose_medium()) {
3524 3538 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3525 3539 _task_id, n);
3526 3540 }
3527 3541 for (int i = 0; i < n; ++i) {
3528 3542 bool success = _task_queue->push(buffer[i]);
3529 3543 // We only call this when the local queue is empty or under a
3530 3544 // given target limit. So, we do not expect this push to fail.
3531 3545 assert(success, "invariant");
3532 3546 }
3533 3547
3534 3548 statsOnly( int tmp_size = _task_queue->size();
3535 3549 if (tmp_size > _local_max_size) {
3536 3550 _local_max_size = tmp_size;
3537 3551 }
3538 3552 _local_pushes += n );
3539 3553 }
3540 3554
3541 3555 // this operation was quite expensive, so decrease the limits
3542 3556 decrease_limits();
3543 3557 }
3544 3558
3545 3559 void CMTask::drain_local_queue(bool partially) {
3546 3560 if (has_aborted()) return;
3547 3561
3548 3562 // Decide what the target size is, depending whether we're going to
3549 3563 // drain it partially (so that other tasks can steal if they run out
3550 3564 // of things to do) or totally (at the very end).
3551 3565 size_t target_size;
3552 3566 if (partially) {
3553 3567 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3554 3568 } else {
3555 3569 target_size = 0;
3556 3570 }
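// For example (illustrative values, not necessarily the defaults):
// with max_elems() == 16384 and GCDrainStackTargetSize == 64, a
// partial drain stops once the queue is back down to
// MIN2(16384/3, 64) == 64 entries, leaving work for stealing.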
3557 3571
3558 3572 if (_task_queue->size() > target_size) {
3559 3573 if (_cm->verbose_high()) {
3560 3574 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3561 3575 _task_id, target_size);
3562 3576 }
3563 3577
3564 3578 oop obj;
3565 3579 bool ret = _task_queue->pop_local(obj);
3566 3580 while (ret) {
3567 3581 statsOnly( ++_local_pops );
3568 3582
3569 3583 if (_cm->verbose_high()) {
3570 3584 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3571 3585 (void*) obj);
3572 3586 }
3573 3587
3574 3588 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3575 3589 assert(!_g1h->is_on_master_free_list(
3576 3590 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3577 3591
3578 3592 scan_object(obj);
3579 3593
3580 3594 if (_task_queue->size() <= target_size || has_aborted()) {
3581 3595 ret = false;
3582 3596 } else {
3583 3597 ret = _task_queue->pop_local(obj);
3584 3598 }
3585 3599 }
3586 3600
3587 3601 if (_cm->verbose_high()) {
3588 3602 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3589 3603 _task_id, _task_queue->size());
3590 3604 }
3591 3605 }
3592 3606 }
3593 3607
3594 3608 void CMTask::drain_global_stack(bool partially) {
3595 3609 if (has_aborted()) return;
3596 3610
3597 3611 // We have a policy to drain the local queue before we attempt to
3598 3612 // drain the global stack.
3599 3613 assert(partially || _task_queue->size() == 0, "invariant");
3600 3614
3601 3615 // Decide what the target size is, depending whether we're going to
3602 3616 // drain it partially (so that other tasks can steal if they run out
3603 3617 // of things to do) or totally (at the very end). Notice that,
3604 3618 // because we move entries from the global stack in chunks or
3605 3619 // because another task might be doing the same, we might in fact
3606 3620 // drop below the target. But, this is not a problem.
3607 3621 size_t target_size;
3608 3622 if (partially) {
3609 3623 target_size = _cm->partial_mark_stack_size_target();
3610 3624 } else {
3611 3625 target_size = 0;
3612 3626 }
3613 3627
3614 3628 if (_cm->mark_stack_size() > target_size) {
3615 3629 if (_cm->verbose_low()) {
3616 3630 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3617 3631 _task_id, target_size);
3618 3632 }
3619 3633
3620 3634 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3621 3635 get_entries_from_global_stack();
3622 3636 drain_local_queue(partially);
3623 3637 }
3624 3638
3625 3639 if (_cm->verbose_low()) {
3626 3640 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3627 3641 _task_id, _cm->mark_stack_size());
3628 3642 }
3629 3643 }
3630 3644 }
3631 3645
3632 3646 // SATB Queue has several assumptions on whether to call the par or
3633 3647 // non-par versions of the methods. This is why some of the code is
3634 3648 // replicated. We should really get rid of the single-threaded version
3635 3649 // of the code to simplify things.
3636 3650 void CMTask::drain_satb_buffers() {
3637 3651 if (has_aborted()) return;
3638 3652
3639 3653 // We set this so that the regular clock knows that we're in the
3640 3654 // middle of draining buffers and doesn't set the abort flag when it
3641 3655 // notices that SATB buffers are available for draining. It'd be
3642 3656 // very counterproductive if it did that. :-)
3643 3657 _draining_satb_buffers = true;
3644 3658
3645 3659 CMObjectClosure oc(this);
3646 3660 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3647 3661 if (G1CollectedHeap::use_parallel_gc_threads()) {
3648 3662 satb_mq_set.set_par_closure(_task_id, &oc);
3649 3663 } else {
3650 3664 satb_mq_set.set_closure(&oc);
3651 3665 }
3652 3666
3653 3667 // This keeps claiming and applying the closure to completed buffers
3654 3668 // until we run out of buffers or we need to abort.
3655 3669 if (G1CollectedHeap::use_parallel_gc_threads()) {
3656 3670 while (!has_aborted() &&
3657 3671 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3658 3672 if (_cm->verbose_medium()) {
3659 3673 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3660 3674 }
3661 3675 statsOnly( ++_satb_buffers_processed );
3662 3676 regular_clock_call();
3663 3677 }
3664 3678 } else {
3665 3679 while (!has_aborted() &&
3666 3680 satb_mq_set.apply_closure_to_completed_buffer()) {
3667 3681 if (_cm->verbose_medium()) {
3668 3682 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3669 3683 }
3670 3684 statsOnly( ++_satb_buffers_processed );
3671 3685 regular_clock_call();
3672 3686 }
3673 3687 }
3674 3688
3675 3689 if (!concurrent() && !has_aborted()) {
3676 3690 // We should only do this during remark.
3677 3691 if (G1CollectedHeap::use_parallel_gc_threads()) {
3678 3692 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3679 3693 } else {
3680 3694 satb_mq_set.iterate_closure_all_threads();
3681 3695 }
3682 3696 }
3683 3697
3684 3698 _draining_satb_buffers = false;
3685 3699
3686 3700 assert(has_aborted() ||
3687 3701 concurrent() ||
3688 3702 satb_mq_set.completed_buffers_num() == 0, "invariant");
3689 3703
3690 3704 if (G1CollectedHeap::use_parallel_gc_threads()) {
3691 3705 satb_mq_set.set_par_closure(_task_id, NULL);
3692 3706 } else {
3693 3707 satb_mq_set.set_closure(NULL);
3694 3708 }
3695 3709
3696 3710 // again, this was a potentially expensive operation, decrease the
3697 3711 // limits to get the regular clock call early
3698 3712 decrease_limits();
3699 3713 }
3700 3714
3701 3715 void CMTask::print_stats() {
3702 3716 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3703 3717 _task_id, _calls);
3704 3718 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3705 3719 _elapsed_time_ms, _termination_time_ms);
3706 3720 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3707 3721 _step_times_ms.num(), _step_times_ms.avg(),
3708 3722 _step_times_ms.sd());
3709 3723 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3710 3724 _step_times_ms.maximum(), _step_times_ms.sum());
3711 3725
3712 3726 #if _MARKING_STATS_
3713 3727 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3714 3728 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3715 3729 _all_clock_intervals_ms.sd());
3716 3730 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3717 3731 _all_clock_intervals_ms.maximum(),
3718 3732 _all_clock_intervals_ms.sum());
3719 3733 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3720 3734 _clock_due_to_scanning, _clock_due_to_marking);
3721 3735 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3722 3736 _objs_scanned, _objs_found_on_bitmap);
3723 3737 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3724 3738 _local_pushes, _local_pops, _local_max_size);
3725 3739 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3726 3740 _global_pushes, _global_pops, _global_max_size);
3727 3741 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3728 3742 _global_transfers_to,_global_transfers_from);
3729 3743 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
3730 3744 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3731 3745 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3732 3746 _steal_attempts, _steals);
3733 3747 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3734 3748 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3735 3749 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3736 3750 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3737 3751 _aborted_timed_out, _aborted_satb, _aborted_termination);
3738 3752 #endif // _MARKING_STATS_
3739 3753 }
3740 3754
3741 3755 /*****************************************************************************
3742 3756
3743 3757 The do_marking_step(time_target_ms) method is the building block
3744 3758 of the parallel marking framework. It can be called in parallel
3745 3759 with other invocations of do_marking_step() on different tasks
3746 3760 (but only one per task, obviously) and concurrently with the
3747 3761 mutator threads, or during remark, hence it eliminates the need
3748 3762 for two versions of the code. When called during remark, it will
3749 3763 pick up from where the task left off during the concurrent marking
3750 3764 phase. Interestingly, tasks are also claimable during evacuation
3751 3765 pauses, since do_marking_step() ensures that it aborts before
3752 3766 it needs to yield.
3753 3767
3754 3768 The data structures that it uses to do marking work are the
3755 3769 following:
3756 3770
3757 3771 (1) Marking Bitmap. If there are gray objects that appear only
3758 3772 on the bitmap (this happens either when dealing with an overflow
3759 3773 or when the initial marking phase has simply marked the roots
3760 3774 and didn't push them on the stack), then tasks claim heap
3761 3775 regions whose bitmap they then scan to find gray objects. A
3762 3776 global finger indicates where the end of the last claimed region
3763 3777 is. A local finger indicates how far into the region a task has
3764 3778 scanned. The two fingers are used to determine how to gray an
3765 3779 object (i.e. whether simply marking it is OK, as it will be
3766 3780 visited by a task in the future, or whether it needs to be also
3767 3781 pushed on a stack).
3768 3782
3769 3783 (2) Local Queue. The local queue of the task which is accessed
3770 3784 reasonably efficiently by the task. Other tasks can steal from
3771 3785 it when they run out of work. Throughout the marking phase, a
3772 3786 task attempts to keep its local queue short but not totally
3773 3787 empty, so that entries are available for stealing by other
3774 3788 tasks. Only when there is no more work, a task will totally
3775 3789 drain its local queue.
3776 3790
3777 3791 (3) Global Mark Stack. This handles local queue overflow. During
3778 3792 marking only sets of entries are moved between it and the local
3779 3793 queues, as access to it requires a mutex and more fine-grained
3780 3794 interaction with it might cause contention. If it
3781 3795 overflows, then the marking phase should restart and iterate
3782 3796 over the bitmap to identify gray objects. Throughout the marking
3783 3797 phase, tasks attempt to keep the global mark stack at a small
3784 3798 length but not totally empty, so that entries are available for
3785 3799 popping by other tasks. Only when there is no more work, tasks
3786 3800 will totally drain the global mark stack.
3787 3801
3788 3802 (4) SATB Buffer Queue. This is where completed SATB buffers are
3789 3803 made available. Buffers are regularly removed from this queue
3790 3804 and scanned for roots, so that the queue doesn't get too
3791 3805 long. During remark, all completed buffers are processed, as
3792 3806 well as the filled in parts of any uncompleted buffers.
3793 3807
3794 3808 The do_marking_step() method tries to abort when the time target
3795 3809 has been reached. There are a few other cases when the
3796 3810 do_marking_step() method also aborts:
3797 3811
3798 3812 (1) When the marking phase has been aborted (after a Full GC).
3799 3813
3800 3814 (2) When a global overflow (on the global stack) has been
3801 3815 triggered. Before the task aborts, it will actually sync up with
3802 3816 the other tasks to ensure that all the marking data structures
3803 3817 (local queues, stacks, fingers etc.) are re-initialised so that
3804 3818 when do_marking_step() completes, the marking phase can
3805 3819 immediately restart.
3806 3820
3807 3821 (3) When enough completed SATB buffers are available. The
3808 3822 do_marking_step() method only tries to drain SATB buffers right
3809 3823 at the beginning. So, if enough buffers are available, the
3810 3824 marking step aborts and the SATB buffers are processed at
3811 3825 the beginning of the next invocation.
3812 3826
3813 3827 (4) To yield. When we have to yield, we abort and yield
3814 3828 right at the end of do_marking_step(). This saves us from a lot
3815 3829 of hassle as, by yielding we might allow a Full GC. If this
3816 3830 happens then objects will be compacted underneath our feet, the
3817 3831 heap might shrink, etc. We save checking for this by just
3818 3832 aborting and doing the yield right at the end.
3819 3833
3820 3834 From the above it follows that the do_marking_step() method should
3821 3835 be called in a loop (or, otherwise, regularly) until it completes.
3822 3836
3823 3837 If a marking step completes without its has_aborted() flag being
3824 3838 true, it means it has completed the current marking phase (and
3825 3839 also all other marking tasks have done so and have all synced up).
3826 3840
3827 3841 A method called regular_clock_call() is invoked "regularly" (in
3828 3842 sub ms intervals) throughout marking. It is this clock method that
3829 3843 checks all the abort conditions which were mentioned above and
3830 3844 decides when the task should abort. A work-based scheme is used to
3831 3845 trigger this clock method: when the number of object words the
3832 3846 marking phase has scanned or the number of references the marking
3833 3847 phase has visited reaches a given limit. Additional invocations of
3834 3848 the clock method have been planted in a few other strategic places
3835 3849 too. The initial reason for the clock method was to avoid calling
3836 3850 vtime too regularly, as it is quite expensive. So, once it was in
3837 3851 place, it was natural to piggy-back all the other conditions on it
3838 3852 too and not constantly check them throughout the code.
3839 3853
3840 3854 *****************************************************************************/
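// A minimal sketch of the calling pattern the comment above
// prescribes (a hypothetical driver, not this file's actual
// callers, which live in the concurrent marking and remark tasks):
//
//   do {
//     task->do_marking_step(target_ms,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//     // act on the abort reason here: yield for a safepoint,
//     // process SATB buffers, or sync up after a stack overflow
//   } while (task->has_aborted() && !cm->has_aborted());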
3841 3855
3842 3856 void CMTask::do_marking_step(double time_target_ms,
3843 3857 bool do_stealing,
3844 3858 bool do_termination) {
3845 3859 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3846 3860 assert(concurrent() == _cm->concurrent(), "they should be the same");
3847 3861
3848 3862 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3849 3863 assert(_task_queues != NULL, "invariant");
3850 3864 assert(_task_queue != NULL, "invariant");
3851 3865 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3852 3866
3853 3867 assert(!_claimed,
3854 3868 "only one thread should claim this task at any one time");
3855 3869
3856 3870 // OK, this doesn't safeguard against all possible scenarios, as it is
3857 3871 // possible for two threads to set the _claimed flag at the same
3858 3872 // time. But it is only for debugging purposes anyway and it will
3859 3873 // catch most problems.
3860 3874 _claimed = true;
3861 3875
3862 3876 _start_time_ms = os::elapsedVTime() * 1000.0;
3863 3877 statsOnly( _interval_start_time_ms = _start_time_ms );
3864 3878
3865 3879 double diff_prediction_ms =
3866 3880 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3867 3881 _time_target_ms = time_target_ms - diff_prediction_ms;
3868 3882
3869 3883 // set up the variables that are used in the work-based scheme to
3870 3884 // call the regular clock method
3871 3885 _words_scanned = 0;
3872 3886 _refs_reached = 0;
3873 3887 recalculate_limits();
3874 3888
3875 3889 // clear all flags
3876 3890 clear_has_aborted();
3877 3891 _has_timed_out = false;
3878 3892 _draining_satb_buffers = false;
3879 3893
3880 3894 ++_calls;
3881 3895
3882 3896 if (_cm->verbose_low()) {
3883 3897 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3884 3898 "target = %1.2lfms >>>>>>>>>>",
3885 3899 _task_id, _calls, _time_target_ms);
3886 3900 }
3887 3901
3888 3902 // Set up the bitmap and oop closures. Anything that uses them is
3889 3903 // eventually called from this method, so it is OK to allocate these
3890 3904 // statically.
3891 3905 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3892 3906 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
3893 3907 set_cm_oop_closure(&cm_oop_closure);
3894 3908
3895 3909 if (_cm->has_overflown()) {
3896 3910 // This can happen if the mark stack overflows during a GC pause
3897 3911 // and this task, after a yield point, restarts. We have to abort
3898 3912 // as we need to get into the overflow protocol which happens
3899 3913 // right at the end of this task.
3900 3914 set_has_aborted();
3901 3915 }
3902 3916
3903 3917 // First drain any available SATB buffers. After this, we will not
3904 3918 // look at SATB buffers before the next invocation of this method.
3905 3919 // If enough completed SATB buffers are queued up, the regular clock
3906 3920 // will abort this task so that it restarts.
3907 3921 drain_satb_buffers();
3908 3922 // ...then partially drain the local queue and the global stack
3909 3923 drain_local_queue(true);
3910 3924 drain_global_stack(true);
3911 3925
3912 3926 do {
3913 3927 if (!has_aborted() && _curr_region != NULL) {
3914 3928 // This means that we're already holding on to a region.
3915 3929 assert(_finger != NULL, "if region is not NULL, then the finger "
3916 3930 "should not be NULL either");
3917 3931
3918 3932 // We might have restarted this task after an evacuation pause
3919 3933 // which might have evacuated the region we're holding on to
3920 3934 // underneath our feet. Let's read its limit again to make sure
3921 3935 // that we do not iterate over a region of the heap that
3922 3936 // contains garbage (update_region_limit() will also move
3923 3937 // _finger to the start of the region if it is found empty).
3924 3938 update_region_limit();
3925 3939 // We will start from _finger not from the start of the region,
3926 3940 // as we might be restarting this task after aborting half-way
3927 3941 // through scanning this region. In this case, _finger points to
3928 3942 // the address where we last found a marked object. If this is a
3929 3943 // fresh region, _finger points to start().
3930 3944 MemRegion mr = MemRegion(_finger, _region_limit);
3931 3945
3932 3946 if (_cm->verbose_low()) {
3933 3947 gclog_or_tty->print_cr("[%d] we're scanning part "
3934 3948 "["PTR_FORMAT", "PTR_FORMAT") "
3935 3949 "of region "PTR_FORMAT,
3936 3950 _task_id, _finger, _region_limit, _curr_region);
3937 3951 }
3938 3952
3939 3953 // Let's iterate over the bitmap of the part of the
3940 3954 // region that is left.
3941 3955 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3942 3956 // We successfully completed iterating over the region. Now,
3943 3957 // let's give up the region.
3944 3958 giveup_current_region();
3945 3959 regular_clock_call();
3946 3960 } else {
3947 3961 assert(has_aborted(), "currently the only way to do so");
3948 3962 // The only way to abort the bitmap iteration is to return
3949 3963 // false from the do_bit() method. However, inside the
3950 3964 // do_bit() method we move the _finger to point to the
3951 3965 // object currently being looked at. So, if we bail out, we
3952 3966 // have definitely set _finger to something non-null.
3953 3967 assert(_finger != NULL, "invariant");
3954 3968
3955 3969 // Region iteration was actually aborted. So now _finger
3956 3970 // points to the address of the object we last scanned. If we
3957 3971 // leave it there, when we restart this task, we will rescan
3958 3972 // the object. It is easy to avoid this. We move the finger by
3959 3973 // enough to point to the next possible object header (the
3960 3974 // bitmap knows by how much we need to move it as it knows its
3961 3975 // granularity).
3962 3976 assert(_finger < _region_limit, "invariant");
3963 3977 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3964 3978 // Check if bitmap iteration was aborted while scanning the last object
3965 3979 if (new_finger >= _region_limit) {
3966 3980 giveup_current_region();
3967 3981 } else {
3968 3982 move_finger_to(new_finger);
3969 3983 }
3970 3984 }
3971 3985 }
3972 3986 // At this point we have either completed iterating over the
3973 3987 // region we were holding on to, or we have aborted.
3974 3988
3975 3989 // We then partially drain the local queue and the global stack.
3976 3990 // (Do we really need this?)
3977 3991 drain_local_queue(true);
3978 3992 drain_global_stack(true);
3979 3993
3980 3994 // Read the note on the claim_region() method on why it might
3981 3995 // return NULL with potentially more regions available for
3982 3996 // claiming and why we have to check out_of_regions() to determine
3983 3997 // whether we're done or not.
3984 3998 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3985 3999 // We are going to try to claim a new region. We should have
3986 4000 // given up on the previous one.
3987 4001 // Separated the asserts so that we know which one fires.
3988 4002 assert(_curr_region == NULL, "invariant");
3989 4003 assert(_finger == NULL, "invariant");
3990 4004 assert(_region_limit == NULL, "invariant");
3991 4005 if (_cm->verbose_low()) {
3992 4006 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3993 4007 }
3994 4008 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3995 4009 if (claimed_region != NULL) {
3996 4010 // Yes, we managed to claim one
3997 4011 statsOnly( ++_regions_claimed );
3998 4012
3999 4013 if (_cm->verbose_low()) {
4000 4014 gclog_or_tty->print_cr("[%d] we successfully claimed "
4001 4015 "region "PTR_FORMAT,
4002 4016 _task_id, claimed_region);
4003 4017 }
4004 4018
4005 4019 setup_for_region(claimed_region);
4006 4020 assert(_curr_region == claimed_region, "invariant");
4007 4021 }
4008 4022 // It is important to call the regular clock here. It might take
4009 4023 // a while to claim a region if, for example, we hit a large
4010 4024 // block of empty regions. So we need to call the regular clock
4011 4025 // method once round the loop to make sure it's called
4012 4026 // frequently enough.
4013 4027 regular_clock_call();
4014 4028 }
4015 4029
4016 4030 if (!has_aborted() && _curr_region == NULL) {
4017 4031 assert(_cm->out_of_regions(),
4018 4032 "at this point we should be out of regions");
4019 4033 }
4020 4034 } while ( _curr_region != NULL && !has_aborted());
4021 4035
4022 4036 if (!has_aborted()) {
4023 4037 // We cannot check whether the global stack is empty, since other
4024 4038 // tasks might be pushing objects to it concurrently.
4025 4039 assert(_cm->out_of_regions(),
4026 4040 "at this point we should be out of regions");
4027 4041
4028 4042 if (_cm->verbose_low()) {
4029 4043 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4030 4044 }
4031 4045
4032 4046 // Try to reduce the number of available SATB buffers so that
4033 4047 // remark has less work to do.
4034 4048 drain_satb_buffers();
4035 4049 }
4036 4050
4037 4051 // Since we've done everything else, we can now totally drain the
4038 4052 // local queue and global stack.
4039 4053 drain_local_queue(false);
4040 4054 drain_global_stack(false);
4041 4055
4042 4056 // Attempt at work stealing from other task's queues.
4043 4057 if (do_stealing && !has_aborted()) {
4044 4058 // We have not aborted. This means that we have finished all that
4045 4059 // we could. Let's try to do some stealing...
4046 4060
4047 4061 // We cannot check whether the global stack is empty, since other
4048 4062 // tasks might be pushing objects to it concurrently.
4049 4063 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4050 4064 "only way to reach here");
4051 4065
4052 4066 if (_cm->verbose_low()) {
4053 4067 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4054 4068 }
4055 4069
4056 4070 while (!has_aborted()) {
4057 4071 oop obj;
4058 4072 statsOnly( ++_steal_attempts );
4059 4073
4060 4074 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4061 4075 if (_cm->verbose_medium()) {
4062 4076 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4063 4077 _task_id, (void*) obj);
4064 4078 }
4065 4079
4066 4080 statsOnly( ++_steals );
4067 4081
4068 4082 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4069 4083 "any stolen object should be marked");
4070 4084 scan_object(obj);
4071 4085
4072 4086 // And since we're towards the end, let's totally drain the
4073 4087 // local queue and global stack.
4074 4088 drain_local_queue(false);
4075 4089 drain_global_stack(false);
4076 4090 } else {
4077 4091 break;
4078 4092 }
4079 4093 }
4080 4094 }
4081 4095
4082 4096 // If we are about to wrap up and go into termination, check if we
4083 4097 // should raise the overflow flag.
4084 4098 if (do_termination && !has_aborted()) {
4085 4099 if (_cm->force_overflow()->should_force()) {
4086 4100 _cm->set_has_overflown();
4087 4101 regular_clock_call();
4088 4102 }
4089 4103 }
4090 4104
4091 4105 // We still haven't aborted. Now, let's try to get into the
4092 4106 // termination protocol.
4093 4107 if (do_termination && !has_aborted()) {
4094 4108 // We cannot check whether the global stack is empty, since other
4095 4109 // tasks might be concurrently pushing objects on it.
4096 4110 // Separated the asserts so that we know which one fires.
4097 4111 assert(_cm->out_of_regions(), "only way to reach here");
4098 4112 assert(_task_queue->size() == 0, "only way to reach here");
4099 4113
4100 4114 if (_cm->verbose_low()) {
4101 4115 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4102 4116 }
4103 4117
4104 4118 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4105 4119 // The CMTask class also extends the TerminatorTerminator class,
4106 4120 // hence its should_exit_termination() method will also decide
4107 4121 // whether to exit the termination protocol or not.
4108 4122 bool finished = _cm->terminator()->offer_termination(this);
4109 4123 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4110 4124 _termination_time_ms +=
4111 4125 termination_end_time_ms - _termination_start_time_ms;
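      // (offer_termination() blocks this task until either every task has
      // offered termination -- in which case it returns true -- or the
      // terminator, via should_exit_termination(), asks this task to bail
      // out and look for more work, in which case it returns false.)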
4112 4126
4113 4127 if (finished) {
4114 4128 // We're all done.
4115 4129
4116 4130 if (_task_id == 0) {
4117 4131 // let's allow task 0 to do this
4118 4132 if (concurrent()) {
4119 4133 assert(_cm->concurrent_marking_in_progress(), "invariant");
4120 4134 // we need to set this to false before the next
4121 4135 // safepoint. This way we ensure that the marking phase
4122 4136 // doesn't observe any more heap expansions.
4123 4137 _cm->clear_concurrent_marking_in_progress();
4124 4138 }
4125 4139 }
4126 4140
4127 4141 // We can now guarantee that the global stack is empty, since
4128 4142 // all other tasks have finished. We separated the guarantees so
4129 4143 // that, if a condition is false, we can immediately find out
4130 4144 // which one.
4131 4145 guarantee(_cm->out_of_regions(), "only way to reach here");
4132 4146 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4133 4147 guarantee(_task_queue->size() == 0, "only way to reach here");
4134 4148 guarantee(!_cm->has_overflown(), "only way to reach here");
4135 4149 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4136 4150
4137 4151 if (_cm->verbose_low()) {
4138 4152 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4139 4153 }
4140 4154 } else {
4141 4155       // Apparently there's more work to do. Let's abort this task;
4142 4156       // it will be restarted and we can hopefully find more things to do.
4143 4157
4144 4158 if (_cm->verbose_low()) {
4145 4159 gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4146 4160 _task_id);
4147 4161 }
4148 4162
4149 4163 set_has_aborted();
4150 4164 statsOnly( ++_aborted_termination );
4151 4165 }
4152 4166 }
4153 4167
4154 4168   // Mainly for debugging purposes: make sure that a pointer to the
4155 4169   // closure, which was stack-allocated in this frame, doesn't
4156 4170   // escape it by accident.
4157 4171 set_cm_oop_closure(NULL);
4158 4172 double end_time_ms = os::elapsedVTime() * 1000.0;
4159 4173 double elapsed_time_ms = end_time_ms - _start_time_ms;
4160 4174 // Update the step history.
4161 4175 _step_times_ms.add(elapsed_time_ms);
4162 4176
4163 4177 if (has_aborted()) {
4164 4178 // The task was aborted for some reason.
4165 4179
4166 4180 statsOnly( ++_aborted );
4167 4181
4168 4182 if (_has_timed_out) {
4169 4183 double diff_ms = elapsed_time_ms - _time_target_ms;
4170 4184 // Keep statistics of how well we did with respect to hitting
4171 4185 // our target only if we actually timed out (if we aborted for
4172 4186 // other reasons, then the results might get skewed).
4173 4187 _marking_step_diffs_ms.add(diff_ms);
4174 4188 }
4175 4189
4176 4190 if (_cm->has_overflown()) {
4177 4191 // This is the interesting one. We aborted because a global
4178 4192 // overflow was raised. This means we have to restart the
4179 4193 // marking phase and start iterating over regions. However, in
4180 4194 // order to do this we have to make sure that all tasks stop
4181 4195 // what they are doing and re-initialise in a safe manner. We
4182 4196 // will achieve this with the use of two barrier sync points.
4183 4197
4184 4198 if (_cm->verbose_low()) {
4185 4199 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4186 4200 }
4187 4201
4188 4202 _cm->enter_first_sync_barrier(_task_id);
4189 4203 // When we exit this sync barrier we know that all tasks have
4190 4204 // stopped doing marking work. So, it's now safe to
4191 4205 // re-initialise our data structures. At the end of this method,
4192 4206 // task 0 will clear the global data structures.
4193 4207
4194 4208 statsOnly( ++_aborted_overflow );
4195 4209
4196 4210 // We clear the local state of this task...
4197 4211 clear_region_fields();
4198 4212
4199 4213 // ...and enter the second barrier.
4200 4214 _cm->enter_second_sync_barrier(_task_id);
4201 4215       // At this point everything has been re-initialised and we're
4202 4216 // ready to restart.
4203 4217 }
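      // The handshake above is a classic two-barrier restart; sketched
      // (names hypothetical):
      //
      //   first_barrier.wait();    // all tasks have stopped marking
      //   clear_local_state();     // safe: shared state is quiescent
      //   second_barrier.wait();   // all tasks are re-initialised
      //   // ...every task now restarts from a consistent state
      //
      // Without the second barrier, a fast task could resume marking
      // while a slow one was still clearing its fields.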
4204 4218
4205 4219 if (_cm->verbose_low()) {
4206 4220 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4207 4221 "elapsed = %1.2lfms <<<<<<<<<<",
4208 4222 _task_id, _time_target_ms, elapsed_time_ms);
4209 4223 if (_cm->has_aborted()) {
4210 4224 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4211 4225 _task_id);
4212 4226 }
4213 4227 }
4214 4228 } else {
4215 4229 if (_cm->verbose_low()) {
4216 4230 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4217 4231 "elapsed = %1.2lfms <<<<<<<<<<",
4218 4232 _task_id, _time_target_ms, elapsed_time_ms);
4219 4233 }
4220 4234 }
4221 4235
4222 4236 _claimed = false;
4223 4237 }
4224 4238
4225 4239 CMTask::CMTask(int task_id,
4226 4240 ConcurrentMark* cm,
4227 4241 size_t* marked_bytes,
4228 4242 BitMap* card_bm,
4229 4243 CMTaskQueue* task_queue,
4230 4244 CMTaskQueueSet* task_queues)
4231 4245 : _g1h(G1CollectedHeap::heap()),
4232 4246 _task_id(task_id), _cm(cm),
4233 4247 _claimed(false),
4234 4248 _nextMarkBitMap(NULL), _hash_seed(17),
4235 4249 _task_queue(task_queue),
4236 4250 _task_queues(task_queues),
4237 4251 _cm_oop_closure(NULL),
4238 4252 _marked_bytes_array(marked_bytes),
4239 4253 _card_bm(card_bm) {
4240 4254 guarantee(task_queue != NULL, "invariant");
4241 4255 guarantee(task_queues != NULL, "invariant");
4242 4256
4243 4257 statsOnly( _clock_due_to_scanning = 0;
4244 4258 _clock_due_to_marking = 0 );
4245 4259
4246 4260 _marking_step_diffs_ms.add(0.5);
4247 4261 }
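// (Seeding _marking_step_diffs_ms with a single 0.5ms sample presumably
// gives the very first marking step a non-empty history to predict from,
// before any real timing data has been gathered.)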
4248 4262
4249 4263 // These are formatting macros that are used below to ensure
4250 4264 // consistent formatting. The *_H_* versions are used to format the
4251 4265 // header for a particular value and they should be kept consistent
4252 4266 // with the corresponding macro. Also note that most of the macros add
4253 4267 // the necessary white space (as a prefix) which makes them a bit
4254 4268 // easier to compose.
4255 4269
4256 4270 // All the output lines are prefixed with this string to be able to
4257 4271 // identify them easily in a large log file.
4258 4272 #define G1PPRL_LINE_PREFIX "###"
4259 4273
4260 4274 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4261 4275 #ifdef _LP64
4262 4276 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4263 4277 #else // _LP64
4264 4278 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4265 4279 #endif // _LP64
4266 4280
4267 4281 // For per-region info
4268 4282 #define G1PPRL_TYPE_FORMAT " %-4s"
4269 4283 #define G1PPRL_TYPE_H_FORMAT " %4s"
4270 4284 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4271 4285 #define G1PPRL_BYTE_H_FORMAT " %9s"
4272 4286 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4273 4287 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4274 4288
4275 4289 // For summary info
4276 4290 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4277 4291 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4278 4292 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4279 4293 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
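// Because adjacent C string literals concatenate, a header line such as
//
//   _out->print_cr(G1PPRL_LINE_PREFIX
//                  G1PPRL_TYPE_H_FORMAT
//                  G1PPRL_BYTE_H_FORMAT,
//                  "type", "used");
//
// compiles down to the single format string "### %4s %9s" -- which is
// why each macro carries its own leading space.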
4280 4294
4281 4295 G1PrintRegionLivenessInfoClosure::
4282 4296 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4283 4297 : _out(out),
4284 4298 _total_used_bytes(0), _total_capacity_bytes(0),
4285 4299 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4286 4300 _hum_used_bytes(0), _hum_capacity_bytes(0),
4287 4301 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4288 4302 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4289 4303 MemRegion g1_committed = g1h->g1_committed();
4290 4304 MemRegion g1_reserved = g1h->g1_reserved();
4291 4305 double now = os::elapsedTime();
4292 4306
4293 4307 // Print the header of the output.
4294 4308 _out->cr();
4295 4309 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4296 4310 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4297 4311 G1PPRL_SUM_ADDR_FORMAT("committed")
4298 4312 G1PPRL_SUM_ADDR_FORMAT("reserved")
4299 4313 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4300 4314 g1_committed.start(), g1_committed.end(),
4301 4315 g1_reserved.start(), g1_reserved.end(),
4302 4316 HeapRegion::GrainBytes);
4303 4317 _out->print_cr(G1PPRL_LINE_PREFIX);
4304 4318 _out->print_cr(G1PPRL_LINE_PREFIX
4305 4319 G1PPRL_TYPE_H_FORMAT
4306 4320 G1PPRL_ADDR_BASE_H_FORMAT
4307 4321 G1PPRL_BYTE_H_FORMAT
4308 4322 G1PPRL_BYTE_H_FORMAT
4309 4323 G1PPRL_BYTE_H_FORMAT
4310 4324 G1PPRL_DOUBLE_H_FORMAT,
4311 4325 "type", "address-range",
4312 4326 "used", "prev-live", "next-live", "gc-eff");
4313 4327 _out->print_cr(G1PPRL_LINE_PREFIX
4314 4328 G1PPRL_TYPE_H_FORMAT
4315 4329 G1PPRL_ADDR_BASE_H_FORMAT
4316 4330 G1PPRL_BYTE_H_FORMAT
4317 4331 G1PPRL_BYTE_H_FORMAT
4318 4332 G1PPRL_BYTE_H_FORMAT
4319 4333 G1PPRL_DOUBLE_H_FORMAT,
4320 4334 "", "",
4321 4335 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4322 4336 }
4323 4337
4324 4338 // It takes as a parameter a pointer to one of the _hum_* fields,
4325 4339 // deduces the corresponding value for a region in a humongous region
4326 4340 // series (either the region size, or what's left if the _hum_* field
4327 4341 // is < the region size), and updates the _hum_* field accordingly.
4328 4342 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4329 4343 size_t bytes = 0;
4330 4344 // The > 0 check is to deal with the prev and next live bytes which
4331 4345 // could be 0.
4332 4346 if (*hum_bytes > 0) {
4333 4347 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4334 4348 *hum_bytes -= bytes;
4335 4349 }
4336 4350 return bytes;
4337 4351 }
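// For example (illustrative numbers): with HeapRegion::GrainBytes at 1M
// and a humongous series whose total used is 2.5M, three successive
// calls on _hum_used_bytes return 1M, 1M and 0.5M, leaving the field at
// zero for the next series.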
4338 4352
4339 4353 // It deduces the values for a region in a humongous region series
4340 4354 // from the _hum_* fields and updates those accordingly. It assumes
4341 4355 // that the _hum_* fields have already been set up from the "starts
4342 4356 // humongous" region and that we visit the regions in address order.
4343 4357 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4344 4358 size_t* capacity_bytes,
4345 4359 size_t* prev_live_bytes,
4346 4360 size_t* next_live_bytes) {
4347 4361 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4348 4362 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4349 4363 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4350 4364 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4351 4365 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4352 4366 }
4353 4367
4354 4368 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4355 4369 const char* type = "";
4356 4370 HeapWord* bottom = r->bottom();
4357 4371 HeapWord* end = r->end();
4358 4372 size_t capacity_bytes = r->capacity();
4359 4373 size_t used_bytes = r->used();
4360 4374 size_t prev_live_bytes = r->live_bytes();
4361 4375 size_t next_live_bytes = r->next_live_bytes();
4362 4376 double gc_eff = r->gc_efficiency();
4363 4377 if (r->used() == 0) {
4364 4378 type = "FREE";
4365 4379 } else if (r->is_survivor()) {
4366 4380 type = "SURV";
4367 4381 } else if (r->is_young()) {
4368 4382 type = "EDEN";
4369 4383 } else if (r->startsHumongous()) {
4370 4384 type = "HUMS";
4371 4385
4372 4386 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4373 4387 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4374 4388 "they should have been zeroed after the last time we used them");
4375 4389 // Set up the _hum_* fields.
4376 4390 _hum_capacity_bytes = capacity_bytes;
4377 4391 _hum_used_bytes = used_bytes;
4378 4392 _hum_prev_live_bytes = prev_live_bytes;
4379 4393 _hum_next_live_bytes = next_live_bytes;
4380 4394 get_hum_bytes(&used_bytes, &capacity_bytes,
4381 4395 &prev_live_bytes, &next_live_bytes);
4382 4396 end = bottom + HeapRegion::GrainWords;
4383 4397 } else if (r->continuesHumongous()) {
4384 4398 type = "HUMC";
4385 4399 get_hum_bytes(&used_bytes, &capacity_bytes,
4386 4400 &prev_live_bytes, &next_live_bytes);
4387 4401 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4388 4402 } else {
4389 4403 type = "OLD";
4390 4404 }
4391 4405
4392 4406 _total_used_bytes += used_bytes;
4393 4407 _total_capacity_bytes += capacity_bytes;
4394 4408 _total_prev_live_bytes += prev_live_bytes;
4395 4409 _total_next_live_bytes += next_live_bytes;
4396 4410
4397 4411 // Print a line for this particular region.
4398 4412 _out->print_cr(G1PPRL_LINE_PREFIX
4399 4413 G1PPRL_TYPE_FORMAT
4400 4414 G1PPRL_ADDR_BASE_FORMAT
4401 4415 G1PPRL_BYTE_FORMAT
4402 4416 G1PPRL_BYTE_FORMAT
4403 4417 G1PPRL_BYTE_FORMAT
4404 4418 G1PPRL_DOUBLE_FORMAT,
4405 4419 type, bottom, end,
4406 4420 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4407 4421
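  // (Returning false tells the heap-region iteration to continue, so a
  // line is printed for every region; returning true would stop it early.)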
4408 4422 return false;
4409 4423 }
4410 4424
4411 4425 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4412 4426 // Print the footer of the output.
4413 4427 _out->print_cr(G1PPRL_LINE_PREFIX);
4414 4428 _out->print_cr(G1PPRL_LINE_PREFIX
4415 4429 " SUMMARY"
4416 4430 G1PPRL_SUM_MB_FORMAT("capacity")
4417 4431 G1PPRL_SUM_MB_PERC_FORMAT("used")
4418 4432 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4419 4433 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4420 4434 bytes_to_mb(_total_capacity_bytes),
4421 4435 bytes_to_mb(_total_used_bytes),
4422 4436 perc(_total_used_bytes, _total_capacity_bytes),
4423 4437 bytes_to_mb(_total_prev_live_bytes),
4424 4438 perc(_total_prev_live_bytes, _total_capacity_bytes),
4425 4439 bytes_to_mb(_total_next_live_bytes),
4426 4440 perc(_total_next_live_bytes, _total_capacity_bytes));
4427 4441 _out->cr();
4428 4442 }
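// With the formats above, the footer comes out roughly as follows
// (illustrative numbers; the SUMMARY line is wrapped here for width):
//
//   ###
//   ### SUMMARY capacity: 256.00 MB used: 128.00 MB / 50.00 %
//         prev-live: 64.00 MB / 25.00 % next-live: 32.00 MB / 12.50 %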
[... 1451 lines elided ...]