rev 4008 : 8001985: G1: Backport fix for 7200261 to hsx24
Summary: The automatic backport of the fix for 7200261 did not apply cleanly to hsx24 - there were two rejected hunks that had to be fixed up by hand.
Reviewed-by:
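
The hunks shown further down replace CMCountDataClosureBase's private, inclusive set_card_bitmap_range(start_idx, last_idx) helper with the shared ConcurrentMark::set_card_bitmap_range(bm, start_idx, end_idx, is_par), which takes an exclusive end index, and they bump end_idx by one card when the end of the marked object (or of the ntams-to-top range) lies inside the heap but is not card aligned. Below is a minimal stand-alone C++ sketch of that index arithmetic only; the 512-byte card size, the std::vector<bool> bitmap, and the helper names are simplified, hypothetical stand-ins rather than the HotSpot types.

  #include <cassert>
  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // Hypothetical stand-in: 2^9 = 512-byte cards, like CardTableModRefBS::card_shift.
  static const unsigned kCardShift = 9;

  // Index of the card containing 'addr', biased by the card of the heap bottom.
  static size_t card_index_for(uintptr_t addr, uintptr_t heap_bottom) {
    return (size_t)((addr - heap_bottom) >> kCardShift);
  }

  static bool is_card_aligned(uintptr_t addr) {
    return (addr & (((uintptr_t)1 << kCardShift) - 1)) == 0;
  }

  // Set the bits for all cards spanned by the half-open range [start, end).
  // 'end' may equal heap_end (just past the last heap word) but never exceed it.
  static void set_cards_for_range(std::vector<bool>& card_bm,
                                  uintptr_t start, uintptr_t end,
                                  uintptr_t heap_bottom, uintptr_t heap_end) {
    size_t start_idx = card_index_for(start, heap_bottom);
    size_t end_idx   = card_index_for(end,   heap_bottom);   // exclusive end index

    // If 'end' is still inside the heap and not card aligned, the partially
    // covered last card needs its bit too. When 'end' == heap_end, end_idx
    // already refers to the (non-existent) card just beyond the heap and must
    // not be incremented; this is the case the backported hunks guard against.
    if (end < heap_end && !is_card_aligned(end)) {
      end_idx += 1;
    }

    assert(start_idx <= end_idx && end_idx <= card_bm.size());
    for (size_t i = start_idx; i < end_idx; ++i) {   // exclusive upper bound
      card_bm[i] = true;
    }
  }

The diff applies this pattern twice per region: once for each live object found below ntams, and once for the allocated-since-marking portion between ntams and top.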
--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
32 32 #include "gc_implementation/g1/g1Log.hpp"
33 33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
34 34 #include "gc_implementation/g1/g1RemSet.hpp"
35 35 #include "gc_implementation/g1/heapRegion.inline.hpp"
36 36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
37 37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
38 38 #include "gc_implementation/shared/vmGCOperations.hpp"
39 39 #include "memory/genOopClosures.inline.hpp"
40 40 #include "memory/referencePolicy.hpp"
41 41 #include "memory/resourceArea.hpp"
42 42 #include "oops/oop.inline.hpp"
43 43 #include "runtime/handles.inline.hpp"
44 44 #include "runtime/java.hpp"
45 45 #include "services/memTracker.hpp"
46 46
47 47 // Concurrent marking bit map wrapper
48 48
49 49 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
50 50 _bm((uintptr_t*)NULL,0),
51 51 _shifter(shifter) {
52 52 _bmStartWord = (HeapWord*)(rs.base());
53 53 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
54 54 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
55 55 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
56 56
57 57 MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
58 58
59 59 guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
60 60 // For now we'll just commit all of the bit map up front.
61 61 // Later on we'll try to be more parsimonious with swap.
62 62 guarantee(_virtual_space.initialize(brs, brs.size()),
63 63 "couldn't reseve backing store for concurrent marking bit map");
64 64 assert(_virtual_space.committed_size() == brs.size(),
65 65 "didn't reserve backing store for all of concurrent marking bit map?");
66 66 _bm.set_map((uintptr_t*)_virtual_space.low());
67 67 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
68 68 _bmWordSize, "inconsistency in bit map sizing");
69 69 _bm.set_size(_bmWordSize >> _shifter);
70 70 }
71 71
72 72 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
73 73 HeapWord* limit) const {
74 74 // First we must round addr *up* to a possible object boundary.
75 75 addr = (HeapWord*)align_size_up((intptr_t)addr,
76 76 HeapWordSize << _shifter);
77 77 size_t addrOffset = heapWordToOffset(addr);
78 78 if (limit == NULL) {
79 79 limit = _bmStartWord + _bmWordSize;
80 80 }
81 81 size_t limitOffset = heapWordToOffset(limit);
82 82 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
83 83 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
84 84 assert(nextAddr >= addr, "get_next_one postcondition");
85 85 assert(nextAddr == limit || isMarked(nextAddr),
86 86 "get_next_one postcondition");
87 87 return nextAddr;
88 88 }
89 89
90 90 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
91 91 HeapWord* limit) const {
92 92 size_t addrOffset = heapWordToOffset(addr);
93 93 if (limit == NULL) {
94 94 limit = _bmStartWord + _bmWordSize;
95 95 }
96 96 size_t limitOffset = heapWordToOffset(limit);
97 97 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
98 98 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
99 99 assert(nextAddr >= addr, "get_next_one postcondition");
100 100 assert(nextAddr == limit || !isMarked(nextAddr),
101 101 "get_next_one postcondition");
102 102 return nextAddr;
103 103 }
104 104
105 105 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
106 106 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
107 107 return (int) (diff >> _shifter);
108 108 }
109 109
110 110 #ifndef PRODUCT
111 111 bool CMBitMapRO::covers(ReservedSpace rs) const {
112 112 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
113 113 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
114 114 "size inconsistency");
115 115 return _bmStartWord == (HeapWord*)(rs.base()) &&
116 116 _bmWordSize == rs.size()>>LogHeapWordSize;
117 117 }
118 118 #endif
119 119
120 120 void CMBitMap::clearAll() {
121 121 _bm.clear();
122 122 return;
123 123 }
124 124
125 125 void CMBitMap::markRange(MemRegion mr) {
126 126 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
127 127 assert(!mr.is_empty(), "unexpected empty region");
128 128 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
129 129 ((HeapWord *) mr.end())),
130 130 "markRange memory region end is not card aligned");
131 131 // convert address range into offset range
132 132 _bm.at_put_range(heapWordToOffset(mr.start()),
133 133 heapWordToOffset(mr.end()), true);
134 134 }
135 135
136 136 void CMBitMap::clearRange(MemRegion mr) {
137 137 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
138 138 assert(!mr.is_empty(), "unexpected empty region");
139 139 // convert address range into offset range
140 140 _bm.at_put_range(heapWordToOffset(mr.start()),
141 141 heapWordToOffset(mr.end()), false);
142 142 }
143 143
144 144 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
145 145 HeapWord* end_addr) {
146 146 HeapWord* start = getNextMarkedWordAddress(addr);
147 147 start = MIN2(start, end_addr);
148 148 HeapWord* end = getNextUnmarkedWordAddress(start);
149 149 end = MIN2(end, end_addr);
150 150 assert(start <= end, "Consistency check");
151 151 MemRegion mr(start, end);
152 152 if (!mr.is_empty()) {
153 153 clearRange(mr);
154 154 }
155 155 return mr;
156 156 }
157 157
158 158 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
159 159 _base(NULL), _cm(cm)
160 160 #ifdef ASSERT
161 161 , _drain_in_progress(false)
162 162 , _drain_in_progress_yields(false)
163 163 #endif
164 164 {}
165 165
166 166 void CMMarkStack::allocate(size_t size) {
167 167 _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
168 168 if (_base == NULL) {
169 169 vm_exit_during_initialization("Failed to allocate CM region mark stack");
170 170 }
171 171 _index = 0;
172 172 _capacity = (jint) size;
173 173 _saved_index = -1;
174 174 NOT_PRODUCT(_max_depth = 0);
175 175 }
176 176
177 177 CMMarkStack::~CMMarkStack() {
178 178 if (_base != NULL) {
179 179 FREE_C_HEAP_ARRAY(oop, _base, mtGC);
180 180 }
181 181 }
182 182
183 183 void CMMarkStack::par_push(oop ptr) {
184 184 while (true) {
185 185 if (isFull()) {
186 186 _overflow = true;
187 187 return;
188 188 }
189 189 // Otherwise...
190 190 jint index = _index;
191 191 jint next_index = index+1;
192 192 jint res = Atomic::cmpxchg(next_index, &_index, index);
193 193 if (res == index) {
194 194 _base[index] = ptr;
195 195 // Note that we don't maintain this atomically. We could, but it
196 196 // doesn't seem necessary.
197 197 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
198 198 return;
199 199 }
200 200 // Otherwise, we need to try again.
201 201 }
202 202 }
203 203
204 204 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
205 205 while (true) {
206 206 if (isFull()) {
207 207 _overflow = true;
208 208 return;
209 209 }
210 210 // Otherwise...
211 211 jint index = _index;
212 212 jint next_index = index + n;
213 213 if (next_index > _capacity) {
214 214 _overflow = true;
215 215 return;
216 216 }
217 217 jint res = Atomic::cmpxchg(next_index, &_index, index);
218 218 if (res == index) {
219 219 for (int i = 0; i < n; i++) {
220 220 int ind = index + i;
221 221 assert(ind < _capacity, "By overflow test above.");
222 222 _base[ind] = ptr_arr[i];
223 223 }
224 224 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
225 225 return;
226 226 }
227 227 // Otherwise, we need to try again.
228 228 }
229 229 }
230 230
231 231
232 232 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
233 233 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
234 234 jint start = _index;
235 235 jint next_index = start + n;
236 236 if (next_index > _capacity) {
237 237 _overflow = true;
238 238 return;
239 239 }
240 240 // Otherwise.
241 241 _index = next_index;
242 242 for (int i = 0; i < n; i++) {
243 243 int ind = start + i;
244 244 assert(ind < _capacity, "By overflow test above.");
245 245 _base[ind] = ptr_arr[i];
246 246 }
247 247 }
248 248
249 249
250 250 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
251 251 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
252 252 jint index = _index;
253 253 if (index == 0) {
254 254 *n = 0;
255 255 return false;
256 256 } else {
257 257 int k = MIN2(max, index);
258 258 jint new_ind = index - k;
259 259 for (int j = 0; j < k; j++) {
260 260 ptr_arr[j] = _base[new_ind + j];
261 261 }
262 262 _index = new_ind;
263 263 *n = k;
264 264 return true;
265 265 }
266 266 }
267 267
268 268 template<class OopClosureClass>
269 269 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
270 270 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
271 271 || SafepointSynchronize::is_at_safepoint(),
272 272 "Drain recursion must be yield-safe.");
273 273 bool res = true;
274 274 debug_only(_drain_in_progress = true);
275 275 debug_only(_drain_in_progress_yields = yield_after);
276 276 while (!isEmpty()) {
277 277 oop newOop = pop();
278 278 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
279 279 assert(newOop->is_oop(), "Expected an oop");
280 280 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
281 281 "only grey objects on this stack");
282 282 newOop->oop_iterate(cl);
283 283 if (yield_after && _cm->do_yield_check()) {
284 284 res = false;
285 285 break;
286 286 }
287 287 }
288 288 debug_only(_drain_in_progress = false);
289 289 return res;
290 290 }
291 291
292 292 void CMMarkStack::note_start_of_gc() {
293 293 assert(_saved_index == -1,
294 294 "note_start_of_gc()/end_of_gc() bracketed incorrectly");
295 295 _saved_index = _index;
296 296 }
297 297
298 298 void CMMarkStack::note_end_of_gc() {
299 299 // This is intentionally a guarantee, instead of an assert. If we
300 300 // accidentally add something to the mark stack during GC, it
301 301 // will be a correctness issue so it's better if we crash. We'll
302 302 // only check this once per GC anyway, so it won't be a performance
303 303 // issue in any way.
304 304 guarantee(_saved_index == _index,
305 305 err_msg("saved index: %d index: %d", _saved_index, _index));
306 306 _saved_index = -1;
307 307 }
308 308
309 309 void CMMarkStack::oops_do(OopClosure* f) {
310 310 assert(_saved_index == _index,
311 311 err_msg("saved index: %d index: %d", _saved_index, _index));
312 312 for (int i = 0; i < _index; i += 1) {
313 313 f->do_oop(&_base[i]);
314 314 }
315 315 }
316 316
317 317 bool ConcurrentMark::not_yet_marked(oop obj) const {
318 318 return (_g1h->is_obj_ill(obj)
319 319 || (_g1h->is_in_permanent(obj)
320 320 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
321 321 }
322 322
323 323 CMRootRegions::CMRootRegions() :
324 324 _young_list(NULL), _cm(NULL), _scan_in_progress(false),
325 325 _should_abort(false), _next_survivor(NULL) { }
326 326
327 327 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
328 328 _young_list = g1h->young_list();
329 329 _cm = cm;
330 330 }
331 331
332 332 void CMRootRegions::prepare_for_scan() {
333 333 assert(!scan_in_progress(), "pre-condition");
334 334
335 335 // Currently, only survivors can be root regions.
336 336 assert(_next_survivor == NULL, "pre-condition");
337 337 _next_survivor = _young_list->first_survivor_region();
338 338 _scan_in_progress = (_next_survivor != NULL);
339 339 _should_abort = false;
340 340 }
341 341
342 342 HeapRegion* CMRootRegions::claim_next() {
343 343 if (_should_abort) {
344 344 // If someone has set the should_abort flag, we return NULL to
345 345 // force the caller to bail out of their loop.
346 346 return NULL;
347 347 }
348 348
349 349 // Currently, only survivors can be root regions.
350 350 HeapRegion* res = _next_survivor;
351 351 if (res != NULL) {
352 352 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
353 353 // Read it again in case it changed while we were waiting for the lock.
354 354 res = _next_survivor;
355 355 if (res != NULL) {
356 356 if (res == _young_list->last_survivor_region()) {
357 357 // We just claimed the last survivor so store NULL to indicate
358 358 // that we're done.
359 359 _next_survivor = NULL;
360 360 } else {
361 361 _next_survivor = res->get_next_young_region();
362 362 }
363 363 } else {
364 364 // Someone else claimed the last survivor while we were trying
365 365 // to take the lock so nothing else to do.
366 366 }
367 367 }
368 368 assert(res == NULL || res->is_survivor(), "post-condition");
369 369
370 370 return res;
371 371 }
372 372
373 373 void CMRootRegions::scan_finished() {
374 374 assert(scan_in_progress(), "pre-condition");
375 375
376 376 // Currently, only survivors can be root regions.
377 377 if (!_should_abort) {
378 378 assert(_next_survivor == NULL, "we should have claimed all survivors");
379 379 }
380 380 _next_survivor = NULL;
381 381
382 382 {
383 383 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
384 384 _scan_in_progress = false;
385 385 RootRegionScan_lock->notify_all();
386 386 }
387 387 }
388 388
389 389 bool CMRootRegions::wait_until_scan_finished() {
390 390 if (!scan_in_progress()) return false;
391 391
392 392 {
393 393 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
394 394 while (scan_in_progress()) {
395 395 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
396 396 }
397 397 }
398 398 return true;
399 399 }
400 400
401 401 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
402 402 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
403 403 #endif // _MSC_VER
404 404
405 405 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
406 406 return MAX2((n_par_threads + 2) / 4, 1U);
407 407 }
408 408
409 409 ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
410 410 _markBitMap1(rs, MinObjAlignment - 1),
411 411 _markBitMap2(rs, MinObjAlignment - 1),
412 412
413 413 _parallel_marking_threads(0),
414 414 _max_parallel_marking_threads(0),
415 415 _sleep_factor(0.0),
416 416 _marking_task_overhead(1.0),
417 417 _cleanup_sleep_factor(0.0),
418 418 _cleanup_task_overhead(1.0),
419 419 _cleanup_list("Cleanup List"),
420 420 _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
421 421 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
422 422 CardTableModRefBS::card_shift,
423 423 false /* in_resource_area*/),
424 424
425 425 _prevMarkBitMap(&_markBitMap1),
426 426 _nextMarkBitMap(&_markBitMap2),
427 427
428 428 _markStack(this),
429 429 // _finger set in set_non_marking_state
430 430
431 431 _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
432 432 // _active_tasks set in set_non_marking_state
433 433 // _tasks set inside the constructor
434 434 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
435 435 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
436 436
437 437 _has_overflown(false),
438 438 _concurrent(false),
439 439 _has_aborted(false),
440 440 _restart_for_overflow(false),
441 441 _concurrent_marking_in_progress(false),
442 442
443 443 // _verbose_level set below
444 444
445 445 _init_times(),
446 446 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
447 447 _cleanup_times(),
448 448 _total_counting_time(0.0),
449 449 _total_rs_scrub_time(0.0),
450 450
451 451 _parallel_workers(NULL),
452 452
453 453 _count_card_bitmaps(NULL),
454 454 _count_marked_bytes(NULL) {
455 455 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
456 456 if (verbose_level < no_verbose) {
457 457 verbose_level = no_verbose;
458 458 }
459 459 if (verbose_level > high_verbose) {
460 460 verbose_level = high_verbose;
461 461 }
462 462 _verbose_level = verbose_level;
463 463
464 464 if (verbose_low()) {
465 465 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
466 466 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
467 467 }
468 468
469 469 _markStack.allocate(MarkStackSize);
470 470
471 471 // Create & start a ConcurrentMark thread.
472 472 _cmThread = new ConcurrentMarkThread(this);
473 473 assert(cmThread() != NULL, "CM Thread should have been created");
474 474 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
475 475
476 476 _g1h = G1CollectedHeap::heap();
477 477 assert(CGC_lock != NULL, "Where's the CGC_lock?");
478 478 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
479 479 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
480 480
481 481 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
482 482 satb_qs.set_buffer_size(G1SATBBufferSize);
483 483
484 484 _root_regions.init(_g1h, this);
485 485
486 486 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
487 487 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);
488 488
489 489 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num, mtGC);
490 490 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);
491 491
492 492 BitMap::idx_t card_bm_size = _card_bm.size();
493 493
494 494 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
495 495 _active_tasks = _max_task_num;
496 496 for (int i = 0; i < (int) _max_task_num; ++i) {
497 497 CMTaskQueue* task_queue = new CMTaskQueue();
498 498 task_queue->initialize();
499 499 _task_queues->register_queue(i, task_queue);
500 500
501 501 _count_card_bitmaps[i] = BitMap(card_bm_size, false);
502 502 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
503 503
504 504 _tasks[i] = new CMTask(i, this,
505 505 _count_marked_bytes[i],
506 506 &_count_card_bitmaps[i],
507 507 task_queue, _task_queues);
508 508
509 509 _accum_task_vtime[i] = 0.0;
510 510 }
511 511
512 512 // Calculate the card number for the bottom of the heap. Used
513 513 // in biasing indexes into the accounting card bitmaps.
514 514 _heap_bottom_card_num =
515 515 intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
516 516 CardTableModRefBS::card_shift);
517 517
518 518 // Clear all the liveness counting data
519 519 clear_all_count_data();
520 520
521 521 if (ConcGCThreads > ParallelGCThreads) {
522 522 vm_exit_during_initialization("Can't have more ConcGCThreads "
523 523 "than ParallelGCThreads.");
524 524 }
525 525 if (ParallelGCThreads == 0) {
526 526 // if we are not running with any parallel GC threads we will not
527 527 // spawn any marking threads either
528 528 _parallel_marking_threads = 0;
529 529 _max_parallel_marking_threads = 0;
530 530 _sleep_factor = 0.0;
531 531 _marking_task_overhead = 1.0;
532 532 } else {
533 533 if (ConcGCThreads > 0) {
534 534 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
535 535 // if both are set
536 536
537 537 _parallel_marking_threads = (uint) ConcGCThreads;
538 538 _max_parallel_marking_threads = _parallel_marking_threads;
539 539 _sleep_factor = 0.0;
540 540 _marking_task_overhead = 1.0;
541 541 } else if (G1MarkingOverheadPercent > 0) {
542 542 // we will calculate the number of parallel marking threads
543 543 // based on a target overhead with respect to the soft real-time
544 544 // goal
545 545
546 546 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
547 547 double overall_cm_overhead =
548 548 (double) MaxGCPauseMillis * marking_overhead /
549 549 (double) GCPauseIntervalMillis;
550 550 double cpu_ratio = 1.0 / (double) os::processor_count();
551 551 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
552 552 double marking_task_overhead =
553 553 overall_cm_overhead / marking_thread_num *
554 554 (double) os::processor_count();
555 555 double sleep_factor =
556 556 (1.0 - marking_task_overhead) / marking_task_overhead;
557 557
558 558 _parallel_marking_threads = (uint) marking_thread_num;
559 559 _max_parallel_marking_threads = _parallel_marking_threads;
560 560 _sleep_factor = sleep_factor;
561 561 _marking_task_overhead = marking_task_overhead;
562 562 } else {
563 563 _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
564 564 _max_parallel_marking_threads = _parallel_marking_threads;
565 565 _sleep_factor = 0.0;
566 566 _marking_task_overhead = 1.0;
567 567 }
568 568
569 569 if (parallel_marking_threads() > 1) {
570 570 _cleanup_task_overhead = 1.0;
571 571 } else {
572 572 _cleanup_task_overhead = marking_task_overhead();
573 573 }
574 574 _cleanup_sleep_factor =
575 575 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
576 576
577 577 #if 0
578 578 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
579 579 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
580 580 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
581 581 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
582 582 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
583 583 #endif
584 584
585 585 guarantee(parallel_marking_threads() > 0, "peace of mind");
586 586 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
587 587 _max_parallel_marking_threads, false, true);
588 588 if (_parallel_workers == NULL) {
589 589 vm_exit_during_initialization("Failed necessary allocation.");
590 590 } else {
591 591 _parallel_workers->initialize_workers();
592 592 }
593 593 }
594 594
595 595 // so that the call below can read a sensible value
596 596 _heap_start = (HeapWord*) rs.base();
597 597 set_non_marking_state();
598 598 }
599 599
600 600 void ConcurrentMark::update_g1_committed(bool force) {
601 601 // If concurrent marking is not in progress, then we do not need to
602 602 // update _heap_end.
603 603 if (!concurrent_marking_in_progress() && !force) return;
604 604
605 605 MemRegion committed = _g1h->g1_committed();
606 606 assert(committed.start() == _heap_start, "start shouldn't change");
607 607 HeapWord* new_end = committed.end();
608 608 if (new_end > _heap_end) {
609 609 // The heap has been expanded.
610 610
611 611 _heap_end = new_end;
612 612 }
613 613 // Notice that the heap can also shrink. However, this only happens
614 614 // during a Full GC (at least currently) and the entire marking
615 615 // phase will bail out and the task will not be restarted. So, let's
616 616 // do nothing.
617 617 }
618 618
619 619 void ConcurrentMark::reset() {
620 620 // Starting values for these two. This should be called in a STW
621 621 // phase. CM will be notified of any future g1_committed expansions;
622 622 // these will happen at the end of evacuation pauses, when tasks are
623 623 // inactive.
624 624 MemRegion committed = _g1h->g1_committed();
625 625 _heap_start = committed.start();
626 626 _heap_end = committed.end();
627 627
628 628 // Separated the asserts so that we know which one fires.
629 629 assert(_heap_start != NULL, "heap bounds should look ok");
630 630 assert(_heap_end != NULL, "heap bounds should look ok");
631 631 assert(_heap_start < _heap_end, "heap bounds should look ok");
632 632
633 633 // reset all the marking data structures and any necessary flags
634 634 clear_marking_state();
635 635
636 636 if (verbose_low()) {
637 637 gclog_or_tty->print_cr("[global] resetting");
638 638 }
639 639
640 640 // We do reset all of them, since different phases will use
641 641 // a different number of active threads. So, it's easiest to have all
642 642 // of them ready.
643 643 for (int i = 0; i < (int) _max_task_num; ++i) {
644 644 _tasks[i]->reset(_nextMarkBitMap);
645 645 }
646 646
647 647 // we need this to make sure that the flag is on during the evac
648 648 // pause with initial mark piggy-backed
649 649 set_concurrent_marking_in_progress();
650 650 }
651 651
652 652 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
653 653 assert(active_tasks <= _max_task_num, "we should not have more");
654 654
655 655 _active_tasks = active_tasks;
656 656 // Need to update the three data structures below according to the
657 657 // number of active threads for this phase.
658 658 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
659 659 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
660 660 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
661 661
662 662 _concurrent = concurrent;
663 663 // We propagate this to all tasks, not just the active ones.
664 664 for (int i = 0; i < (int) _max_task_num; ++i)
665 665 _tasks[i]->set_concurrent(concurrent);
666 666
667 667 if (concurrent) {
668 668 set_concurrent_marking_in_progress();
669 669 } else {
670 670 // We currently assume that the concurrent flag has been set to
671 671 // false before we start remark. At this point we should also be
672 672 // in a STW phase.
673 673 assert(!concurrent_marking_in_progress(), "invariant");
674 674 assert(_finger == _heap_end, "only way to get here");
675 675 update_g1_committed(true);
676 676 }
677 677 }
678 678
679 679 void ConcurrentMark::set_non_marking_state() {
680 680 // We set the global marking state to some default values when we're
681 681 // not doing marking.
682 682 clear_marking_state();
683 683 _active_tasks = 0;
684 684 clear_concurrent_marking_in_progress();
685 685 }
686 686
687 687 ConcurrentMark::~ConcurrentMark() {
688 688 // The ConcurrentMark instance is never freed.
689 689 ShouldNotReachHere();
690 690 }
691 691
692 692 void ConcurrentMark::clearNextBitmap() {
693 693 G1CollectedHeap* g1h = G1CollectedHeap::heap();
694 694 G1CollectorPolicy* g1p = g1h->g1_policy();
695 695
696 696 // Make sure that the concurrent mark thread looks to still be in
697 697 // the current cycle.
698 698 guarantee(cmThread()->during_cycle(), "invariant");
699 699
700 700 // We are finishing up the current cycle by clearing the next
701 701 // marking bitmap and getting it ready for the next cycle. During
702 702 // this time no other cycle can start. So, let's make sure that this
703 703 // is the case.
704 704 guarantee(!g1h->mark_in_progress(), "invariant");
705 705
706 706 // clear the mark bitmap (no grey objects to start with).
707 707 // We need to do this in chunks and offer to yield in between
708 708 // each chunk.
709 709 HeapWord* start = _nextMarkBitMap->startWord();
710 710 HeapWord* end = _nextMarkBitMap->endWord();
711 711 HeapWord* cur = start;
712 712 size_t chunkSize = M;
713 713 while (cur < end) {
714 714 HeapWord* next = cur + chunkSize;
715 715 if (next > end) {
716 716 next = end;
717 717 }
718 718 MemRegion mr(cur,next);
719 719 _nextMarkBitMap->clearRange(mr);
720 720 cur = next;
721 721 do_yield_check();
722 722
723 723 // Repeat the asserts from above. We'll do them as asserts here to
724 724 // minimize their overhead on the product. However, we'll have
725 725 // them as guarantees at the beginning / end of the bitmap
726 726 // clearing to get some checking in the product.
727 727 assert(cmThread()->during_cycle(), "invariant");
728 728 assert(!g1h->mark_in_progress(), "invariant");
729 729 }
730 730
731 731 // Clear the liveness counting data
732 732 clear_all_count_data();
733 733
734 734 // Repeat the asserts from above.
735 735 guarantee(cmThread()->during_cycle(), "invariant");
736 736 guarantee(!g1h->mark_in_progress(), "invariant");
737 737 }
738 738
739 739 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
740 740 public:
741 741 bool doHeapRegion(HeapRegion* r) {
742 742 if (!r->continuesHumongous()) {
743 743 r->note_start_of_marking();
744 744 }
745 745 return false;
746 746 }
747 747 };
748 748
749 749 void ConcurrentMark::checkpointRootsInitialPre() {
750 750 G1CollectedHeap* g1h = G1CollectedHeap::heap();
751 751 G1CollectorPolicy* g1p = g1h->g1_policy();
752 752
753 753 _has_aborted = false;
754 754
755 755 #ifndef PRODUCT
756 756 if (G1PrintReachableAtInitialMark) {
757 757 print_reachable("at-cycle-start",
758 758 VerifyOption_G1UsePrevMarking, true /* all */);
759 759 }
760 760 #endif
761 761
762 762 // Initialise marking structures. This has to be done in a STW phase.
763 763 reset();
764 764
765 765 // For each region note start of marking.
766 766 NoteStartOfMarkHRClosure startcl;
767 767 g1h->heap_region_iterate(&startcl);
768 768 }
769 769
770 770
771 771 void ConcurrentMark::checkpointRootsInitialPost() {
772 772 G1CollectedHeap* g1h = G1CollectedHeap::heap();
773 773
774 774 // If we force an overflow during remark, the remark operation will
775 775 // actually abort and we'll restart concurrent marking. If we always
776 776 // force an overflow during remark we'll never actually complete the
777 777 // marking phase. So, we initialize this here, at the start of the
778 778 // cycle, so that the remaining overflow number will decrease at
779 779 // every remark and we'll eventually not need to cause one.
780 780 force_overflow_stw()->init();
781 781
782 782 // Start Concurrent Marking weak-reference discovery.
783 783 ReferenceProcessor* rp = g1h->ref_processor_cm();
784 784 // enable ("weak") refs discovery
785 785 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
786 786 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
787 787
788 788 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
789 789 // This is the start of the marking cycle; we expect all
790 790 // threads to have SATB queues with active set to false.
791 791 satb_mq_set.set_active_all_threads(true, /* new active value */
792 792 false /* expected_active */);
793 793
794 794 _root_regions.prepare_for_scan();
795 795
796 796 // update_g1_committed() will be called at the end of an evac pause
797 797 // when marking is on. So, it's also called at the end of the
798 798 // initial-mark pause to update the heap end, if the heap expands
799 799 // during it. No need to call it here.
800 800 }
801 801
802 802 /*
803 803 * Notice that in the next two methods, we actually leave the STS
804 804 * during the barrier sync and join it immediately afterwards. If we
805 805 * do not do this, the following deadlock can occur: one thread could
806 806 * be in the barrier sync code, waiting for the other thread to also
807 807 * sync up, whereas another one could be trying to yield, while also
808 808 * waiting for the other threads to sync up too.
809 809 *
810 810 * Note, however, that this code is also used during remark and in
811 811 * this case we should not attempt to leave / enter the STS, otherwise
812 812 * we'll either hit an assert (debug / fastdebug) or deadlock
813 813 * (product). So we should only leave / enter the STS if we are
814 814 * operating concurrently.
815 815 *
816 816 * Because the thread that does the sync barrier has left the STS, it
817 817 * is possible that a Full GC or an evacuation pause could occur
818 818 * while it is suspended. This is actually safe, since entering the sync
819 819 * barrier is one of the last things do_marking_step() does, and it
820 820 * doesn't manipulate any data structures afterwards.
821 821 */
822 822
823 823 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
824 824 if (verbose_low()) {
825 825 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
826 826 }
827 827
828 828 if (concurrent()) {
829 829 ConcurrentGCThread::stsLeave();
830 830 }
831 831 _first_overflow_barrier_sync.enter();
832 832 if (concurrent()) {
833 833 ConcurrentGCThread::stsJoin();
834 834 }
835 835 // at this point everyone should have synced up and not be doing any
836 836 // more work
837 837
838 838 if (verbose_low()) {
839 839 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
840 840 }
841 841
842 842 // let task 0 do this
843 843 if (task_num == 0) {
844 844 // task 0 is responsible for clearing the global data structures
845 845 // We should be here because of an overflow. During STW we should
846 846 // not clear the overflow flag since we rely on it being true when
847 847 // we exit this method to abort the pause and restart concurrent
848 848 // marking.
849 849 clear_marking_state(concurrent() /* clear_overflow */);
850 850 force_overflow()->update();
851 851
852 852 if (G1Log::fine()) {
853 853 gclog_or_tty->date_stamp(PrintGCDateStamps);
854 854 gclog_or_tty->stamp(PrintGCTimeStamps);
855 855 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
856 856 }
857 857 }
858 858
859 859 // after this, each task should reset its own data structures and
860 860 // then go into the second barrier
861 861 }
862 862
863 863 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
864 864 if (verbose_low()) {
865 865 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
866 866 }
867 867
868 868 if (concurrent()) {
869 869 ConcurrentGCThread::stsLeave();
870 870 }
871 871 _second_overflow_barrier_sync.enter();
872 872 if (concurrent()) {
873 873 ConcurrentGCThread::stsJoin();
874 874 }
875 875 // at this point everything should be re-initialised and ready to go
876 876
877 877 if (verbose_low()) {
878 878 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
879 879 }
880 880 }
881 881
882 882 #ifndef PRODUCT
883 883 void ForceOverflowSettings::init() {
884 884 _num_remaining = G1ConcMarkForceOverflow;
885 885 _force = false;
886 886 update();
887 887 }
888 888
889 889 void ForceOverflowSettings::update() {
890 890 if (_num_remaining > 0) {
891 891 _num_remaining -= 1;
892 892 _force = true;
893 893 } else {
894 894 _force = false;
895 895 }
896 896 }
897 897
898 898 bool ForceOverflowSettings::should_force() {
899 899 if (_force) {
900 900 _force = false;
901 901 return true;
902 902 } else {
903 903 return false;
904 904 }
905 905 }
906 906 #endif // !PRODUCT
907 907
908 908 class CMConcurrentMarkingTask: public AbstractGangTask {
909 909 private:
910 910 ConcurrentMark* _cm;
911 911 ConcurrentMarkThread* _cmt;
912 912
913 913 public:
914 914 void work(uint worker_id) {
915 915 assert(Thread::current()->is_ConcurrentGC_thread(),
916 916 "this should only be done by a conc GC thread");
917 917 ResourceMark rm;
918 918
919 919 double start_vtime = os::elapsedVTime();
920 920
921 921 ConcurrentGCThread::stsJoin();
922 922
923 923 assert(worker_id < _cm->active_tasks(), "invariant");
924 924 CMTask* the_task = _cm->task(worker_id);
925 925 the_task->record_start_time();
926 926 if (!_cm->has_aborted()) {
927 927 do {
928 928 double start_vtime_sec = os::elapsedVTime();
929 929 double start_time_sec = os::elapsedTime();
930 930 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
931 931
932 932 the_task->do_marking_step(mark_step_duration_ms,
933 933 true /* do_stealing */,
934 934 true /* do_termination */);
935 935
936 936 double end_time_sec = os::elapsedTime();
937 937 double end_vtime_sec = os::elapsedVTime();
938 938 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
939 939 double elapsed_time_sec = end_time_sec - start_time_sec;
940 940 _cm->clear_has_overflown();
941 941
942 942 bool ret = _cm->do_yield_check(worker_id);
943 943
944 944 jlong sleep_time_ms;
945 945 if (!_cm->has_aborted() && the_task->has_aborted()) {
946 946 sleep_time_ms =
947 947 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
948 948 ConcurrentGCThread::stsLeave();
949 949 os::sleep(Thread::current(), sleep_time_ms, false);
950 950 ConcurrentGCThread::stsJoin();
951 951 }
952 952 double end_time2_sec = os::elapsedTime();
953 953 double elapsed_time2_sec = end_time2_sec - start_time_sec;
954 954
955 955 #if 0
956 956 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
957 957 "overhead %1.4lf",
958 958 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
959 959 the_task->conc_overhead(os::elapsedTime()) * 8.0);
960 960 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
961 961 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
962 962 #endif
963 963 } while (!_cm->has_aborted() && the_task->has_aborted());
964 964 }
965 965 the_task->record_end_time();
966 966 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
967 967
968 968 ConcurrentGCThread::stsLeave();
969 969
970 970 double end_vtime = os::elapsedVTime();
971 971 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
972 972 }
973 973
974 974 CMConcurrentMarkingTask(ConcurrentMark* cm,
975 975 ConcurrentMarkThread* cmt) :
976 976 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
977 977
978 978 ~CMConcurrentMarkingTask() { }
979 979 };
980 980
981 981 // Calculates the number of active workers for a concurrent
982 982 // phase.
983 983 uint ConcurrentMark::calc_parallel_marking_threads() {
984 984 if (G1CollectedHeap::use_parallel_gc_threads()) {
985 985 uint n_conc_workers = 0;
986 986 if (!UseDynamicNumberOfGCThreads ||
987 987 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
988 988 !ForceDynamicNumberOfGCThreads)) {
989 989 n_conc_workers = max_parallel_marking_threads();
990 990 } else {
991 991 n_conc_workers =
992 992 AdaptiveSizePolicy::calc_default_active_workers(
993 993 max_parallel_marking_threads(),
994 994 1, /* Minimum workers */
995 995 parallel_marking_threads(),
996 996 Threads::number_of_non_daemon_threads());
997 997 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
998 998 // that scaling has already gone into "_max_parallel_marking_threads".
999 999 }
1000 1000 assert(n_conc_workers > 0, "Always need at least 1");
1001 1001 return n_conc_workers;
1002 1002 }
1003 1003 // If we are not running with any parallel GC threads we will not
1004 1004 // have spawned any marking threads either. Hence the number of
1005 1005 // concurrent workers should be 0.
1006 1006 return 0;
1007 1007 }
1008 1008
1009 1009 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1010 1010 // Currently, only survivors can be root regions.
1011 1011 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1012 1012 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1013 1013
1014 1014 const uintx interval = PrefetchScanIntervalInBytes;
1015 1015 HeapWord* curr = hr->bottom();
1016 1016 const HeapWord* end = hr->top();
1017 1017 while (curr < end) {
1018 1018 Prefetch::read(curr, interval);
1019 1019 oop obj = oop(curr);
1020 1020 int size = obj->oop_iterate(&cl);
1021 1021 assert(size == obj->size(), "sanity");
1022 1022 curr += size;
1023 1023 }
1024 1024 }
1025 1025
1026 1026 class CMRootRegionScanTask : public AbstractGangTask {
1027 1027 private:
1028 1028 ConcurrentMark* _cm;
1029 1029
1030 1030 public:
1031 1031 CMRootRegionScanTask(ConcurrentMark* cm) :
1032 1032 AbstractGangTask("Root Region Scan"), _cm(cm) { }
1033 1033
1034 1034 void work(uint worker_id) {
1035 1035 assert(Thread::current()->is_ConcurrentGC_thread(),
1036 1036 "this should only be done by a conc GC thread");
1037 1037
1038 1038 CMRootRegions* root_regions = _cm->root_regions();
1039 1039 HeapRegion* hr = root_regions->claim_next();
1040 1040 while (hr != NULL) {
1041 1041 _cm->scanRootRegion(hr, worker_id);
1042 1042 hr = root_regions->claim_next();
1043 1043 }
1044 1044 }
1045 1045 };
1046 1046
1047 1047 void ConcurrentMark::scanRootRegions() {
1048 1048 // scan_in_progress() will have been set to true only if there was
1049 1049 // at least one root region to scan. So, if it's false, we
1050 1050 // should not attempt to do any further work.
1051 1051 if (root_regions()->scan_in_progress()) {
1052 1052 _parallel_marking_threads = calc_parallel_marking_threads();
1053 1053 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1054 1054 "Maximum number of marking threads exceeded");
1055 1055 uint active_workers = MAX2(1U, parallel_marking_threads());
1056 1056
1057 1057 CMRootRegionScanTask task(this);
1058 1058 if (parallel_marking_threads() > 0) {
1059 1059 _parallel_workers->set_active_workers((int) active_workers);
1060 1060 _parallel_workers->run_task(&task);
1061 1061 } else {
1062 1062 task.work(0);
1063 1063 }
1064 1064
1065 1065 // It's possible that has_aborted() is true here without actually
1066 1066 // aborting the survivor scan earlier. This is OK as it's
1067 1067 // mainly used for sanity checking.
1068 1068 root_regions()->scan_finished();
1069 1069 }
1070 1070 }
1071 1071
1072 1072 void ConcurrentMark::markFromRoots() {
1073 1073 // we might be tempted to assert that:
1074 1074 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1075 1075 // "inconsistent argument?");
1076 1076 // However that wouldn't be right, because it's possible that
1077 1077 // a safepoint is indeed in progress as a younger generation
1078 1078 // stop-the-world GC happens even as we mark in this generation.
1079 1079
1080 1080 _restart_for_overflow = false;
1081 1081 force_overflow_conc()->init();
1082 1082
1083 1083 // _g1h has _n_par_threads
1084 1084 _parallel_marking_threads = calc_parallel_marking_threads();
1085 1085 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1086 1086 "Maximum number of marking threads exceeded");
1087 1087
1088 1088 uint active_workers = MAX2(1U, parallel_marking_threads());
1089 1089
1090 1090 // Parallel task terminator is set in "set_phase()"
1091 1091 set_phase(active_workers, true /* concurrent */);
1092 1092
1093 1093 CMConcurrentMarkingTask markingTask(this, cmThread());
1094 1094 if (parallel_marking_threads() > 0) {
1095 1095 _parallel_workers->set_active_workers((int)active_workers);
1096 1096 // Don't set _n_par_threads because it affects MT in process_strong_roots()
1097 1097 // and the decisions on that MT processing are made elsewhere.
1098 1098 assert(_parallel_workers->active_workers() > 0, "Should have been set");
1099 1099 _parallel_workers->run_task(&markingTask);
1100 1100 } else {
1101 1101 markingTask.work(0);
1102 1102 }
1103 1103 print_stats();
1104 1104 }
1105 1105
1106 1106 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1107 1107 // world is stopped at this checkpoint
1108 1108 assert(SafepointSynchronize::is_at_safepoint(),
1109 1109 "world should be stopped");
1110 1110
1111 1111 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1112 1112
1113 1113 // If a full collection has happened, we shouldn't do this.
1114 1114 if (has_aborted()) {
1115 1115 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1116 1116 return;
1117 1117 }
1118 1118
1119 1119 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1120 1120
1121 1121 if (VerifyDuringGC) {
1122 1122 HandleMark hm; // handle scope
1123 1123 gclog_or_tty->print(" VerifyDuringGC:(before)");
1124 1124 Universe::heap()->prepare_for_verify();
1125 1125 Universe::verify(/* silent */ false,
1126 1126 /* option */ VerifyOption_G1UsePrevMarking);
1127 1127 }
1128 1128
1129 1129 G1CollectorPolicy* g1p = g1h->g1_policy();
1130 1130 g1p->record_concurrent_mark_remark_start();
1131 1131
1132 1132 double start = os::elapsedTime();
1133 1133
1134 1134 checkpointRootsFinalWork();
1135 1135
1136 1136 double mark_work_end = os::elapsedTime();
1137 1137
1138 1138 weakRefsWork(clear_all_soft_refs);
1139 1139
1140 1140 if (has_overflown()) {
1141 1141 // Oops. We overflowed. Restart concurrent marking.
1142 1142 _restart_for_overflow = true;
1143 1143 // Clear the flag. We do not need it any more.
1144 1144 clear_has_overflown();
1145 1145 if (G1TraceMarkStackOverflow) {
1146 1146 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1147 1147 }
1148 1148 } else {
1149 1149 // Aggregate the per-task counting data that we have accumulated
1150 1150 // while marking.
1151 1151 aggregate_count_data();
1152 1152
1153 1153 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1154 1154 // We're done with marking.
1155 1155 // This is the end of the marking cycle; we expect all
1156 1156 // threads to have SATB queues with active set to true.
1157 1157 satb_mq_set.set_active_all_threads(false, /* new active value */
1158 1158 true /* expected_active */);
1159 1159
1160 1160 if (VerifyDuringGC) {
1161 1161 HandleMark hm; // handle scope
1162 1162 gclog_or_tty->print(" VerifyDuringGC:(after)");
1163 1163 Universe::heap()->prepare_for_verify();
1164 1164 Universe::verify(/* silent */ false,
1165 1165 /* option */ VerifyOption_G1UseNextMarking);
1166 1166 }
1167 1167 assert(!restart_for_overflow(), "sanity");
1168 1168 }
1169 1169
1170 1170 // Reset the marking state if marking completed
1171 1171 if (!restart_for_overflow()) {
1172 1172 set_non_marking_state();
1173 1173 }
1174 1174
1175 1175 #if VERIFY_OBJS_PROCESSED
1176 1176 _scan_obj_cl.objs_processed = 0;
1177 1177 ThreadLocalObjQueue::objs_enqueued = 0;
1178 1178 #endif
1179 1179
1180 1180 // Statistics
1181 1181 double now = os::elapsedTime();
1182 1182 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1183 1183 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1184 1184 _remark_times.add((now - start) * 1000.0);
1185 1185
1186 1186 g1p->record_concurrent_mark_remark_end();
1187 1187 }
1188 1188
1189 1189 // Base class of the closures that finalize and verify the
1190 1190 // liveness counting data.
1191 1191 class CMCountDataClosureBase: public HeapRegionClosure {
1192 1192 protected:
1193 + G1CollectedHeap* _g1h;
1193 1194 ConcurrentMark* _cm;
1195 + CardTableModRefBS* _ct_bs;
1196 +
1194 1197 BitMap* _region_bm;
1195 1198 BitMap* _card_bm;
1196 1199
1197 - void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
1198 - assert(start_idx <= last_idx, "sanity");
1199 -
1200 - // Set the inclusive bit range [start_idx, last_idx].
1201 - // For small ranges (up to 8 cards) use a simple loop; otherwise
1202 - // use par_at_put_range.
1203 - if ((last_idx - start_idx) < 8) {
1204 - for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1205 - _card_bm->par_set_bit(i);
1206 - }
1207 - } else {
1208 - assert(last_idx < _card_bm->size(), "sanity");
1209 - // Note BitMap::par_at_put_range() is exclusive.
1210 - _card_bm->par_at_put_range(start_idx, last_idx+1, true);
1211 - }
1212 - }
1213 -
1214 - // It takes a region that's not empty (i.e., it has at least one
1200 + // Takes a region that's not empty (i.e., it has at least one
1215 1201 // live object in it) and sets its corresponding bit on the region
1216 1202 // bitmap to 1. If the region is "starts humongous" it will also set
1217 1203 // to 1 the bits on the region bitmap that correspond to its
1218 1204 // associated "continues humongous" regions.
1219 1205 void set_bit_for_region(HeapRegion* hr) {
1220 1206 assert(!hr->continuesHumongous(), "should have filtered those out");
1221 1207
1222 1208 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1223 1209 if (!hr->startsHumongous()) {
1224 1210 // Normal (non-humongous) case: just set the bit.
1225 1211 _region_bm->par_at_put(index, true);
1226 1212 } else {
1227 1213 // Starts humongous case: calculate how many regions are part of
1228 1214 // this humongous region and then set the bit range.
1229 1215 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1230 1216 _region_bm->par_at_put_range(index, end_index, true);
1231 1217 }
1232 1218 }
1233 1219
1234 1220 public:
1235 - CMCountDataClosureBase(ConcurrentMark *cm,
1221 + CMCountDataClosureBase(G1CollectedHeap* g1h,
1236 1222 BitMap* region_bm, BitMap* card_bm):
1237 - _cm(cm), _region_bm(region_bm), _card_bm(card_bm) { }
1223 + _g1h(g1h), _cm(g1h->concurrent_mark()),
1224 + _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1225 + _region_bm(region_bm), _card_bm(card_bm) { }
1238 1226 };
1239 1227
1240 1228 // Closure that calculates the # live objects per region. Used
1241 1229 // for verification purposes during the cleanup pause.
1242 1230 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1243 1231 CMBitMapRO* _bm;
1244 1232 size_t _region_marked_bytes;
1245 1233
1246 1234 public:
1247 - CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
1235 + CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1248 1236 BitMap* region_bm, BitMap* card_bm) :
1249 - CMCountDataClosureBase(cm, region_bm, card_bm),
1237 + CMCountDataClosureBase(g1h, region_bm, card_bm),
1250 1238 _bm(bm), _region_marked_bytes(0) { }
1251 1239
1252 1240 bool doHeapRegion(HeapRegion* hr) {
1253 1241
1254 1242 if (hr->continuesHumongous()) {
1255 1243 // We will ignore these here and process them when their
1256 1244 // associated "starts humongous" region is processed (see
1257 1245 // set_bit_for_heap_region()). Note that we cannot rely on their
1258 1246 // associated "starts humongous" region to have their bit set to
1259 1247 // 1 since, due to the region chunking in the parallel region
1260 1248 // iteration, a "continues humongous" region might be visited
1261 1249 // before its associated "starts humongous".
1262 1250 return false;
1263 1251 }
1264 1252
1265 - HeapWord* nextTop = hr->next_top_at_mark_start();
1266 - HeapWord* start = hr->bottom();
1253 + HeapWord* ntams = hr->next_top_at_mark_start();
1254 + HeapWord* start = hr->bottom();
1267 1255
1268 - assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
1256 + assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1269 1257 err_msg("Preconditions not met - "
1270 - "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
1271 - start, nextTop, hr->end()));
1258 + "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1259 + start, ntams, hr->end()));
1272 1260
1273 1261 // Find the first marked object at or after "start".
1274 - start = _bm->getNextMarkedWordAddress(start, nextTop);
1262 + start = _bm->getNextMarkedWordAddress(start, ntams);
1275 1263
1276 1264 size_t marked_bytes = 0;
1277 1265
1278 - while (start < nextTop) {
1266 + while (start < ntams) {
1279 1267 oop obj = oop(start);
1280 1268 int obj_sz = obj->size();
1281 - HeapWord* obj_last = start + obj_sz - 1;
1269 + HeapWord* obj_end = start + obj_sz;
1282 1270
1283 1271 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1284 - BitMap::idx_t last_idx = _cm->card_bitmap_index_for(obj_last);
1272 + BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1285 1273
1286 - // Set the bits in the card BM for this object (inclusive).
1287 - set_card_bitmap_range(start_idx, last_idx);
1274 + // Note: if we're looking at the last region in heap - obj_end
1275 + // could be actually just beyond the end of the heap; end_idx
1276 + // will then correspond to a (non-existent) card that is also
1277 + // just beyond the heap.
1278 + if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1279 + // end of object is not card aligned - increment to cover
1280 + // all the cards spanned by the object
1281 + end_idx += 1;
1282 + }
1283 +
1284 + // Set the bits in the card BM for the cards spanned by this object.
1285 + _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1288 1286
1289 1287 // Add the size of this object to the number of marked bytes.
1290 1288 marked_bytes += (size_t)obj_sz * HeapWordSize;
1291 1289
1292 1290 // Find the next marked object after this one.
1293 - start = _bm->getNextMarkedWordAddress(obj_last + 1, nextTop);
1291 + start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1294 1292 }
1295 1293
1296 1294 // Mark the allocated-since-marking portion...
1297 1295 HeapWord* top = hr->top();
1298 - if (nextTop < top) {
1299 - BitMap::idx_t start_idx = _cm->card_bitmap_index_for(nextTop);
1300 - BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top - 1);
1296 + if (ntams < top) {
1297 + BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1298 + BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1301 1299
1302 - set_card_bitmap_range(start_idx, last_idx);
1300 + // Note: if we're looking at the last region in heap - top
1301 + // could be actually just beyond the end of the heap; end_idx
1302 + // will then correspond to a (non-existent) card that is also
1303 + // just beyond the heap.
1304 + if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1305 + // end of object is not card aligned - increment to cover
1306 + // all the cards spanned by the object
1307 + end_idx += 1;
1308 + }
1309 + _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1303 1310
1304 1311 // This definitely means the region has live objects.
1305 1312 set_bit_for_region(hr);
1306 1313 }
1307 1314
1308 1315 // Update the live region bitmap.
1309 1316 if (marked_bytes > 0) {
1310 1317 set_bit_for_region(hr);
1311 1318 }
1312 1319
1313 1320 // Set the marked bytes for the current region so that
1314 1321 // it can be queried by a calling verificiation routine
1315 1322 _region_marked_bytes = marked_bytes;
1316 1323
1317 1324 return false;
1318 1325 }
1319 1326
1320 1327 size_t region_marked_bytes() const { return _region_marked_bytes; }
1321 1328 };
1322 1329
1323 1330 // Heap region closure used for verifying the counting data
1324 1331 // that was accumulated concurrently and aggregated during
1325 1332 // the remark pause. This closure is applied to the heap
1326 1333 // regions during the STW cleanup pause.
1327 1334
1328 1335 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1336 + G1CollectedHeap* _g1h;
1329 1337 ConcurrentMark* _cm;
1330 1338 CalcLiveObjectsClosure _calc_cl;
1331 1339 BitMap* _region_bm; // Region BM to be verified
1332 1340 BitMap* _card_bm; // Card BM to be verified
1333 1341 bool _verbose; // verbose output?
1334 1342
1335 1343 BitMap* _exp_region_bm; // Expected Region BM values
1336 1344 BitMap* _exp_card_bm; // Expected card BM values
1337 1345
1338 1346 int _failures;
1339 1347
1340 1348 public:
1341 - VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
1349 + VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1342 1350 BitMap* region_bm,
1343 1351 BitMap* card_bm,
1344 1352 BitMap* exp_region_bm,
1345 1353 BitMap* exp_card_bm,
1346 1354 bool verbose) :
1347 - _cm(cm),
1348 - _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
1355 + _g1h(g1h), _cm(g1h->concurrent_mark()),
1356 + _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1349 1357 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1350 1358 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1351 1359 _failures(0) { }
1352 1360
1353 1361 int failures() const { return _failures; }
1354 1362
1355 1363 bool doHeapRegion(HeapRegion* hr) {
1356 1364 if (hr->continuesHumongous()) {
1357 1365 // We will ignore these here and process them when their
1358 1366 // associated "starts humongous" region is processed (see
1359 1367 // set_bit_for_heap_region()). Note that we cannot rely on their
1360 1368 // associated "starts humongous" region to have their bit set to
1361 1369 // 1 since, due to the region chunking in the parallel region
1362 1370 // iteration, a "continues humongous" region might be visited
1363 1371 // before its associated "starts humongous".
1364 1372 return false;
1365 1373 }
1366 1374
1367 1375 int failures = 0;
1368 1376
1369 1377 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1370 1378 // this region and set the corresponding bits in the expected region
1371 1379 // and card bitmaps.
1372 1380 bool res = _calc_cl.doHeapRegion(hr);
1373 1381 assert(res == false, "should be continuing");
1374 1382
1375 1383 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1376 1384 Mutex::_no_safepoint_check_flag);
1377 1385
1378 1386 // Verify the marked bytes for this region.
1379 1387 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1380 1388 size_t act_marked_bytes = hr->next_marked_bytes();
1381 1389
1382 1390 // We're not OK if expected marked bytes > actual marked bytes. It means
1383 1391 // we have missed accounting some objects during the actual marking.
1384 1392 if (exp_marked_bytes > act_marked_bytes) {
1385 1393 if (_verbose) {
1386 1394 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1387 1395 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1388 1396 hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1389 1397 }
1390 1398 failures += 1;
1391 1399 }
1392 1400
1393 1401 // Verify the bit, for this region, in the actual and expected
1394 1402 // (which was just calculated) region bit maps.
1395 1403 // We're not OK if the bit in the calculated expected region
1396 1404 // bitmap is set and the bit in the actual region bitmap is not.
1397 1405 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1398 1406
1399 1407 bool expected = _exp_region_bm->at(index);
1400 1408 bool actual = _region_bm->at(index);
1401 1409 if (expected && !actual) {
1402 1410 if (_verbose) {
1403 1411 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1404 1412 "expected: %s, actual: %s",
1405 1413 hr->hrs_index(),
1406 1414 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1407 1415 }
1408 1416 failures += 1;
1409 1417 }
1410 1418
1411 1419 // Verify that the card bit maps for the cards spanned by the current
1412 1420 // region match. We have an error if we have a set bit in the expected
1413 1421 // bit map and the corresponding bit in the actual bitmap is not set.
1414 1422
1415 1423 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1416 1424 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1417 1425
1418 1426 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1419 1427 expected = _exp_card_bm->at(i);
1420 1428 actual = _card_bm->at(i);
1421 1429
1422 1430 if (expected && !actual) {
1423 1431 if (_verbose) {
1424 1432 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1425 1433 "expected: %s, actual: %s",
1426 1434 hr->hrs_index(), i,
1427 1435 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1428 1436 }
1429 1437 failures += 1;
1430 1438 }
1431 1439 }
1432 1440
1433 1441 if (failures > 0 && _verbose) {
1434 1442 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1435 1443 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1436 1444 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1437 1445 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1438 1446 }
1439 1447
1440 1448 _failures += failures;
1441 1449
1442 1450 // We could stop iteration over the heap when we
1443 1451 // find the first violating region by returning true.
1444 1452 return false;
1445 1453 }
1446 1454 };
1447 1455
1448 1456
1449 1457 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1450 1458 protected:
1451 1459 G1CollectedHeap* _g1h;
1452 1460 ConcurrentMark* _cm;
1453 1461 BitMap* _actual_region_bm;
1454 1462 BitMap* _actual_card_bm;
1455 1463
1456 1464 uint _n_workers;
1457 1465
1458 1466 BitMap* _expected_region_bm;
1459 1467 BitMap* _expected_card_bm;
1460 1468
1461 1469 int _failures;
1462 1470 bool _verbose;
1463 1471
1464 1472 public:
1465 1473 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1466 1474 BitMap* region_bm, BitMap* card_bm,
1467 1475 BitMap* expected_region_bm, BitMap* expected_card_bm)
1468 1476 : AbstractGangTask("G1 verify final counting"),
1469 1477 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1470 1478 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1471 1479 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1472 1480 _failures(0), _verbose(false),
1473 1481 _n_workers(0) {
1474 1482 assert(VerifyDuringGC, "don't call this otherwise");
1475 1483
1476 1484 // Use the value already set as the number of active threads
1477 1485 // in the call to run_task().
1478 1486 if (G1CollectedHeap::use_parallel_gc_threads()) {
1479 1487 assert( _g1h->workers()->active_workers() > 0,
1480 1488 "Should have been previously set");
1481 1489 _n_workers = _g1h->workers()->active_workers();
1482 1490 } else {
1483 1491 _n_workers = 1;
1484 1492 }
1485 1493
1486 1494 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1487 1495 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1488 1496
1489 1497 _verbose = _cm->verbose_medium();
1490 1498 }
1491 1499
1492 1500 void work(uint worker_id) {
1493 1501 assert(worker_id < _n_workers, "invariant");
1494 1502
1495 - VerifyLiveObjectDataHRClosure verify_cl(_cm,
1503 + VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1496 1504 _actual_region_bm, _actual_card_bm,
1497 1505 _expected_region_bm,
1498 1506 _expected_card_bm,
1499 1507 _verbose);
1500 1508
1501 1509 if (G1CollectedHeap::use_parallel_gc_threads()) {
1502 1510 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1503 1511 worker_id,
1504 1512 _n_workers,
1505 1513 HeapRegion::VerifyCountClaimValue);
1506 1514 } else {
1507 1515 _g1h->heap_region_iterate(&verify_cl);
1508 1516 }
1509 1517
1510 1518 Atomic::add(verify_cl.failures(), &_failures);
1511 1519 }
1512 1520
1513 1521 int failures() const { return _failures; }
1514 1522 };
1515 1523
1516 1524 // Closure that finalizes the liveness counting data.
1517 1525 // Used during the cleanup pause.
1518 1526 // Sets the bits corresponding to the interval [NTAMS, top]
1519 1527 // (which contains the implicitly live objects) in the
1520 1528 // card liveness bitmap. Also sets the bit for each region,
1521 1529 // containing live data, in the region liveness bitmap.
1522 1530
1523 1531 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1524 1532 public:
1525 - FinalCountDataUpdateClosure(ConcurrentMark* cm,
1533 + FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1526 1534 BitMap* region_bm,
1527 1535 BitMap* card_bm) :
1528 - CMCountDataClosureBase(cm, region_bm, card_bm) { }
1536 + CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1529 1537
1530 1538 bool doHeapRegion(HeapRegion* hr) {
1531 1539
1532 1540 if (hr->continuesHumongous()) {
1533 1541 // We will ignore these here and process them when their
1534 1542 // associated "starts humongous" region is processed (see
1535 1543 // set_bit_for_heap_region()). Note that we cannot rely on their
1536 1544 // associated "starts humongous" region to have their bit set to
1537 1545 // 1 since, due to the region chunking in the parallel region
1538 1546 // iteration, a "continues humongous" region might be visited
1539 1547 // before its associated "starts humongous".
1540 1548 return false;
1541 1549 }
1542 1550
1543 1551 HeapWord* ntams = hr->next_top_at_mark_start();
1544 1552 HeapWord* top = hr->top();
1545 1553
1546 1554 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1547 1555
1548 1556 // Mark the allocated-since-marking portion...
1549 1557 if (ntams < top) {
1550 1558 // This definitely means the region has live objects.
1551 1559 set_bit_for_region(hr);
1552 - }
1553 1560
1554 - // Now set the bits for [ntams, top]
1555 - BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1556 - BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top);
1557 - set_card_bitmap_range(start_idx, last_idx);
1561 + // Now set the bits in the card bitmap for [ntams, top)
1562 + BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1563 + BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1564 +
1565 + // Note: if we're looking at the last region in heap - top
1566 + // could be actually just beyond the end of the heap; end_idx
1567 + // will then correspond to a (non-existent) card that is also
1568 + // just beyond the heap.
1569 + if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1570 + // end of object is not card aligned - increment to cover
1571 + // all the cards spanned by the object
1572 + end_idx += 1;
1573 + }
1574 +
1575 + assert(end_idx <= _card_bm->size(),
1576 + err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1577 + end_idx, _card_bm->size()));
1578 + assert(start_idx < _card_bm->size(),
1579 + err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1580 + start_idx, _card_bm->size()));
1581 +
1582 + _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1583 + }
1558 1584
1559 1585 // Set the bit for the region if it contains live data
1560 1586 if (hr->next_marked_bytes() > 0) {
1561 1587 set_bit_for_region(hr);
1562 1588 }
1563 1589
1564 1590 return false;
1565 1591 }
1566 1592 };
1567 1593
1568 1594 class G1ParFinalCountTask: public AbstractGangTask {
1569 1595 protected:
1570 1596 G1CollectedHeap* _g1h;
1571 1597 ConcurrentMark* _cm;
1572 1598 BitMap* _actual_region_bm;
1573 1599 BitMap* _actual_card_bm;
1574 1600
1575 1601 uint _n_workers;
1576 1602
1577 1603 public:
1578 1604 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1579 1605 : AbstractGangTask("G1 final counting"),
1580 1606 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1581 1607 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1582 1608 _n_workers(0) {
1583 1609 // Use the value already set as the number of active threads
1584 1610 // in the call to run_task().
1585 1611 if (G1CollectedHeap::use_parallel_gc_threads()) {
1586 1612 assert( _g1h->workers()->active_workers() > 0,
1587 1613 "Should have been previously set");
1588 1614 _n_workers = _g1h->workers()->active_workers();
1589 1615 } else {
1590 1616 _n_workers = 1;
1591 1617 }
1592 1618 }
1593 1619
1594 1620 void work(uint worker_id) {
1595 1621 assert(worker_id < _n_workers, "invariant");
1596 1622
1597 - FinalCountDataUpdateClosure final_update_cl(_cm,
1623 + FinalCountDataUpdateClosure final_update_cl(_g1h,
1598 1624 _actual_region_bm,
1599 1625 _actual_card_bm);
1600 1626
1601 1627 if (G1CollectedHeap::use_parallel_gc_threads()) {
1602 1628 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1603 1629 worker_id,
1604 1630 _n_workers,
1605 1631 HeapRegion::FinalCountClaimValue);
1606 1632 } else {
1607 1633 _g1h->heap_region_iterate(&final_update_cl);
1608 1634 }
1609 1635 }
1610 1636 };
1611 1637
1612 1638 class G1ParNoteEndTask;
1613 1639
1614 1640 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1615 1641 G1CollectedHeap* _g1;
1616 1642 int _worker_num;
1617 1643 size_t _max_live_bytes;
1618 1644 uint _regions_claimed;
1619 1645 size_t _freed_bytes;
1620 1646 FreeRegionList* _local_cleanup_list;
1621 1647 OldRegionSet* _old_proxy_set;
1622 1648 HumongousRegionSet* _humongous_proxy_set;
1623 1649 HRRSCleanupTask* _hrrs_cleanup_task;
1624 1650 double _claimed_region_time;
1625 1651 double _max_region_time;
1626 1652
1627 1653 public:
1628 1654 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1629 1655 int worker_num,
1630 1656 FreeRegionList* local_cleanup_list,
1631 1657 OldRegionSet* old_proxy_set,
1632 1658 HumongousRegionSet* humongous_proxy_set,
1633 1659 HRRSCleanupTask* hrrs_cleanup_task) :
1634 1660 _g1(g1), _worker_num(worker_num),
1635 1661 _max_live_bytes(0), _regions_claimed(0),
1636 1662 _freed_bytes(0),
1637 1663 _claimed_region_time(0.0), _max_region_time(0.0),
1638 1664 _local_cleanup_list(local_cleanup_list),
1639 1665 _old_proxy_set(old_proxy_set),
1640 1666 _humongous_proxy_set(humongous_proxy_set),
1641 1667 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1642 1668
1643 1669 size_t freed_bytes() { return _freed_bytes; }
1644 1670
1645 1671 bool doHeapRegion(HeapRegion *hr) {
1646 1672 if (hr->continuesHumongous()) {
1647 1673 return false;
1648 1674 }
1649 1675 // We use a claim value of zero here because all regions
1650 1676 // were claimed with value 1 in the FinalCount task.
1651 1677 _g1->reset_gc_time_stamps(hr);
1652 1678 double start = os::elapsedTime();
1653 1679 _regions_claimed++;
1654 1680 hr->note_end_of_marking();
1655 1681 _max_live_bytes += hr->max_live_bytes();
1656 1682 _g1->free_region_if_empty(hr,
1657 1683 &_freed_bytes,
1658 1684 _local_cleanup_list,
1659 1685 _old_proxy_set,
1660 1686 _humongous_proxy_set,
1661 1687 _hrrs_cleanup_task,
1662 1688 true /* par */);
1663 1689 double region_time = (os::elapsedTime() - start);
1664 1690 _claimed_region_time += region_time;
1665 1691 if (region_time > _max_region_time) {
1666 1692 _max_region_time = region_time;
1667 1693 }
1668 1694 return false;
1669 1695 }
1670 1696
1671 1697 size_t max_live_bytes() { return _max_live_bytes; }
1672 1698 uint regions_claimed() { return _regions_claimed; }
1673 1699 double claimed_region_time_sec() { return _claimed_region_time; }
1674 1700 double max_region_time_sec() { return _max_region_time; }
1675 1701 };
1676 1702
1677 1703 class G1ParNoteEndTask: public AbstractGangTask {
1678 1704 friend class G1NoteEndOfConcMarkClosure;
1679 1705
1680 1706 protected:
1681 1707 G1CollectedHeap* _g1h;
1682 1708 size_t _max_live_bytes;
1683 1709 size_t _freed_bytes;
1684 1710 FreeRegionList* _cleanup_list;
1685 1711
1686 1712 public:
1687 1713 G1ParNoteEndTask(G1CollectedHeap* g1h,
1688 1714 FreeRegionList* cleanup_list) :
1689 1715 AbstractGangTask("G1 note end"), _g1h(g1h),
1690 1716 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1691 1717
1692 1718 void work(uint worker_id) {
1693 1719 double start = os::elapsedTime();
1694 1720 FreeRegionList local_cleanup_list("Local Cleanup List");
1695 1721 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1696 1722 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1697 1723 HRRSCleanupTask hrrs_cleanup_task;
1698 1724 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1699 1725 &old_proxy_set,
1700 1726 &humongous_proxy_set,
1701 1727 &hrrs_cleanup_task);
1702 1728 if (G1CollectedHeap::use_parallel_gc_threads()) {
1703 1729 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1704 1730 _g1h->workers()->active_workers(),
1705 1731 HeapRegion::NoteEndClaimValue);
1706 1732 } else {
1707 1733 _g1h->heap_region_iterate(&g1_note_end);
1708 1734 }
1709 1735 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1710 1736
1711 1737 // Now update the lists
1712 1738 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1713 1739 NULL /* free_list */,
1714 1740 &old_proxy_set,
1715 1741 &humongous_proxy_set,
1716 1742 true /* par */);
1717 1743 {
1718 1744 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1719 1745 _max_live_bytes += g1_note_end.max_live_bytes();
1720 1746 _freed_bytes += g1_note_end.freed_bytes();
1721 1747
1722 1748 // If we iterate over the global cleanup list at the end of
1723 1749 // cleanup to do this printing we will not guarantee to only
1724 1750 // generate output for the newly-reclaimed regions (the list
1725 1751 // might not be empty at the beginning of cleanup; we might
1726 1752 // still be working on its previous contents). So we do the
1727 1753 // printing here, before we append the new regions to the global
1728 1754 // cleanup list.
1729 1755
1730 1756 G1HRPrinter* hr_printer = _g1h->hr_printer();
1731 1757 if (hr_printer->is_active()) {
1732 1758 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1733 1759 while (iter.more_available()) {
1734 1760 HeapRegion* hr = iter.get_next();
1735 1761 hr_printer->cleanup(hr);
1736 1762 }
1737 1763 }
1738 1764
1739 1765 _cleanup_list->add_as_tail(&local_cleanup_list);
1740 1766 assert(local_cleanup_list.is_empty(), "post-condition");
1741 1767
1742 1768 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1743 1769 }
1744 1770 }
1745 1771 size_t max_live_bytes() { return _max_live_bytes; }
1746 1772 size_t freed_bytes() { return _freed_bytes; }
1747 1773 };
1748 1774
1749 1775 class G1ParScrubRemSetTask: public AbstractGangTask {
1750 1776 protected:
1751 1777 G1RemSet* _g1rs;
1752 1778 BitMap* _region_bm;
1753 1779 BitMap* _card_bm;
1754 1780 public:
1755 1781 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1756 1782 BitMap* region_bm, BitMap* card_bm) :
1757 1783 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1758 1784 _region_bm(region_bm), _card_bm(card_bm) { }
1759 1785
1760 1786 void work(uint worker_id) {
1761 1787 if (G1CollectedHeap::use_parallel_gc_threads()) {
1762 1788 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1763 1789 HeapRegion::ScrubRemSetClaimValue);
1764 1790 } else {
1765 1791 _g1rs->scrub(_region_bm, _card_bm);
1766 1792 }
1767 1793 }
1768 1794
1769 1795 };
1770 1796
1771 1797 void ConcurrentMark::cleanup() {
1772 1798 // world is stopped at this checkpoint
1773 1799 assert(SafepointSynchronize::is_at_safepoint(),
1774 1800 "world should be stopped");
1775 1801 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1776 1802
1777 1803 // If a full collection has happened, we shouldn't do this.
1778 1804 if (has_aborted()) {
1779 1805 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1780 1806 return;
1781 1807 }
1782 1808
1783 1809 HRSPhaseSetter x(HRSPhaseCleanup);
1784 1810 g1h->verify_region_sets_optional();
1785 1811
1786 1812 if (VerifyDuringGC) {
1787 1813 HandleMark hm; // handle scope
1788 1814 gclog_or_tty->print(" VerifyDuringGC:(before)");
1789 1815 Universe::heap()->prepare_for_verify();
1790 1816 Universe::verify(/* silent */ false,
1791 1817 /* option */ VerifyOption_G1UsePrevMarking);
1792 1818 }
1793 1819
1794 1820 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1795 1821 g1p->record_concurrent_mark_cleanup_start();
1796 1822
1797 1823 double start = os::elapsedTime();
1798 1824
1799 1825 HeapRegionRemSet::reset_for_cleanup_tasks();
1800 1826
1801 1827 uint n_workers;
1802 1828
1803 1829 // Do counting once more with the world stopped for good measure.
1804 1830 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1805 1831
1806 1832 if (G1CollectedHeap::use_parallel_gc_threads()) {
1807 1833 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1808 1834 "sanity check");
1809 1835
1810 1836 g1h->set_par_threads();
1811 1837 n_workers = g1h->n_par_threads();
1812 1838 assert(g1h->n_par_threads() == n_workers,
1813 1839 "Should not have been reset");
1814 1840 g1h->workers()->run_task(&g1_par_count_task);
1815 1841 // Done with the parallel phase so reset to 0.
1816 1842 g1h->set_par_threads(0);
1817 1843
1818 1844 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1819 1845 "sanity check");
1820 1846 } else {
1821 1847 n_workers = 1;
1822 1848 g1_par_count_task.work(0);
1823 1849 }
1824 1850
1825 1851 if (VerifyDuringGC) {
1826 1852 // Verify that the counting data accumulated during marking matches
1827 1853 // that calculated by walking the marking bitmap.
1828 1854
1829 1855 // Bitmaps to hold expected values
1830 1856 BitMap expected_region_bm(_region_bm.size(), false);
1831 1857 BitMap expected_card_bm(_card_bm.size(), false);
1832 1858
1833 1859 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1834 1860 &_region_bm,
1835 1861 &_card_bm,
1836 1862 &expected_region_bm,
1837 1863 &expected_card_bm);
1838 1864
1839 1865 if (G1CollectedHeap::use_parallel_gc_threads()) {
1840 1866 g1h->set_par_threads((int)n_workers);
1841 1867 g1h->workers()->run_task(&g1_par_verify_task);
1842 1868 // Done with the parallel phase so reset to 0.
1843 1869 g1h->set_par_threads(0);
1844 1870
1845 1871 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1846 1872 "sanity check");
1847 1873 } else {
1848 1874 g1_par_verify_task.work(0);
1849 1875 }
1850 1876
1851 1877 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1852 1878 }
1853 1879
1854 1880 size_t start_used_bytes = g1h->used();
1855 1881 g1h->set_marking_complete();
1856 1882
1857 1883 double count_end = os::elapsedTime();
1858 1884 double this_final_counting_time = (count_end - start);
1859 1885 _total_counting_time += this_final_counting_time;
1860 1886
1861 1887 if (G1PrintRegionLivenessInfo) {
1862 1888 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1863 1889 _g1h->heap_region_iterate(&cl);
1864 1890 }
1865 1891
1866 1892 // Install newly created mark bitMap as "prev".
1867 1893 swapMarkBitMaps();
1868 1894
1869 1895 g1h->reset_gc_time_stamp();
1870 1896
1871 1897 // Note end of marking in all heap regions.
1872 1898 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1873 1899 if (G1CollectedHeap::use_parallel_gc_threads()) {
1874 1900 g1h->set_par_threads((int)n_workers);
1875 1901 g1h->workers()->run_task(&g1_par_note_end_task);
1876 1902 g1h->set_par_threads(0);
1877 1903
1878 1904 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1879 1905 "sanity check");
1880 1906 } else {
1881 1907 g1_par_note_end_task.work(0);
1882 1908 }
1883 1909 g1h->check_gc_time_stamps();
1884 1910
1885 1911 if (!cleanup_list_is_empty()) {
1886 1912 // The cleanup list is not empty, so we'll have to process it
1887 1913 // concurrently. Notify anyone else that might be wanting free
1888 1914 // regions that there will be more free regions coming soon.
1889 1915 g1h->set_free_regions_coming();
1890 1916 }
1891 1917
1892 1918 // call below, since it affects the metric by which we sort the heap
1893 1919 // regions.
1894 1920 if (G1ScrubRemSets) {
1895 1921 double rs_scrub_start = os::elapsedTime();
1896 1922 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1897 1923 if (G1CollectedHeap::use_parallel_gc_threads()) {
1898 1924 g1h->set_par_threads((int)n_workers);
1899 1925 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1900 1926 g1h->set_par_threads(0);
1901 1927
1902 1928 assert(g1h->check_heap_region_claim_values(
1903 1929 HeapRegion::ScrubRemSetClaimValue),
1904 1930 "sanity check");
1905 1931 } else {
1906 1932 g1_par_scrub_rs_task.work(0);
1907 1933 }
1908 1934
1909 1935 double rs_scrub_end = os::elapsedTime();
1910 1936 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1911 1937 _total_rs_scrub_time += this_rs_scrub_time;
1912 1938 }
1913 1939
1914 1940 // this will also free any regions totally full of garbage objects,
1915 1941 // and sort the regions.
1916 1942 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1917 1943
1918 1944 // Statistics.
1919 1945 double end = os::elapsedTime();
1920 1946 _cleanup_times.add((end - start) * 1000.0);
1921 1947
1922 1948 if (G1Log::fine()) {
1923 1949 g1h->print_size_transition(gclog_or_tty,
1924 1950 start_used_bytes,
1925 1951 g1h->used(),
1926 1952 g1h->capacity());
1927 1953 }
1928 1954
1929 1955 // Clean up will have freed any regions completely full of garbage.
1930 1956 // Update the soft reference policy with the new heap occupancy.
1931 1957 Universe::update_heap_info_at_gc();
1932 1958
1933 1959 // We need to make this be a "collection" so any collection pause that
1934 1960 // races with it goes around and waits for completeCleanup to finish.
1935 1961 g1h->increment_total_collections();
1936 1962
1937 1963 // We reclaimed old regions so we should calculate the sizes to make
1938 1964 // sure we update the old gen/space data.
1939 1965 g1h->g1mm()->update_sizes();
1940 1966
1941 1967 if (VerifyDuringGC) {
1942 1968 HandleMark hm; // handle scope
1943 1969 gclog_or_tty->print(" VerifyDuringGC:(after)");
1944 1970 Universe::heap()->prepare_for_verify();
1945 1971 Universe::verify(/* silent */ false,
1946 1972 /* option */ VerifyOption_G1UsePrevMarking);
1947 1973 }
1948 1974
1949 1975 g1h->verify_region_sets_optional();
1950 1976 }
1951 1977
1952 1978 void ConcurrentMark::completeCleanup() {
1953 1979 if (has_aborted()) return;
1954 1980
1955 1981 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1956 1982
1957 1983 _cleanup_list.verify_optional();
1958 1984 FreeRegionList tmp_free_list("Tmp Free List");
1959 1985
1960 1986 if (G1ConcRegionFreeingVerbose) {
1961 1987 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1962 1988 "cleanup list has %u entries",
1963 1989 _cleanup_list.length());
1964 1990 }
1965 1991
1966 1992   // No one else should be accessing the _cleanup_list at this point,
1967 1993 // so it's not necessary to take any locks
1968 1994 while (!_cleanup_list.is_empty()) {
1969 1995 HeapRegion* hr = _cleanup_list.remove_head();
1970 1996 assert(hr != NULL, "the list was not empty");
1971 1997 hr->par_clear();
1972 1998 tmp_free_list.add_as_tail(hr);
1973 1999
1974 2000 // Instead of adding one region at a time to the secondary_free_list,
1975 2001 // we accumulate them in the local list and move them a few at a
1976 2002 // time. This also cuts down on the number of notify_all() calls
1977 2003 // we do during this process. We'll also append the local list when
1978 2004 // _cleanup_list is empty (which means we just removed the last
1979 2005 // region from the _cleanup_list).
1980 2006 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1981 2007 _cleanup_list.is_empty()) {
1982 2008 if (G1ConcRegionFreeingVerbose) {
1983 2009 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1984 2010 "appending %u entries to the secondary_free_list, "
1985 2011 "cleanup list still has %u entries",
1986 2012 tmp_free_list.length(),
1987 2013 _cleanup_list.length());
1988 2014 }
1989 2015
1990 2016 {
1991 2017 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1992 2018 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1993 2019 SecondaryFreeList_lock->notify_all();
1994 2020 }
1995 2021
1996 2022 if (G1StressConcRegionFreeing) {
1997 2023 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1998 2024 os::sleep(Thread::current(), (jlong) 1, false);
1999 2025 }
2000 2026 }
2001 2027 }
2002 2028 }
2003 2029 assert(tmp_free_list.is_empty(), "post-condition");
2004 2030 }
2005 2031
2006 2032 // Support closures for reference processing in G1
2007 2033
2008 2034 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2009 2035 HeapWord* addr = (HeapWord*)obj;
2010 2036 return addr != NULL &&
2011 2037 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2012 2038 }
2013 2039
2014 2040 class G1CMKeepAliveClosure: public OopClosure {
2015 2041 G1CollectedHeap* _g1;
2016 2042 ConcurrentMark* _cm;
2017 2043 public:
2018 2044 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2019 2045 _g1(g1), _cm(cm) {
2020 2046 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2021 2047 }
2022 2048
2023 2049 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2024 2050 virtual void do_oop( oop* p) { do_oop_work(p); }
2025 2051
2026 2052 template <class T> void do_oop_work(T* p) {
2027 2053 oop obj = oopDesc::load_decode_heap_oop(p);
2028 2054 HeapWord* addr = (HeapWord*)obj;
2029 2055
2030 2056 if (_cm->verbose_high()) {
2031 2057 gclog_or_tty->print_cr("\t[0] we're looking at location "
2032 2058 "*"PTR_FORMAT" = "PTR_FORMAT,
2033 2059 p, (void*) obj);
2034 2060 }
2035 2061
2036 2062 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2037 2063 _cm->mark_and_count(obj);
2038 2064 _cm->mark_stack_push(obj);
2039 2065 }
2040 2066 }
2041 2067 };
2042 2068
2043 2069 class G1CMDrainMarkingStackClosure: public VoidClosure {
2044 2070 ConcurrentMark* _cm;
2045 2071 CMMarkStack* _markStack;
2046 2072 G1CMKeepAliveClosure* _oopClosure;
2047 2073 public:
2048 2074 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2049 2075 G1CMKeepAliveClosure* oopClosure) :
2050 2076 _cm(cm),
2051 2077 _markStack(markStack),
2052 2078 _oopClosure(oopClosure) { }
2053 2079
2054 2080 void do_void() {
2055 2081 _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
2056 2082 }
2057 2083 };
2058 2084
2059 2085 // 'Keep Alive' closure used by parallel reference processing.
2060 2086 // An instance of this closure is used in the parallel reference processing
2061 2087 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2062 2088 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2063 2089 // placed on to discovered ref lists once so we can mark and push with no
2064 2090 // need to check whether the object has already been marked. Using the
2065 2091 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2066 2092 // operating on the global mark stack. This means that an individual
2067 2093 // worker would be doing lock-free pushes while it processes its own
2068 2094 // discovered ref list followed by drain call. If the discovered ref lists
2069 2095 // are unbalanced then this could cause interference with the other
2070 2096 // workers. Using a CMTask (and its embedded local data structures)
2071 2097 // avoids that potential interference.
2072 2098 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2073 2099 ConcurrentMark* _cm;
2074 2100 CMTask* _task;
2075 2101 int _ref_counter_limit;
2076 2102 int _ref_counter;
2077 2103 public:
2078 2104 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2079 2105 _cm(cm), _task(task),
2080 2106 _ref_counter_limit(G1RefProcDrainInterval) {
2081 2107 assert(_ref_counter_limit > 0, "sanity");
2082 2108 _ref_counter = _ref_counter_limit;
2083 2109 }
2084 2110
2085 2111 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2086 2112 virtual void do_oop( oop* p) { do_oop_work(p); }
2087 2113
2088 2114 template <class T> void do_oop_work(T* p) {
2089 2115 if (!_cm->has_overflown()) {
2090 2116 oop obj = oopDesc::load_decode_heap_oop(p);
2091 2117 if (_cm->verbose_high()) {
2092 2118 gclog_or_tty->print_cr("\t[%d] we're looking at location "
2093 2119 "*"PTR_FORMAT" = "PTR_FORMAT,
2094 2120 _task->task_id(), p, (void*) obj);
2095 2121 }
2096 2122
2097 2123 _task->deal_with_reference(obj);
2098 2124 _ref_counter--;
2099 2125
2100 2126 if (_ref_counter == 0) {
2101 2127 // We have dealt with _ref_counter_limit references, pushing them and objects
2102 2128 // reachable from them on to the local stack (and possibly the global stack).
2103 2129 // Call do_marking_step() to process these entries. We call the routine in a
2104 2130 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2105 2131 // with the entries that we've pushed as a result of the deal_with_reference
2106 2132 // calls above) or we overflow.
2107 2133 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2108 2134 // while there may still be some work to do. (See the comment at the
2109 2135 // beginning of CMTask::do_marking_step() for those conditions - one of which
2110 2136 // is reaching the specified time target.) It is only when
2111 2137 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2112 2138 // that the marking has completed.
2113 2139 do {
2114 2140 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2115 2141 _task->do_marking_step(mark_step_duration_ms,
2116 2142 false /* do_stealing */,
2117 2143 false /* do_termination */);
2118 2144 } while (_task->has_aborted() && !_cm->has_overflown());
2119 2145 _ref_counter = _ref_counter_limit;
2120 2146 }
2121 2147 } else {
2122 2148 if (_cm->verbose_high()) {
2123 2149 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2124 2150 }
2125 2151 }
2126 2152 }
2127 2153 };
2128 2154
2129 2155 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2130 2156 ConcurrentMark* _cm;
2131 2157 CMTask* _task;
2132 2158 public:
2133 2159 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2134 2160 _cm(cm), _task(task) { }
2135 2161
2136 2162 void do_void() {
2137 2163 do {
2138 2164 if (_cm->verbose_high()) {
2139 2165 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2140 2166 _task->task_id());
2141 2167 }
2142 2168
2143 2169 // We call CMTask::do_marking_step() to completely drain the local and
2144 2170 // global marking stacks. The routine is called in a loop, which we'll
2145 2171     // exit if there's nothing more to do (i.e. we've completely drained the
2146 2172 // entries that were pushed as a result of applying the
2147 2173 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2148 2174 // lists above) or we overflow the global marking stack.
2149 2175 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2150 2176 // while there may still be some work to do. (See the comment at the
2151 2177 // beginning of CMTask::do_marking_step() for those conditions - one of which
2152 2178 // is reaching the specified time target.) It is only when
2153 2179 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2154 2180 // that the marking has completed.
2155 2181
2156 2182 _task->do_marking_step(1000000000.0 /* something very large */,
2157 2183 true /* do_stealing */,
2158 2184 true /* do_termination */);
2159 2185 } while (_task->has_aborted() && !_cm->has_overflown());
2160 2186 }
2161 2187 };
2162 2188
2163 2189 // Implementation of AbstractRefProcTaskExecutor for parallel
2164 2190 // reference processing at the end of G1 concurrent marking
2165 2191
2166 2192 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2167 2193 private:
2168 2194 G1CollectedHeap* _g1h;
2169 2195 ConcurrentMark* _cm;
2170 2196 WorkGang* _workers;
2171 2197 int _active_workers;
2172 2198
2173 2199 public:
2174 2200 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2175 2201 ConcurrentMark* cm,
2176 2202 WorkGang* workers,
2177 2203 int n_workers) :
2178 2204 _g1h(g1h), _cm(cm),
2179 2205 _workers(workers), _active_workers(n_workers) { }
2180 2206
2181 2207 // Executes the given task using concurrent marking worker threads.
2182 2208 virtual void execute(ProcessTask& task);
2183 2209 virtual void execute(EnqueueTask& task);
2184 2210 };
2185 2211
2186 2212 class G1CMRefProcTaskProxy: public AbstractGangTask {
2187 2213 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2188 2214 ProcessTask& _proc_task;
2189 2215 G1CollectedHeap* _g1h;
2190 2216 ConcurrentMark* _cm;
2191 2217
2192 2218 public:
2193 2219 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2194 2220 G1CollectedHeap* g1h,
2195 2221 ConcurrentMark* cm) :
2196 2222 AbstractGangTask("Process reference objects in parallel"),
2197 2223 _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2198 2224
2199 2225 virtual void work(uint worker_id) {
2200 2226 CMTask* marking_task = _cm->task(worker_id);
2201 2227 G1CMIsAliveClosure g1_is_alive(_g1h);
2202 2228 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2203 2229 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2204 2230
2205 2231 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2206 2232 }
2207 2233 };
2208 2234
2209 2235 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2210 2236 assert(_workers != NULL, "Need parallel worker threads.");
2211 2237
2212 2238 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2213 2239
2214 2240 // We need to reset the phase for each task execution so that
2215 2241 // the termination protocol of CMTask::do_marking_step works.
2216 2242 _cm->set_phase(_active_workers, false /* concurrent */);
2217 2243 _g1h->set_par_threads(_active_workers);
2218 2244 _workers->run_task(&proc_task_proxy);
2219 2245 _g1h->set_par_threads(0);
2220 2246 }
2221 2247
2222 2248 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2223 2249 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2224 2250 EnqueueTask& _enq_task;
2225 2251
2226 2252 public:
2227 2253 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2228 2254 AbstractGangTask("Enqueue reference objects in parallel"),
2229 2255 _enq_task(enq_task) { }
2230 2256
2231 2257 virtual void work(uint worker_id) {
2232 2258 _enq_task.work(worker_id);
2233 2259 }
2234 2260 };
2235 2261
2236 2262 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2237 2263 assert(_workers != NULL, "Need parallel worker threads.");
2238 2264
2239 2265 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2240 2266
2241 2267 _g1h->set_par_threads(_active_workers);
2242 2268 _workers->run_task(&enq_task_proxy);
2243 2269 _g1h->set_par_threads(0);
2244 2270 }
2245 2271
2246 2272 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2247 2273 ResourceMark rm;
2248 2274 HandleMark hm;
2249 2275
2250 2276 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2251 2277
2252 2278 // Is alive closure.
2253 2279 G1CMIsAliveClosure g1_is_alive(g1h);
2254 2280
2255 2281 // Inner scope to exclude the cleaning of the string and symbol
2256 2282 // tables from the displayed time.
2257 2283 {
2258 2284 if (G1Log::finer()) {
2259 2285 gclog_or_tty->put(' ');
2260 2286 }
2261 2287 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2262 2288
2263 2289 ReferenceProcessor* rp = g1h->ref_processor_cm();
2264 2290
2265 2291 // See the comment in G1CollectedHeap::ref_processing_init()
2266 2292 // about how reference processing currently works in G1.
2267 2293
2268 2294 // Process weak references.
2269 2295 rp->setup_policy(clear_all_soft_refs);
2270 2296 assert(_markStack.isEmpty(), "mark stack should be empty");
2271 2297
2272 2298 G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2273 2299 G1CMDrainMarkingStackClosure
2274 2300 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2275 2301
2276 2302 // We use the work gang from the G1CollectedHeap and we utilize all
2277 2303 // the worker threads.
2278 2304 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2279 2305 active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2280 2306
2281 2307 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2282 2308 g1h->workers(), active_workers);
2283 2309
2284 2310 if (rp->processing_is_mt()) {
2285 2311 // Set the degree of MT here. If the discovery is done MT, there
2286 2312 // may have been a different number of threads doing the discovery
2287 2313 // and a different number of discovered lists may have Ref objects.
2288 2314 // That is OK as long as the Reference lists are balanced (see
2289 2315 // balance_all_queues() and balance_queues()).
2290 2316 rp->set_active_mt_degree(active_workers);
2291 2317
2292 2318 rp->process_discovered_references(&g1_is_alive,
2293 2319 &g1_keep_alive,
2294 2320 &g1_drain_mark_stack,
2295 2321 &par_task_executor);
2296 2322
2297 2323 // The work routines of the parallel keep_alive and drain_marking_stack
2298 2324 // will set the has_overflown flag if we overflow the global marking
2299 2325 // stack.
2300 2326 } else {
2301 2327 rp->process_discovered_references(&g1_is_alive,
2302 2328 &g1_keep_alive,
2303 2329 &g1_drain_mark_stack,
2304 2330 NULL);
2305 2331 }
2306 2332
2307 2333 assert(_markStack.overflow() || _markStack.isEmpty(),
2308 2334 "mark stack should be empty (unless it overflowed)");
2309 2335 if (_markStack.overflow()) {
2310 2336 // Should have been done already when we tried to push an
2311 2337 // entry on to the global mark stack. But let's do it again.
2312 2338 set_has_overflown();
2313 2339 }
2314 2340
2315 2341 if (rp->processing_is_mt()) {
2316 2342 assert(rp->num_q() == active_workers, "why not");
2317 2343 rp->enqueue_discovered_references(&par_task_executor);
2318 2344 } else {
2319 2345 rp->enqueue_discovered_references();
2320 2346 }
2321 2347
2322 2348 rp->verify_no_references_recorded();
2323 2349 assert(!rp->discovery_enabled(), "Post condition");
2324 2350 }
2325 2351
2326 2352 // Now clean up stale oops in StringTable
2327 2353 StringTable::unlink(&g1_is_alive);
2328 2354 // Clean up unreferenced symbols in symbol table.
2329 2355 SymbolTable::unlink();
2330 2356 }
2331 2357
2332 2358 void ConcurrentMark::swapMarkBitMaps() {
2333 2359 CMBitMapRO* temp = _prevMarkBitMap;
2334 2360 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2335 2361 _nextMarkBitMap = (CMBitMap*) temp;
2336 2362 }
2337 2363
2338 2364 class CMRemarkTask: public AbstractGangTask {
2339 2365 private:
2340 2366 ConcurrentMark *_cm;
2341 2367
2342 2368 public:
2343 2369 void work(uint worker_id) {
2344 2370 // Since all available tasks are actually started, we should
2345 2371     // only proceed if we're supposed to be active.
2346 2372 if (worker_id < _cm->active_tasks()) {
2347 2373 CMTask* task = _cm->task(worker_id);
2348 2374 task->record_start_time();
2349 2375 do {
2350 2376 task->do_marking_step(1000000000.0 /* something very large */,
2351 2377 true /* do_stealing */,
2352 2378 true /* do_termination */);
2353 2379 } while (task->has_aborted() && !_cm->has_overflown());
2354 2380 // If we overflow, then we do not want to restart. We instead
2355 2381 // want to abort remark and do concurrent marking again.
2356 2382 task->record_end_time();
2357 2383 }
2358 2384 }
2359 2385
2360 2386 CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2361 2387 AbstractGangTask("Par Remark"), _cm(cm) {
2362 2388 _cm->terminator()->reset_for_reuse(active_workers);
2363 2389 }
2364 2390 };
2365 2391
2366 2392 void ConcurrentMark::checkpointRootsFinalWork() {
2367 2393 ResourceMark rm;
2368 2394 HandleMark hm;
2369 2395 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2370 2396
2371 2397 g1h->ensure_parsability(false);
2372 2398
2373 2399 if (G1CollectedHeap::use_parallel_gc_threads()) {
2374 2400 G1CollectedHeap::StrongRootsScope srs(g1h);
2375 2401 // this is remark, so we'll use up all active threads
2376 2402 uint active_workers = g1h->workers()->active_workers();
2377 2403 if (active_workers == 0) {
2378 2404 assert(active_workers > 0, "Should have been set earlier");
2379 2405 active_workers = (uint) ParallelGCThreads;
2380 2406 g1h->workers()->set_active_workers(active_workers);
2381 2407 }
2382 2408 set_phase(active_workers, false /* concurrent */);
2383 2409     // Leave _parallel_marking_threads at its
2384 2410 // value originally calculated in the ConcurrentMark
2385 2411 // constructor and pass values of the active workers
2386 2412 // through the gang in the task.
2387 2413
2388 2414 CMRemarkTask remarkTask(this, active_workers);
2389 2415 g1h->set_par_threads(active_workers);
2390 2416 g1h->workers()->run_task(&remarkTask);
2391 2417 g1h->set_par_threads(0);
2392 2418 } else {
2393 2419 G1CollectedHeap::StrongRootsScope srs(g1h);
2394 2420 // this is remark, so we'll use up all available threads
2395 2421 uint active_workers = 1;
2396 2422 set_phase(active_workers, false /* concurrent */);
2397 2423
2398 2424 CMRemarkTask remarkTask(this, active_workers);
2399 2425 // We will start all available threads, even if we decide that the
2400 2426 // active_workers will be fewer. The extra ones will just bail out
2401 2427 // immediately.
2402 2428 remarkTask.work(0);
2403 2429 }
2404 2430 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2405 2431 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2406 2432
2407 2433 print_stats();
2408 2434
2409 2435 #if VERIFY_OBJS_PROCESSED
2410 2436 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2411 2437 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2412 2438 _scan_obj_cl.objs_processed,
2413 2439 ThreadLocalObjQueue::objs_enqueued);
2414 2440 guarantee(_scan_obj_cl.objs_processed ==
2415 2441 ThreadLocalObjQueue::objs_enqueued,
2416 2442 "Different number of objs processed and enqueued.");
2417 2443 }
2418 2444 #endif
2419 2445 }
2420 2446
2421 2447 #ifndef PRODUCT
2422 2448
2423 2449 class PrintReachableOopClosure: public OopClosure {
2424 2450 private:
2425 2451 G1CollectedHeap* _g1h;
2426 2452 outputStream* _out;
2427 2453 VerifyOption _vo;
2428 2454 bool _all;
2429 2455
2430 2456 public:
2431 2457 PrintReachableOopClosure(outputStream* out,
2432 2458 VerifyOption vo,
2433 2459 bool all) :
2434 2460 _g1h(G1CollectedHeap::heap()),
2435 2461 _out(out), _vo(vo), _all(all) { }
2436 2462
2437 2463 void do_oop(narrowOop* p) { do_oop_work(p); }
2438 2464 void do_oop( oop* p) { do_oop_work(p); }
2439 2465
2440 2466 template <class T> void do_oop_work(T* p) {
2441 2467 oop obj = oopDesc::load_decode_heap_oop(p);
2442 2468 const char* str = NULL;
2443 2469 const char* str2 = "";
2444 2470
2445 2471 if (obj == NULL) {
2446 2472 str = "";
2447 2473 } else if (!_g1h->is_in_g1_reserved(obj)) {
2448 2474 str = " O";
2449 2475 } else {
2450 2476 HeapRegion* hr = _g1h->heap_region_containing(obj);
2451 2477 guarantee(hr != NULL, "invariant");
2452 2478 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2453 2479 bool marked = _g1h->is_marked(obj, _vo);
2454 2480
2455 2481 if (over_tams) {
2456 2482 str = " >";
2457 2483 if (marked) {
2458 2484 str2 = " AND MARKED";
2459 2485 }
2460 2486 } else if (marked) {
2461 2487 str = " M";
2462 2488 } else {
2463 2489 str = " NOT";
2464 2490 }
2465 2491 }
2466 2492
2467 2493 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2468 2494 p, (void*) obj, str, str2);
2469 2495 }
2470 2496 };
2471 2497
2472 2498 class PrintReachableObjectClosure : public ObjectClosure {
2473 2499 private:
2474 2500 G1CollectedHeap* _g1h;
2475 2501 outputStream* _out;
2476 2502 VerifyOption _vo;
2477 2503 bool _all;
2478 2504 HeapRegion* _hr;
2479 2505
2480 2506 public:
2481 2507 PrintReachableObjectClosure(outputStream* out,
2482 2508 VerifyOption vo,
2483 2509 bool all,
2484 2510 HeapRegion* hr) :
2485 2511 _g1h(G1CollectedHeap::heap()),
2486 2512 _out(out), _vo(vo), _all(all), _hr(hr) { }
2487 2513
2488 2514 void do_object(oop o) {
2489 2515 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2490 2516 bool marked = _g1h->is_marked(o, _vo);
2491 2517 bool print_it = _all || over_tams || marked;
2492 2518
2493 2519 if (print_it) {
2494 2520 _out->print_cr(" "PTR_FORMAT"%s",
2495 2521 o, (over_tams) ? " >" : (marked) ? " M" : "");
2496 2522 PrintReachableOopClosure oopCl(_out, _vo, _all);
2497 2523 o->oop_iterate(&oopCl);
2498 2524 }
2499 2525 }
2500 2526 };
2501 2527
2502 2528 class PrintReachableRegionClosure : public HeapRegionClosure {
2503 2529 private:
2504 2530 G1CollectedHeap* _g1h;
2505 2531 outputStream* _out;
2506 2532 VerifyOption _vo;
2507 2533 bool _all;
2508 2534
2509 2535 public:
2510 2536 bool doHeapRegion(HeapRegion* hr) {
2511 2537 HeapWord* b = hr->bottom();
2512 2538 HeapWord* e = hr->end();
2513 2539 HeapWord* t = hr->top();
2514 2540 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2515 2541 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2516 2542 "TAMS: "PTR_FORMAT, b, e, t, p);
2517 2543 _out->cr();
2518 2544
2519 2545 HeapWord* from = b;
2520 2546 HeapWord* to = t;
2521 2547
2522 2548 if (to > from) {
2523 2549 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2524 2550 _out->cr();
2525 2551 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2526 2552 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2527 2553 _out->cr();
2528 2554 }
2529 2555
2530 2556 return false;
2531 2557 }
2532 2558
2533 2559 PrintReachableRegionClosure(outputStream* out,
2534 2560 VerifyOption vo,
2535 2561 bool all) :
2536 2562 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2537 2563 };
2538 2564
2539 2565 void ConcurrentMark::print_reachable(const char* str,
2540 2566 VerifyOption vo,
2541 2567 bool all) {
2542 2568 gclog_or_tty->cr();
2543 2569 gclog_or_tty->print_cr("== Doing heap dump... ");
2544 2570
2545 2571 if (G1PrintReachableBaseFile == NULL) {
2546 2572 gclog_or_tty->print_cr(" #### error: no base file defined");
2547 2573 return;
2548 2574 }
2549 2575
2550 2576 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2551 2577 (JVM_MAXPATHLEN - 1)) {
2552 2578 gclog_or_tty->print_cr(" #### error: file name too long");
2553 2579 return;
2554 2580 }
2555 2581
2556 2582 char file_name[JVM_MAXPATHLEN];
2557 2583 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2558 2584 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2559 2585
2560 2586 fileStream fout(file_name);
2561 2587 if (!fout.is_open()) {
2562 2588 gclog_or_tty->print_cr(" #### error: could not open file");
2563 2589 return;
2564 2590 }
2565 2591
2566 2592 outputStream* out = &fout;
2567 2593 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2568 2594 out->cr();
2569 2595
2570 2596 out->print_cr("--- ITERATING OVER REGIONS");
2571 2597 out->cr();
2572 2598 PrintReachableRegionClosure rcl(out, vo, all);
2573 2599 _g1h->heap_region_iterate(&rcl);
2574 2600 out->cr();
2575 2601
2576 2602 gclog_or_tty->print_cr(" done");
2577 2603 gclog_or_tty->flush();
2578 2604 }
2579 2605
2580 2606 #endif // PRODUCT
2581 2607
2582 2608 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2583 2609 // Note we are overriding the read-only view of the prev map here, via
2584 2610 // the cast.
2585 2611 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2586 2612 }
2587 2613
2588 2614 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2589 2615 _nextMarkBitMap->clearRange(mr);
2590 2616 }
2591 2617
2592 2618 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2593 2619 clearRangePrevBitmap(mr);
2594 2620 clearRangeNextBitmap(mr);
2595 2621 }
2596 2622
2597 2623 HeapRegion*
2598 2624 ConcurrentMark::claim_region(int task_num) {
2599 2625 // "checkpoint" the finger
2600 2626 HeapWord* finger = _finger;
2601 2627
2602 2628 // _heap_end will not change underneath our feet; it only changes at
2603 2629 // yield points.
2604 2630 while (finger < _heap_end) {
2605 2631 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2606 2632
2607 2633 // Note on how this code handles humongous regions. In the
2608 2634 // normal case the finger will reach the start of a "starts
2609 2635 // humongous" (SH) region. Its end will either be the end of the
2610 2636 // last "continues humongous" (CH) region in the sequence, or the
2611 2637 // standard end of the SH region (if the SH is the only region in
2612 2638 // the sequence). That way claim_region() will skip over the CH
2613 2639 // regions. However, there is a subtle race between a CM thread
2614 2640 // executing this method and a mutator thread doing a humongous
2615 2641 // object allocation. The two are not mutually exclusive as the CM
2616 2642 // thread does not need to hold the Heap_lock when it gets
2617 2643 // here. So there is a chance that claim_region() will come across
2618 2644 // a free region that's in the progress of becoming a SH or a CH
2619 2645 // region. In the former case, it will either
2620 2646 // a) Miss the update to the region's end, in which case it will
2621 2647 // visit every subsequent CH region, will find their bitmaps
2622 2648 // empty, and do nothing, or
2623 2649 // b) Will observe the update of the region's end (in which case
2624 2650 // it will skip the subsequent CH regions).
2625 2651 // If it comes across a region that suddenly becomes CH, the
2626 2652 // scenario will be similar to b). So, the race between
2627 2653 // claim_region() and a humongous object allocation might force us
2628 2654 // to do a bit of unnecessary work (due to some unnecessary bitmap
2629 2655     // iterations) but it should not introduce any correctness issues.
2630 2656 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2631 2657 HeapWord* bottom = curr_region->bottom();
2632 2658 HeapWord* end = curr_region->end();
2633 2659 HeapWord* limit = curr_region->next_top_at_mark_start();
2634 2660
2635 2661 if (verbose_low()) {
2636 2662 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2637 2663 "["PTR_FORMAT", "PTR_FORMAT"), "
2638 2664 "limit = "PTR_FORMAT,
2639 2665 task_num, curr_region, bottom, end, limit);
2640 2666 }
2641 2667
2642 2668 // Is the gap between reading the finger and doing the CAS too long?
2643 2669 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2644 2670 if (res == finger) {
2645 2671 // we succeeded
2646 2672
2647 2673 // notice that _finger == end cannot be guaranteed here since,
2648 2674 // someone else might have moved the finger even further
2649 2675 assert(_finger >= end, "the finger should have moved forward");
2650 2676
2651 2677 if (verbose_low()) {
2652 2678 gclog_or_tty->print_cr("[%d] we were successful with region = "
2653 2679 PTR_FORMAT, task_num, curr_region);
2654 2680 }
2655 2681
2656 2682 if (limit > bottom) {
2657 2683 if (verbose_low()) {
2658 2684 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2659 2685 "returning it ", task_num, curr_region);
2660 2686 }
2661 2687 return curr_region;
2662 2688 } else {
2663 2689 assert(limit == bottom,
2664 2690 "the region limit should be at bottom");
2665 2691 if (verbose_low()) {
2666 2692 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2667 2693 "returning NULL", task_num, curr_region);
2668 2694 }
2669 2695 // we return NULL and the caller should try calling
2670 2696 // claim_region() again.
2671 2697 return NULL;
2672 2698 }
2673 2699 } else {
2674 2700 assert(_finger > finger, "the finger should have moved forward");
2675 2701 if (verbose_low()) {
2676 2702 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2677 2703 "global finger = "PTR_FORMAT", "
2678 2704 "our finger = "PTR_FORMAT,
2679 2705 task_num, _finger, finger);
2680 2706 }
2681 2707
2682 2708 // read it again
2683 2709 finger = _finger;
2684 2710 }
2685 2711 }
2686 2712
2687 2713 return NULL;
2688 2714 }
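
The claim_region() loop above implements a lock-free claim on the global finger. A reduced sketch (editorial; Region and region_at are illustrative placeholders, not HotSpot types): read the finger, look up the region it points at, and try to advance the finger to that region's end with a compare-and-swap, retrying on failure.

#include <atomic>

struct Region {
  char* bottom;
  char* end;
};

// Attempt to claim the region at the observed finger. Returns the region on
// success; returns nullptr if another thread advanced the finger first, in
// which case the caller re-reads the finger and retries (as the loop above does).
static Region* try_claim(std::atomic<char*>& finger, Region* (*region_at)(char*)) {
  char* observed = finger.load();
  Region* r = region_at(observed);    // region containing the observed finger
  char* next = r->end;                // the finger always advances to a region boundary
  if (finger.compare_exchange_strong(observed, next)) {
    return r;                         // we claimed [observed, next)
  }
  return nullptr;                     // lost the race
}
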
2689 2715
2690 2716 #ifndef PRODUCT
2691 2717 enum VerifyNoCSetOopsPhase {
2692 2718 VerifyNoCSetOopsStack,
2693 2719 VerifyNoCSetOopsQueues,
2694 2720 VerifyNoCSetOopsSATBCompleted,
2695 2721 VerifyNoCSetOopsSATBThread
2696 2722 };
2697 2723
2698 2724 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
2699 2725 private:
2700 2726 G1CollectedHeap* _g1h;
2701 2727 VerifyNoCSetOopsPhase _phase;
2702 2728 int _info;
2703 2729
2704 2730 const char* phase_str() {
2705 2731 switch (_phase) {
2706 2732 case VerifyNoCSetOopsStack: return "Stack";
2707 2733 case VerifyNoCSetOopsQueues: return "Queue";
2708 2734 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2709 2735 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
2710 2736 default: ShouldNotReachHere();
2711 2737 }
2712 2738 return NULL;
2713 2739 }
2714 2740
2715 2741 void do_object_work(oop obj) {
2716 2742 guarantee(!_g1h->obj_in_cs(obj),
2717 2743 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2718 2744 (void*) obj, phase_str(), _info));
2719 2745 }
2720 2746
2721 2747 public:
2722 2748 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2723 2749
2724 2750 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2725 2751 _phase = phase;
2726 2752 _info = info;
2727 2753 }
2728 2754
2729 2755 virtual void do_oop(oop* p) {
2730 2756 oop obj = oopDesc::load_decode_heap_oop(p);
2731 2757 do_object_work(obj);
2732 2758 }
2733 2759
2734 2760 virtual void do_oop(narrowOop* p) {
2735 2761 // We should not come across narrow oops while scanning marking
2736 2762 // stacks and SATB buffers.
2737 2763 ShouldNotReachHere();
2738 2764 }
2739 2765
2740 2766 virtual void do_object(oop obj) {
2741 2767 do_object_work(obj);
2742 2768 }
2743 2769 };
2744 2770
2745 2771 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2746 2772 bool verify_enqueued_buffers,
2747 2773 bool verify_thread_buffers,
2748 2774 bool verify_fingers) {
2749 2775 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2750 2776 if (!G1CollectedHeap::heap()->mark_in_progress()) {
2751 2777 return;
2752 2778 }
2753 2779
2754 2780 VerifyNoCSetOopsClosure cl;
2755 2781
2756 2782 if (verify_stacks) {
2757 2783 // Verify entries on the global mark stack
2758 2784 cl.set_phase(VerifyNoCSetOopsStack);
2759 2785 _markStack.oops_do(&cl);
2760 2786
2761 2787 // Verify entries on the task queues
2762 2788 for (int i = 0; i < (int) _max_task_num; i += 1) {
2763 2789 cl.set_phase(VerifyNoCSetOopsQueues, i);
2764 2790 OopTaskQueue* queue = _task_queues->queue(i);
2765 2791 queue->oops_do(&cl);
2766 2792 }
2767 2793 }
2768 2794
2769 2795 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2770 2796
2771 2797 // Verify entries on the enqueued SATB buffers
2772 2798 if (verify_enqueued_buffers) {
2773 2799 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2774 2800 satb_qs.iterate_completed_buffers_read_only(&cl);
2775 2801 }
2776 2802
2777 2803 // Verify entries on the per-thread SATB buffers
2778 2804 if (verify_thread_buffers) {
2779 2805 cl.set_phase(VerifyNoCSetOopsSATBThread);
2780 2806 satb_qs.iterate_thread_buffers_read_only(&cl);
2781 2807 }
2782 2808
2783 2809 if (verify_fingers) {
2784 2810 // Verify the global finger
2785 2811 HeapWord* global_finger = finger();
2786 2812 if (global_finger != NULL && global_finger < _heap_end) {
2787 2813 // The global finger always points to a heap region boundary. We
2788 2814 // use heap_region_containing_raw() to get the containing region
2789 2815 // given that the global finger could be pointing to a free region
2790 2816 // which subsequently becomes continues humongous. If that
2791 2817 // happens, heap_region_containing() will return the bottom of the
2792 2818 // corresponding starts humongous region and the check below will
2793 2819 // not hold any more.
2794 2820 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2795 2821 guarantee(global_finger == global_hr->bottom(),
2796 2822 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2797 2823 global_finger, HR_FORMAT_PARAMS(global_hr)));
2798 2824 }
2799 2825
2800 2826 // Verify the task fingers
2801 2827 assert(parallel_marking_threads() <= _max_task_num, "sanity");
2802 2828 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2803 2829 CMTask* task = _tasks[i];
2804 2830 HeapWord* task_finger = task->finger();
2805 2831 if (task_finger != NULL && task_finger < _heap_end) {
2806 2832 // See above note on the global finger verification.
2807 2833 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2808 2834 guarantee(task_finger == task_hr->bottom() ||
2809 2835 !task_hr->in_collection_set(),
2810 2836 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2811 2837 task_finger, HR_FORMAT_PARAMS(task_hr)));
2812 2838 }
2813 2839 }
2814 2840 }
2815 2841 }
2816 2842 #endif // PRODUCT
2817 2843
2818 2844 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2819 2845 _markStack.setEmpty();
2820 2846 _markStack.clear_overflow();
2821 2847 if (clear_overflow) {
2822 2848 clear_has_overflown();
2823 2849 } else {
2824 2850 assert(has_overflown(), "pre-condition");
2825 2851 }
2826 2852 _finger = _heap_start;
2827 2853
2828 2854 for (int i = 0; i < (int)_max_task_num; ++i) {
2829 2855 OopTaskQueue* queue = _task_queues->queue(i);
2830 2856 queue->set_empty();
2831 2857 }
2832 2858 }
2833 2859
2834 2860 // Aggregate the counting data that was constructed concurrently
2835 2861 // with marking.
2836 2862 class AggregateCountDataHRClosure: public HeapRegionClosure {
2863 + G1CollectedHeap* _g1h;
2837 2864 ConcurrentMark* _cm;
2865 + CardTableModRefBS* _ct_bs;
2838 2866 BitMap* _cm_card_bm;
2839 2867 size_t _max_task_num;
2840 2868
2841 2869 public:
2842 - AggregateCountDataHRClosure(ConcurrentMark *cm,
2870 + AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2843 2871 BitMap* cm_card_bm,
2844 2872 size_t max_task_num) :
2845 - _cm(cm), _cm_card_bm(cm_card_bm),
2846 - _max_task_num(max_task_num) { }
2847 -
2848 - bool is_card_aligned(HeapWord* p) {
2849 - return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
2850 - }
2873 + _g1h(g1h), _cm(g1h->concurrent_mark()),
2874 + _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
2875 + _cm_card_bm(cm_card_bm), _max_task_num(max_task_num) { }
2851 2876
2852 2877 bool doHeapRegion(HeapRegion* hr) {
2853 2878 if (hr->continuesHumongous()) {
2854 2879 // We will ignore these here and process them when their
2855 2880 // associated "starts humongous" region is processed.
2856 2881 // Note that we cannot rely on their associated
2857 2882 // "starts humongous" region to have their bit set to 1
2858 2883 // since, due to the region chunking in the parallel region
2859 2884 // iteration, a "continues humongous" region might be visited
2860 2885 // before its associated "starts humongous".
2861 2886 return false;
2862 2887 }
2863 2888
2864 2889 HeapWord* start = hr->bottom();
2865 2890 HeapWord* limit = hr->next_top_at_mark_start();
2866 2891 HeapWord* end = hr->end();
2867 2892
2868 2893 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2869 2894 err_msg("Preconditions not met - "
2870 2895 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2871 2896 "top: "PTR_FORMAT", end: "PTR_FORMAT,
2872 2897 start, limit, hr->top(), hr->end()));
2873 2898
2874 2899 assert(hr->next_marked_bytes() == 0, "Precondition");
2875 2900
2876 2901 if (start == limit) {
2877 2902 // NTAMS of this region has not been set so nothing to do.
2878 2903 return false;
2879 2904 }
2880 2905
2881 - assert(is_card_aligned(start), "sanity");
2882 - assert(is_card_aligned(end), "sanity");
2906 + // 'start' should be in the heap.
2907 + assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2906 + // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2909 + assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2883 2910
2884 2911 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2885 2912 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2886 2913 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2887 2914
2888 - // If ntams is not card aligned then we bump the index for
2889 - // limit so that we get the card spanning ntams.
2890 - if (!is_card_aligned(limit)) {
2915 + // If ntams is not card aligned then we bump card bitmap index
2916 + // for limit so that we get all the cards spanned by
2917 + // the object ending at ntams.
2918 + // Note: if this is the last region in the heap then ntams
2919 + // could actually be just beyond the end of the heap;
2920 + // limit_idx will then correspond to a (non-existent) card
2921 + // that is also outside the heap.
2922 + if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2891 2923 limit_idx += 1;
2892 2924 }
2893 2925
2894 2926 assert(limit_idx <= end_idx, "or else use atomics");
2895 2927
2896 2928 // Aggregate the "stripe" in the count data associated with hr.
2897 2929 uint hrs_index = hr->hrs_index();
2898 2930 size_t marked_bytes = 0;
2899 2931
2900 2932 for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2901 2933 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2902 2934 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2903 2935
2904 2936 // Fetch the marked_bytes in this region for task i and
2905 2937 // add it to the running total for this region.
2906 2938 marked_bytes += marked_bytes_array[hrs_index];
2907 2939
2908 2940 // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2909 2941 // into the global card bitmap.
2910 2942 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2911 2943
2912 2944 while (scan_idx < limit_idx) {
2913 2945 assert(task_card_bm->at(scan_idx) == true, "should be");
2914 2946 _cm_card_bm->set_bit(scan_idx);
2915 2947 assert(_cm_card_bm->at(scan_idx) == true, "should be");
2916 2948
2917 2949 // BitMap::get_next_one_offset() can handle the case when
2918 2950 // its left_offset parameter is greater than its right_offset
2919 - // parameter. If does, however, have an early exit if
2951 + // parameter. It does, however, have an early exit if
2920 2952 // left_offset == right_offset. So let's limit the value
2921 2953 // passed in for left offset here.
2922 2954 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2923 2955 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2924 2956 }
2925 2957 }
2926 2958
2927 2959 // Update the marked bytes for this region.
2928 2960 hr->add_to_marked_bytes(marked_bytes);
2929 2961
2930 2962 // Next heap region
2931 2963 return false;
2932 2964 }
2933 2965 };
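Editor's sketch, not part of the patch: a minimal standalone model of the card-bitmap index arithmetic that doHeapRegion() relies on above, including the limit_idx bump when NTAMS is not card aligned. The 512-byte card size matches the HotSpot default; the helper names, addresses, and offsets are hypothetical.

#include <cstddef>
#include <cstdint>
#include <cstdio>

static const uintptr_t kCardSize  = 512;   // bytes per card (HotSpot default)
static const uintptr_t kCardShift = 9;     // log2(512)

static size_t card_index_for(uintptr_t heap_start, uintptr_t addr) {
  return (addr - heap_start) >> kCardShift;        // index of the card covering addr
}

static bool is_card_aligned(uintptr_t addr) {
  return (addr & (kCardSize - 1)) == 0;
}

int main() {
  uintptr_t heap_start   = 0x100000;               // hypothetical heap base
  uintptr_t region_start = heap_start;             // region bottoms are card aligned
  uintptr_t ntams        = heap_start + 1200;      // not card aligned

  size_t start_idx = card_index_for(heap_start, region_start);
  size_t limit_idx = card_index_for(heap_start, ntams);
  // If ntams is not card aligned, bump limit_idx so the card spanning ntams is
  // included in [start_idx, limit_idx) -- the same adjustment doHeapRegion() makes.
  if (!is_card_aligned(ntams)) {
    limit_idx += 1;
  }
  printf("cards to aggregate: [%zu, %zu)\n", start_idx, limit_idx);   // prints [0, 3)
  return 0;
}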
2934 2966
2935 2967 class G1AggregateCountDataTask: public AbstractGangTask {
2936 2968 protected:
2937 2969 G1CollectedHeap* _g1h;
2938 2970 ConcurrentMark* _cm;
2939 2971 BitMap* _cm_card_bm;
2940 2972 size_t _max_task_num;
2941 2973 int _active_workers;
2942 2974
2943 2975 public:
2944 2976 G1AggregateCountDataTask(G1CollectedHeap* g1h,
2945 2977 ConcurrentMark* cm,
2946 2978 BitMap* cm_card_bm,
2947 2979 size_t max_task_num,
2948 2980 int n_workers) :
2949 2981 AbstractGangTask("Count Aggregation"),
2950 2982 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2951 2983 _max_task_num(max_task_num),
2952 2984 _active_workers(n_workers) { }
2953 2985
2954 2986 void work(uint worker_id) {
2955 - AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);
2987 + AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_task_num);
2956 2988
2957 2989 if (G1CollectedHeap::use_parallel_gc_threads()) {
2958 2990 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
2959 2991 _active_workers,
2960 2992 HeapRegion::AggregateCountClaimValue);
2961 2993 } else {
2962 2994 _g1h->heap_region_iterate(&cl);
2963 2995 }
2964 2996 }
2965 2997 };
2966 2998
2967 2999
2968 3000 void ConcurrentMark::aggregate_count_data() {
2969 3001 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
2970 3002 _g1h->workers()->active_workers() :
2971 3003 1);
2972 3004
2973 3005 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2974 3006 _max_task_num, n_workers);
2975 3007
2976 3008 if (G1CollectedHeap::use_parallel_gc_threads()) {
2977 3009 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2978 3010 "sanity check");
2979 3011 _g1h->set_par_threads(n_workers);
2980 3012 _g1h->workers()->run_task(&g1_par_agg_task);
2981 3013 _g1h->set_par_threads(0);
2982 3014
2983 3015 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
2984 3016 "sanity check");
2985 3017 _g1h->reset_heap_region_claim_values();
2986 3018 } else {
2987 3019 g1_par_agg_task.work(0);
2988 3020 }
2989 3021 }
2990 3022
2991 3023 // Clear the per-worker arrays used to store the per-region counting data
2992 3024 void ConcurrentMark::clear_all_count_data() {
2993 3025 // Clear the global card bitmap - it will be filled during
2994 3026 // liveness count aggregation (during remark) and the
2995 3027 // final counting task.
2996 3028 _card_bm.clear();
2997 3029
2998 3030 // Clear the global region bitmap - it will be filled as part
2999 3031 // of the final counting task.
3000 3032 _region_bm.clear();
3001 3033
3002 3034 uint max_regions = _g1h->max_regions();
3003 3035 assert(_max_task_num != 0, "uninitialized");
3004 3036
3005 3037 for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3006 3038 BitMap* task_card_bm = count_card_bitmap_for(i);
3007 3039 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3008 3040
3009 3041 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3010 3042 assert(marked_bytes_array != NULL, "uninitialized");
3011 3043
3012 3044 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3013 3045 task_card_bm->clear();
3014 3046 }
3015 3047 }
3016 3048
3017 3049 void ConcurrentMark::print_stats() {
3018 3050 if (verbose_stats()) {
3019 3051 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3020 3052 for (size_t i = 0; i < _active_tasks; ++i) {
3021 3053 _tasks[i]->print_stats();
3022 3054 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3023 3055 }
3024 3056 }
3025 3057 }
3026 3058
3027 3059 // abandon current marking iteration due to a Full GC
3028 3060 void ConcurrentMark::abort() {
3029 3061 // Clear all marks to force marking thread to do nothing
3030 3062 _nextMarkBitMap->clearAll();
3031 3063 // Clear the liveness counting data
3032 3064 clear_all_count_data();
3033 3065 // Empty mark stack
3034 3066 clear_marking_state();
3035 3067 for (int i = 0; i < (int)_max_task_num; ++i) {
3036 3068 _tasks[i]->clear_region_fields();
3037 3069 }
3038 3070 _has_aborted = true;
3039 3071
3040 3072 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3041 3073 satb_mq_set.abandon_partial_marking();
3042 3074 // This can be called either during or outside marking, we'll read
3043 3075 // the expected_active value from the SATB queue set.
3044 3076 satb_mq_set.set_active_all_threads(
3045 3077 false, /* new active value */
3046 3078 satb_mq_set.is_active() /* expected_active */);
3047 3079 }
3048 3080
3049 3081 static void print_ms_time_info(const char* prefix, const char* name,
3050 3082 NumberSeq& ns) {
3051 3083 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3052 3084 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3053 3085 if (ns.num() > 0) {
3054 3086 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3055 3087 prefix, ns.sd(), ns.maximum());
3056 3088 }
3057 3089 }
3058 3090
3059 3091 void ConcurrentMark::print_summary_info() {
3060 3092 gclog_or_tty->print_cr(" Concurrent marking:");
3061 3093 print_ms_time_info(" ", "init marks", _init_times);
3062 3094 print_ms_time_info(" ", "remarks", _remark_times);
3063 3095 {
3064 3096 print_ms_time_info(" ", "final marks", _remark_mark_times);
3065 3097 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3066 3098
3067 3099 }
3068 3100 print_ms_time_info(" ", "cleanups", _cleanup_times);
3069 3101 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3070 3102 _total_counting_time,
3071 3103 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3072 3104 (double)_cleanup_times.num()
3073 3105 : 0.0));
3074 3106 if (G1ScrubRemSets) {
3075 3107 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3076 3108 _total_rs_scrub_time,
3077 3109 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3078 3110 (double)_cleanup_times.num()
3079 3111 : 0.0));
3080 3112 }
3081 3113 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3082 3114 (_init_times.sum() + _remark_times.sum() +
3083 3115 _cleanup_times.sum())/1000.0);
3084 3116 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3085 3117 "(%8.2f s marking).",
3086 3118 cmThread()->vtime_accum(),
3087 3119 cmThread()->vtime_mark_accum());
3088 3120 }
3089 3121
3090 3122 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3091 3123 _parallel_workers->print_worker_threads_on(st);
3092 3124 }
3093 3125
3094 3126 // We take a break if someone is trying to stop the world.
3095 3127 bool ConcurrentMark::do_yield_check(uint worker_id) {
3096 3128 if (should_yield()) {
3097 3129 if (worker_id == 0) {
3098 3130 _g1h->g1_policy()->record_concurrent_pause();
3099 3131 }
3100 3132 cmThread()->yield();
3101 3133 return true;
3102 3134 } else {
3103 3135 return false;
3104 3136 }
3105 3137 }
3106 3138
3107 3139 bool ConcurrentMark::should_yield() {
3108 3140 return cmThread()->should_yield();
3109 3141 }
3110 3142
3111 3143 bool ConcurrentMark::containing_card_is_marked(void* p) {
3112 3144 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3113 3145 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3114 3146 }
3115 3147
3116 3148 bool ConcurrentMark::containing_cards_are_marked(void* start,
3117 3149 void* last) {
3118 3150 return containing_card_is_marked(start) &&
3119 3151 containing_card_is_marked(last);
3120 3152 }
3121 3153
3122 3154 #ifndef PRODUCT
3123 3155 // for debugging purposes
3124 3156 void ConcurrentMark::print_finger() {
3125 3157 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3126 3158 _heap_start, _heap_end, _finger);
3127 3159 for (int i = 0; i < (int) _max_task_num; ++i) {
3128 3160 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
3129 3161 }
3130 3162 gclog_or_tty->print_cr("");
3131 3163 }
3132 3164 #endif
3133 3165
3134 3166 void CMTask::scan_object(oop obj) {
3135 3167 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3136 3168
3137 3169 if (_cm->verbose_high()) {
3138 3170 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3139 3171 _task_id, (void*) obj);
3140 3172 }
3141 3173
3142 3174 size_t obj_size = obj->size();
3143 3175 _words_scanned += obj_size;
3144 3176
3145 3177 obj->oop_iterate(_cm_oop_closure);
3146 3178 statsOnly( ++_objs_scanned );
3147 3179 check_limits();
3148 3180 }
3149 3181
3150 3182 // Closure for iteration over bitmaps
3151 3183 class CMBitMapClosure : public BitMapClosure {
3152 3184 private:
3153 3185 // the bitmap that is being iterated over
3154 3186 CMBitMap* _nextMarkBitMap;
3155 3187 ConcurrentMark* _cm;
3156 3188 CMTask* _task;
3157 3189
3158 3190 public:
3159 3191 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3160 3192 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3161 3193
3162 3194 bool do_bit(size_t offset) {
3163 3195 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3164 3196 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3165 3197 assert( addr < _cm->finger(), "invariant");
3166 3198
3167 3199 statsOnly( _task->increase_objs_found_on_bitmap() );
3168 3200 assert(addr >= _task->finger(), "invariant");
3169 3201
3170 3202 // We move that task's local finger along.
3171 3203 _task->move_finger_to(addr);
3172 3204
3173 3205 _task->scan_object(oop(addr));
3174 3206 // we only partially drain the local queue and global stack
3175 3207 _task->drain_local_queue(true);
3176 3208 _task->drain_global_stack(true);
3177 3209
3178 3210 // if the has_aborted flag has been raised, we need to bail out of
3179 3211 // the iteration
3180 3212 return !_task->has_aborted();
3181 3213 }
3182 3214 };
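For readers unfamiliar with the BitMapClosure protocol that CMBitMapClosure implements above, here is a standalone sketch (not part of the patch) of the same idea: walk the marked bits, advance a local finger as each object is visited, and cut the walk short once an abort has been requested. The bitmap size, offsets, and abort trigger are made up.

#include <bitset>
#include <cstdio>

int main() {
  std::bitset<64> mark_bits;                 // stand-in for the next-mark bitmap
  mark_bits.set(3); mark_bits.set(17); mark_bits.set(42);

  size_t finger  = 0;                        // local finger: last offset examined
  bool   aborted = false;                    // set by the "regular clock" in the real code

  for (size_t offset = 0; offset < mark_bits.size(); ++offset) {
    if (!mark_bits.test(offset)) continue;
    finger = offset;                         // move the finger to the object being scanned
    printf("scanning marked object at offset %zu\n", offset);
    if (offset == 17) aborted = true;        // simulate hitting a time/work limit
    if (aborted) break;                      // do_bit() returning false aborts the iteration
  }
  printf("iteration %s, finger = %zu\n", aborted ? "aborted" : "completed", finger);
  return 0;
}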
3183 3215
3184 3216 // Closure for iterating over objects, currently only used for
3185 3217 // processing SATB buffers.
3186 3218 class CMObjectClosure : public ObjectClosure {
3187 3219 private:
3188 3220 CMTask* _task;
3189 3221
3190 3222 public:
3191 3223 void do_object(oop obj) {
3192 3224 _task->deal_with_reference(obj);
3193 3225 }
3194 3226
3195 3227 CMObjectClosure(CMTask* task) : _task(task) { }
3196 3228 };
3197 3229
3198 3230 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3199 3231 ConcurrentMark* cm,
3200 3232 CMTask* task)
3201 3233 : _g1h(g1h), _cm(cm), _task(task) {
3202 3234 assert(_ref_processor == NULL, "should be initialized to NULL");
3203 3235
3204 3236 if (G1UseConcMarkReferenceProcessing) {
3205 3237 _ref_processor = g1h->ref_processor_cm();
3206 3238 assert(_ref_processor != NULL, "should not be NULL");
3207 3239 }
3208 3240 }
3209 3241
3210 3242 void CMTask::setup_for_region(HeapRegion* hr) {
3211 3243 // Separated the asserts so that we know which one fires.
3212 3244 assert(hr != NULL,
3213 3245 "claim_region() should have filtered out continues humongous regions");
3214 3246 assert(!hr->continuesHumongous(),
3215 3247 "claim_region() should have filtered out continues humongous regions");
3216 3248
3217 3249 if (_cm->verbose_low()) {
3218 3250 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3219 3251 _task_id, hr);
3220 3252 }
3221 3253
3222 3254 _curr_region = hr;
3223 3255 _finger = hr->bottom();
3224 3256 update_region_limit();
3225 3257 }
3226 3258
3227 3259 void CMTask::update_region_limit() {
3228 3260 HeapRegion* hr = _curr_region;
3229 3261 HeapWord* bottom = hr->bottom();
3230 3262 HeapWord* limit = hr->next_top_at_mark_start();
3231 3263
3232 3264 if (limit == bottom) {
3233 3265 if (_cm->verbose_low()) {
3234 3266 gclog_or_tty->print_cr("[%d] found an empty region "
3235 3267 "["PTR_FORMAT", "PTR_FORMAT")",
3236 3268 _task_id, bottom, limit);
3237 3269 }
3238 3270 // The region was collected underneath our feet.
3239 3271 // We set the finger to bottom to ensure that the bitmap
3240 3272 // iteration that will follow this will not do anything.
3241 3273 // (this is not a condition that holds when we set the region up,
3242 3274 // as the region is not supposed to be empty in the first place)
3243 3275 _finger = bottom;
3244 3276 } else if (limit >= _region_limit) {
3245 3277 assert(limit >= _finger, "peace of mind");
3246 3278 } else {
3247 3279 assert(limit < _region_limit, "only way to get here");
3248 3280 // This can happen under some pretty unusual circumstances. An
3249 3281 // evacuation pause empties the region underneath our feet (NTAMS
3250 3282 // at bottom). We then do some allocation in the region (NTAMS
3251 3283 // stays at bottom), followed by the region being used as a GC
3252 3284 // alloc region (NTAMS will move to top() and the objects
3253 3285 // originally below it will be grayed). All objects now marked in
3254 3286 // the region are explicitly grayed, if below the global finger,
3255 3287 // and we do not need in fact to scan anything else. So, we simply
3256 3288 // set _finger to be limit to ensure that the bitmap iteration
3257 3289 // doesn't do anything.
3258 3290 _finger = limit;
3259 3291 }
3260 3292
3261 3293 _region_limit = limit;
3262 3294 }
3263 3295
3264 3296 void CMTask::giveup_current_region() {
3265 3297 assert(_curr_region != NULL, "invariant");
3266 3298 if (_cm->verbose_low()) {
3267 3299 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3268 3300 _task_id, _curr_region);
3269 3301 }
3270 3302 clear_region_fields();
3271 3303 }
3272 3304
3273 3305 void CMTask::clear_region_fields() {
3274 3306 // Values for these three fields that indicate that we're not
3275 3307 // holding on to a region.
3276 3308 _curr_region = NULL;
3277 3309 _finger = NULL;
3278 3310 _region_limit = NULL;
3279 3311 }
3280 3312
3281 3313 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3282 3314 if (cm_oop_closure == NULL) {
3283 3315 assert(_cm_oop_closure != NULL, "invariant");
3284 3316 } else {
3285 3317 assert(_cm_oop_closure == NULL, "invariant");
3286 3318 }
3287 3319 _cm_oop_closure = cm_oop_closure;
3288 3320 }
3289 3321
3290 3322 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3291 3323 guarantee(nextMarkBitMap != NULL, "invariant");
3292 3324
3293 3325 if (_cm->verbose_low()) {
3294 3326 gclog_or_tty->print_cr("[%d] resetting", _task_id);
3295 3327 }
3296 3328
3297 3329 _nextMarkBitMap = nextMarkBitMap;
3298 3330 clear_region_fields();
3299 3331
3300 3332 _calls = 0;
3301 3333 _elapsed_time_ms = 0.0;
3302 3334 _termination_time_ms = 0.0;
3303 3335 _termination_start_time_ms = 0.0;
3304 3336
3305 3337 #if _MARKING_STATS_
3306 3338 _local_pushes = 0;
3307 3339 _local_pops = 0;
3308 3340 _local_max_size = 0;
3309 3341 _objs_scanned = 0;
3310 3342 _global_pushes = 0;
3311 3343 _global_pops = 0;
3312 3344 _global_max_size = 0;
3313 3345 _global_transfers_to = 0;
3314 3346 _global_transfers_from = 0;
3315 3347 _regions_claimed = 0;
3316 3348 _objs_found_on_bitmap = 0;
3317 3349 _satb_buffers_processed = 0;
3318 3350 _steal_attempts = 0;
3319 3351 _steals = 0;
3320 3352 _aborted = 0;
3321 3353 _aborted_overflow = 0;
3322 3354 _aborted_cm_aborted = 0;
3323 3355 _aborted_yield = 0;
3324 3356 _aborted_timed_out = 0;
3325 3357 _aborted_satb = 0;
3326 3358 _aborted_termination = 0;
3327 3359 #endif // _MARKING_STATS_
3328 3360 }
3329 3361
3330 3362 bool CMTask::should_exit_termination() {
3331 3363 regular_clock_call();
3332 3364 // This is called when we are in the termination protocol. We should
3333 3365 // quit if, for some reason, this task wants to abort or the global
3334 3366 // stack is not empty (this means that we can get work from it).
3335 3367 return !_cm->mark_stack_empty() || has_aborted();
3336 3368 }
3337 3369
3338 3370 void CMTask::reached_limit() {
3339 3371 assert(_words_scanned >= _words_scanned_limit ||
3340 3372 _refs_reached >= _refs_reached_limit ,
3341 3373 "shouldn't have been called otherwise");
3342 3374 regular_clock_call();
3343 3375 }
3344 3376
3345 3377 void CMTask::regular_clock_call() {
3346 3378 if (has_aborted()) return;
3347 3379
3348 3380 // First, we need to recalculate the words scanned and refs reached
3349 3381 // limits for the next clock call.
3350 3382 recalculate_limits();
3351 3383
3352 3384 // During the regular clock call we do the following
3353 3385
3354 3386 // (1) If an overflow has been flagged, then we abort.
3355 3387 if (_cm->has_overflown()) {
3356 3388 set_has_aborted();
3357 3389 return;
3358 3390 }
3359 3391
3360 3392 // If we are not concurrent (i.e. we're doing remark) we don't need
3361 3393 // to check anything else. The other steps are only needed during
3362 3394 // the concurrent marking phase.
3363 3395 if (!concurrent()) return;
3364 3396
3365 3397 // (2) If marking has been aborted for Full GC, then we also abort.
3366 3398 if (_cm->has_aborted()) {
3367 3399 set_has_aborted();
3368 3400 statsOnly( ++_aborted_cm_aborted );
3369 3401 return;
3370 3402 }
3371 3403
3372 3404 double curr_time_ms = os::elapsedVTime() * 1000.0;
3373 3405
3374 3406 // (3) If marking stats are enabled, then we update the step history.
3375 3407 #if _MARKING_STATS_
3376 3408 if (_words_scanned >= _words_scanned_limit) {
3377 3409 ++_clock_due_to_scanning;
3378 3410 }
3379 3411 if (_refs_reached >= _refs_reached_limit) {
3380 3412 ++_clock_due_to_marking;
3381 3413 }
3382 3414
3383 3415 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3384 3416 _interval_start_time_ms = curr_time_ms;
3385 3417 _all_clock_intervals_ms.add(last_interval_ms);
3386 3418
3387 3419 if (_cm->verbose_medium()) {
3388 3420 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3389 3421 "scanned = %d%s, refs reached = %d%s",
3390 3422 _task_id, last_interval_ms,
3391 3423 _words_scanned,
3392 3424 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3393 3425 _refs_reached,
3394 3426 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3395 3427 }
3396 3428 #endif // _MARKING_STATS_
3397 3429
3398 3430 // (4) We check whether we should yield. If we have to, then we abort.
3399 3431 if (_cm->should_yield()) {
3400 3432 // We should yield. To do this we abort the task. The caller is
3401 3433 // responsible for yielding.
3402 3434 set_has_aborted();
3403 3435 statsOnly( ++_aborted_yield );
3404 3436 return;
3405 3437 }
3406 3438
3407 3439 // (5) We check whether we've reached our time quota. If we have,
3408 3440 // then we abort.
3409 3441 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3410 3442 if (elapsed_time_ms > _time_target_ms) {
3411 3443 set_has_aborted();
3412 3444 _has_timed_out = true;
3413 3445 statsOnly( ++_aborted_timed_out );
3414 3446 return;
3415 3447 }
3416 3448
3417 3449 // (6) Finally, we check whether there are enough completed SATB
3418 3450 // buffers available for processing. If there are, we abort.
3419 3451 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3420 3452 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3421 3453 if (_cm->verbose_low()) {
3422 3454 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3423 3455 _task_id);
3424 3456 }
3425 3457 // we do need to process SATB buffers, we'll abort and restart
3426 3458 // the marking task to do so
3427 3459 set_has_aborted();
3428 3460 statsOnly( ++_aborted_satb );
3429 3461 return;
3430 3462 }
3431 3463 }
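The work-based scheme behind regular_clock_call() can be illustrated with a small standalone model (editor's sketch, not part of the patch): the hot path only compares a counter against a limit, and the expensive time read plus abort checks run once per work period. The period and time target below are illustrative values, not the ones G1 actually uses.

#include <chrono>
#include <cstdio>

int main() {
  using clock = std::chrono::steady_clock;
  const size_t words_per_clock = 12 * 1024;        // work period between clock calls
  const double time_target_ms  = 10.0;             // per-step time budget

  size_t words_scanned = 0;
  size_t words_limit   = words_per_clock;
  auto   start         = clock::now();
  bool   aborted       = false;

  while (!aborted) {
    words_scanned += 64;                           // pretend we scanned one object
    if (words_scanned < words_limit) continue;     // cheap check on the hot path

    // "Regular clock call": only here do we pay for a time read and abort checks.
    words_limit = words_scanned + words_per_clock;
    double elapsed_ms =
        std::chrono::duration<double, std::milli>(clock::now() - start).count();
    if (elapsed_ms > time_target_ms) {
      aborted = true;                              // the real code also checks overflow,
    }                                              // yield requests and pending SATB buffers
  }
  printf("aborted after %zu words\n", words_scanned);
  return 0;
}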
3432 3464
3433 3465 void CMTask::recalculate_limits() {
3434 3466 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3435 3467 _words_scanned_limit = _real_words_scanned_limit;
3436 3468
3437 3469 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3438 3470 _refs_reached_limit = _real_refs_reached_limit;
3439 3471 }
3440 3472
3441 3473 void CMTask::decrease_limits() {
3442 3474 // This is called when we believe that we're going to do an infrequent
3443 3475 // operation which will increase the per byte scanned cost (i.e. move
3444 3476 // entries to/from the global stack). It basically tries to decrease the
3445 3477 // scanning limit so that the clock is called earlier.
3446 3478
3447 3479 if (_cm->verbose_medium()) {
3448 3480 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3449 3481 }
3450 3482
3451 3483 _words_scanned_limit = _real_words_scanned_limit -
3452 3484 3 * words_scanned_period / 4;
3453 3485 _refs_reached_limit = _real_refs_reached_limit -
3454 3486 3 * refs_reached_period / 4;
3455 3487 }
3456 3488
3457 3489 void CMTask::move_entries_to_global_stack() {
3458 3490 // local array where we'll store the entries that will be popped
3459 3491 // from the local queue
3460 3492 oop buffer[global_stack_transfer_size];
3461 3493
3462 3494 int n = 0;
3463 3495 oop obj;
3464 3496 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3465 3497 buffer[n] = obj;
3466 3498 ++n;
3467 3499 }
3468 3500
3469 3501 if (n > 0) {
3470 3502 // we popped at least one entry from the local queue
3471 3503
3472 3504 statsOnly( ++_global_transfers_to; _local_pops += n );
3473 3505
3474 3506 if (!_cm->mark_stack_push(buffer, n)) {
3475 3507 if (_cm->verbose_low()) {
3476 3508 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3477 3509 _task_id);
3478 3510 }
3479 3511 set_has_aborted();
3480 3512 } else {
3481 3513 // the transfer was successful
3482 3514
3483 3515 if (_cm->verbose_medium()) {
3484 3516 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3485 3517 _task_id, n);
3486 3518 }
3487 3519 statsOnly( int tmp_size = _cm->mark_stack_size();
3488 3520 if (tmp_size > _global_max_size) {
3489 3521 _global_max_size = tmp_size;
3490 3522 }
3491 3523 _global_pushes += n );
3492 3524 }
3493 3525 }
3494 3526
3495 3527 // this operation was quite expensive, so decrease the limits
3496 3528 decrease_limits();
3497 3529 }
3498 3530
3499 3531 void CMTask::get_entries_from_global_stack() {
3500 3532 // local array where we'll store the entries that will be popped
3501 3533 // from the global stack.
3502 3534 oop buffer[global_stack_transfer_size];
3503 3535 int n;
3504 3536 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3505 3537 assert(n <= global_stack_transfer_size,
3506 3538 "we should not pop more than the given limit");
3507 3539 if (n > 0) {
3508 3540 // yes, we did actually pop at least one entry
3509 3541
3510 3542 statsOnly( ++_global_transfers_from; _global_pops += n );
3511 3543 if (_cm->verbose_medium()) {
3512 3544 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3513 3545 _task_id, n);
3514 3546 }
3515 3547 for (int i = 0; i < n; ++i) {
3516 3548 bool success = _task_queue->push(buffer[i]);
3517 3549 // We only call this when the local queue is empty or under a
3518 3550 // given target limit. So, we do not expect this push to fail.
3519 3551 assert(success, "invariant");
3520 3552 }
3521 3553
3522 3554 statsOnly( int tmp_size = _task_queue->size();
3523 3555 if (tmp_size > _local_max_size) {
3524 3556 _local_max_size = tmp_size;
3525 3557 }
3526 3558 _local_pushes += n );
3527 3559 }
3528 3560
3529 3561 // this operation was quite expensive, so decrease the limits
3530 3562 decrease_limits();
3531 3563 }
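As a rough, single-threaded illustration of the chunked transfer that move_entries_to_global_stack() and get_entries_from_global_stack() implement, the sketch below (not part of the patch) moves entries between a local queue and a shared stack a fixed-size chunk at a time, so the shared structure is touched infrequently. The containers, element type, and chunk size are stand-ins for the real lock-free task queues and oop buffers.

#include <deque>
#include <vector>
#include <cstdio>

static const int kChunk = 16;                       // global_stack_transfer_size stand-in

int main() {
  std::deque<int>  local_queue;
  std::vector<int> global_stack;
  for (int i = 0; i < 40; ++i) local_queue.push_back(i);

  // Move entries to the global stack one chunk at a time, as
  // move_entries_to_global_stack() does, keeping some work local
  // so that other tasks can still steal from the queue.
  while (local_queue.size() > (size_t)kChunk) {
    for (int n = 0; n < kChunk && !local_queue.empty(); ++n) {
      global_stack.push_back(local_queue.back());
      local_queue.pop_back();
    }
  }
  printf("local = %zu entries, global = %zu entries\n",
         local_queue.size(), global_stack.size());
  return 0;
}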
3532 3564
3533 3565 void CMTask::drain_local_queue(bool partially) {
3534 3566 if (has_aborted()) return;
3535 3567
3536 3568 // Decide what the target size is, depending whether we're going to
3537 3569 // drain it partially (so that other tasks can steal if they run out
3538 3570 // of things to do) or totally (at the very end).
3539 3571 size_t target_size;
3540 3572 if (partially) {
3541 3573 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3542 3574 } else {
3543 3575 target_size = 0;
3544 3576 }
3545 3577
3546 3578 if (_task_queue->size() > target_size) {
3547 3579 if (_cm->verbose_high()) {
3548 3580 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3549 3581 _task_id, target_size);
3550 3582 }
3551 3583
3552 3584 oop obj;
3553 3585 bool ret = _task_queue->pop_local(obj);
3554 3586 while (ret) {
3555 3587 statsOnly( ++_local_pops );
3556 3588
3557 3589 if (_cm->verbose_high()) {
3558 3590 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3559 3591 (void*) obj);
3560 3592 }
3561 3593
3562 3594 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3563 3595 assert(!_g1h->is_on_master_free_list(
3564 3596 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3565 3597
3566 3598 scan_object(obj);
3567 3599
3568 3600 if (_task_queue->size() <= target_size || has_aborted()) {
3569 3601 ret = false;
3570 3602 } else {
3571 3603 ret = _task_queue->pop_local(obj);
3572 3604 }
3573 3605 }
3574 3606
3575 3607 if (_cm->verbose_high()) {
3576 3608 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3577 3609 _task_id, _task_queue->size());
3578 3610 }
3579 3611 }
3580 3612 }
3581 3613
3582 3614 void CMTask::drain_global_stack(bool partially) {
3583 3615 if (has_aborted()) return;
3584 3616
3585 3617 // We have a policy to drain the local queue before we attempt to
3586 3618 // drain the global stack.
3587 3619 assert(partially || _task_queue->size() == 0, "invariant");
3588 3620
3589 3621 // Decide what the target size is, depending whether we're going to
3590 3622 // drain it partially (so that other tasks can steal if they run out
3591 3623 // of things to do) or totally (at the very end). Notice that,
3592 3624 // because we move entries from the global stack in chunks or
3593 3625 // because another task might be doing the same, we might in fact
3594 3626 // drop below the target. But, this is not a problem.
3595 3627 size_t target_size;
3596 3628 if (partially) {
3597 3629 target_size = _cm->partial_mark_stack_size_target();
3598 3630 } else {
3599 3631 target_size = 0;
3600 3632 }
3601 3633
3602 3634 if (_cm->mark_stack_size() > target_size) {
3603 3635 if (_cm->verbose_low()) {
3604 3636 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3605 3637 _task_id, target_size);
3606 3638 }
3607 3639
3608 3640 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3609 3641 get_entries_from_global_stack();
3610 3642 drain_local_queue(partially);
3611 3643 }
3612 3644
3613 3645 if (_cm->verbose_low()) {
3614 3646 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3615 3647 _task_id, _cm->mark_stack_size());
3616 3648 }
3617 3649 }
3618 3650 }
3619 3651
3620 3652 // SATB Queue has several assumptions on whether to call the par or
3621 3653 // non-par versions of the methods. This is why some of the code is
3622 3654 // replicated. We should really get rid of the single-threaded version
3623 3655 // of the code to simplify things.
3624 3656 void CMTask::drain_satb_buffers() {
3625 3657 if (has_aborted()) return;
3626 3658
3627 3659 // We set this so that the regular clock knows that we're in the
3628 3660 // middle of draining buffers and doesn't set the abort flag when it
3629 3661 // notices that SATB buffers are available for draining. It'd be
3630 3662 // very counterproductive if it did that. :-)
3631 3663 _draining_satb_buffers = true;
3632 3664
3633 3665 CMObjectClosure oc(this);
3634 3666 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3635 3667 if (G1CollectedHeap::use_parallel_gc_threads()) {
3636 3668 satb_mq_set.set_par_closure(_task_id, &oc);
3637 3669 } else {
3638 3670 satb_mq_set.set_closure(&oc);
3639 3671 }
3640 3672
3641 3673 // This keeps claiming and applying the closure to completed buffers
3642 3674 // until we run out of buffers or we need to abort.
3643 3675 if (G1CollectedHeap::use_parallel_gc_threads()) {
3644 3676 while (!has_aborted() &&
3645 3677 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3646 3678 if (_cm->verbose_medium()) {
3647 3679 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3648 3680 }
3649 3681 statsOnly( ++_satb_buffers_processed );
3650 3682 regular_clock_call();
3651 3683 }
3652 3684 } else {
3653 3685 while (!has_aborted() &&
3654 3686 satb_mq_set.apply_closure_to_completed_buffer()) {
3655 3687 if (_cm->verbose_medium()) {
3656 3688 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3657 3689 }
3658 3690 statsOnly( ++_satb_buffers_processed );
3659 3691 regular_clock_call();
3660 3692 }
3661 3693 }
3662 3694
3663 3695 if (!concurrent() && !has_aborted()) {
3664 3696 // We should only do this during remark.
3665 3697 if (G1CollectedHeap::use_parallel_gc_threads()) {
3666 3698 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3667 3699 } else {
3668 3700 satb_mq_set.iterate_closure_all_threads();
3669 3701 }
3670 3702 }
3671 3703
3672 3704 _draining_satb_buffers = false;
3673 3705
3674 3706 assert(has_aborted() ||
3675 3707 concurrent() ||
3676 3708 satb_mq_set.completed_buffers_num() == 0, "invariant");
3677 3709
3678 3710 if (G1CollectedHeap::use_parallel_gc_threads()) {
3679 3711 satb_mq_set.set_par_closure(_task_id, NULL);
3680 3712 } else {
3681 3713 satb_mq_set.set_closure(NULL);
3682 3714 }
3683 3715
3684 3716 // again, this was a potentially expensive operation, decrease the
3685 3717 // limits to get the regular clock call early
3686 3718 decrease_limits();
3687 3719 }
3688 3720
3689 3721 void CMTask::print_stats() {
3690 3722 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3691 3723 _task_id, _calls);
3692 3724 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3693 3725 _elapsed_time_ms, _termination_time_ms);
3694 3726 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3695 3727 _step_times_ms.num(), _step_times_ms.avg(),
3696 3728 _step_times_ms.sd());
3697 3729 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3698 3730 _step_times_ms.maximum(), _step_times_ms.sum());
3699 3731
3700 3732 #if _MARKING_STATS_
3701 3733 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3702 3734 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3703 3735 _all_clock_intervals_ms.sd());
3704 3736 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3705 3737 _all_clock_intervals_ms.maximum(),
3706 3738 _all_clock_intervals_ms.sum());
3707 3739 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3708 3740 _clock_due_to_scanning, _clock_due_to_marking);
3709 3741 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3710 3742 _objs_scanned, _objs_found_on_bitmap);
3711 3743 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3712 3744 _local_pushes, _local_pops, _local_max_size);
3713 3745 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3714 3746 _global_pushes, _global_pops, _global_max_size);
3715 3747 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3716 3748 _global_transfers_to,_global_transfers_from);
3717 3749 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
3718 3750 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3719 3751 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3720 3752 _steal_attempts, _steals);
3721 3753 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3722 3754 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3723 3755 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3724 3756 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3725 3757 _aborted_timed_out, _aborted_satb, _aborted_termination);
3726 3758 #endif // _MARKING_STATS_
3727 3759 }
3728 3760
3729 3761 /*****************************************************************************
3730 3762
3731 3763 The do_marking_step(time_target_ms) method is the building block
3732 3764 of the parallel marking framework. It can be called in parallel
3733 3765 with other invocations of do_marking_step() on different tasks
3734 3766 (but only one per task, obviously) and concurrently with the
3735 3767 mutator threads, or during remark, hence it eliminates the need
3736 3768 for two versions of the code. When called during remark, it will
3737 3769 pick up from where the task left off during the concurrent marking
3738 3770 phase. Interestingly, tasks are also claimable during evacuation
3739 3771 pauses too, since do_marking_step() ensures that it aborts before
3740 3772 it needs to yield.
3741 3773
3742 3774 The data structures that it uses to do marking work are the
3743 3775 following:
3744 3776
3745 3777 (1) Marking Bitmap. If there are gray objects that appear only
3746 3778 on the bitmap (this happens either when dealing with an overflow
3747 3779 or when the initial marking phase has simply marked the roots
3748 3780 and didn't push them on the stack), then tasks claim heap
3749 3781 regions whose bitmap they then scan to find gray objects. A
3750 3782 global finger indicates where the end of the last claimed region
3751 3783 is. A local finger indicates how far into the region a task has
3752 3784 scanned. The two fingers are used to determine how to gray an
3753 3785 object (i.e. whether simply marking it is OK, as it will be
3754 3786 visited by a task in the future, or whether it needs to be also
3755 3787 pushed on a stack).
3756 3788
3757 3789 (2) Local Queue. The local queue of the task which is accessed
3758 3790 reasonably efficiently by the task. Other tasks can steal from
3759 3791 it when they run out of work. Throughout the marking phase, a
3760 3792 task attempts to keep its local queue short but not totally
3761 3793 empty, so that entries are available for stealing by other
3762 3794 tasks. Only when there is no more work, a task will totally
3763 3795 drain its local queue.
3764 3796
3765 3797 (3) Global Mark Stack. This handles local queue overflow. During
3766 3798 marking only sets of entries are moved between it and the local
3767 3799 queues, as access to it requires a mutex and more fine-grain
3768 3800 interaction with it which might cause contention. If it
3769 3801 overflows, then the marking phase should restart and iterate
3770 3802 over the bitmap to identify gray objects. Throughout the marking
3771 3803 phase, tasks attempt to keep the global mark stack at a small
3772 3804 length but not totally empty, so that entries are available for
3773 3805 popping by other tasks. Only when there is no more work, tasks
3774 3806 will totally drain the global mark stack.
3775 3807
3776 3808 (4) SATB Buffer Queue. This is where completed SATB buffers are
3777 3809 made available. Buffers are regularly removed from this queue
3778 3810 and scanned for roots, so that the queue doesn't get too
3779 3811 long. During remark, all completed buffers are processed, as
3780 3812 well as the filled in parts of any uncompleted buffers.
3781 3813
3782 3814 The do_marking_step() method tries to abort when the time target
3783 3815 has been reached. There are a few other cases when the
3784 3816 do_marking_step() method also aborts:
3785 3817
3786 3818 (1) When the marking phase has been aborted (after a Full GC).
3787 3819
3788 3820 (2) When a global overflow (on the global stack) has been
3789 3821 triggered. Before the task aborts, it will actually sync up with
3790 3822 the other tasks to ensure that all the marking data structures
3791 3823 (local queues, stacks, fingers etc.) are re-initialised so that
3792 3824 when do_marking_step() completes, the marking phase can
3793 3825 immediately restart.
3794 3826
3795 3827 (3) When enough completed SATB buffers are available. The
3796 3828 do_marking_step() method only tries to drain SATB buffers right
3797 3829 at the beginning. So, if enough buffers are available, the
3798 3830 marking step aborts and the SATB buffers are processed at
3799 3831 the beginning of the next invocation.
3800 3832
3801 3833 (4) To yield. When we have to yield, we abort and yield
3802 3834 right at the end of do_marking_step(). This saves us from a lot
3803 3835 of hassle as, by yielding we might allow a Full GC. If this
3804 3836 happens then objects will be compacted underneath our feet, the
3805 3837 heap might shrink, etc. We save checking for this by just
3806 3838 aborting and doing the yield right at the end.
3807 3839
3808 3840 From the above it follows that the do_marking_step() method should
3809 3841 be called in a loop (or, otherwise, regularly) until it completes.
3810 3842
3811 3843 If a marking step completes without its has_aborted() flag being
3812 3844 true, it means it has completed the current marking phase (and
3813 3845 also all other marking tasks have done so and have all synced up).
3814 3846
3815 3847 A method called regular_clock_call() is invoked "regularly" (in
3816 3848 sub ms intervals) throughout marking. It is this clock method that
3817 3849 checks all the abort conditions which were mentioned above and
3818 3850 decides when the task should abort. A work-based scheme is used to
3819 3851 trigger this clock method: when the number of object words the
3820 3852 marking phase has scanned or the number of references the marking
3821 3853 phase has visited reach a given limit. Additional invocations to
3822 3854 the method clock have been planted in a few other strategic places
3823 3855 too. The initial reason for the clock method was to avoid calling
3824 3856 vtime too regularly, as it is quite expensive. So, once it was in
3825 3857 place, it was natural to piggy-back all the other conditions on it
3826 3858 too and not constantly check them throughout the code.
3827 3859
3828 3860 *****************************************************************************/
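The comment block above notes that do_marking_step() should be called repeatedly until a step finishes without its has_aborted() flag set. A caricature of such a driver loop follows (editor's sketch, not part of the patch); marking_step() is a hypothetical stand-in for the real CMTask method, and the abort count is invented.

#include <cstdio>

// Pretend "step": returns true if marking finished, false if the step had to
// abort (time budget, yield request, overflow, pending SATB buffers, ...).
static bool marking_step(int* remaining_aborts) {
  if (*remaining_aborts > 0) {
    --*remaining_aborts;      // still abort reasons left
    return false;             // aborted; the caller should yield and retry
  }
  return true;                // phase complete
}

int main() {
  int  remaining_aborts = 3;  // simulate three aborted steps before completion
  int  calls            = 0;
  bool finished         = false;
  while (!finished) {
    ++calls;
    finished = marking_step(&remaining_aborts);
    // In the real concurrent phase the caller would yield to a safepoint
    // here before calling the step again.
  }
  printf("marking completed after %d calls\n", calls);
  return 0;
}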
3829 3861
3830 3862 void CMTask::do_marking_step(double time_target_ms,
3831 3863 bool do_stealing,
3832 3864 bool do_termination) {
3833 3865 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3834 3866 assert(concurrent() == _cm->concurrent(), "they should be the same");
3835 3867
3836 3868 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3837 3869 assert(_task_queues != NULL, "invariant");
3838 3870 assert(_task_queue != NULL, "invariant");
3839 3871 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3840 3872
3841 3873 assert(!_claimed,
3842 3874 "only one thread should claim this task at any one time");
3843 3875
3844 3876 // OK, this doesn't safeguard against all possible scenarios, as it is
3845 3877 // possible for two threads to set the _claimed flag at the same
3846 3878 // time. But it is only for debugging purposes anyway and it will
3847 3879 // catch most problems.
3848 3880 _claimed = true;
3849 3881
3850 3882 _start_time_ms = os::elapsedVTime() * 1000.0;
3851 3883 statsOnly( _interval_start_time_ms = _start_time_ms );
3852 3884
3853 3885 double diff_prediction_ms =
3854 3886 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3855 3887 _time_target_ms = time_target_ms - diff_prediction_ms;
3856 3888
3857 3889 // set up the variables that are used in the work-based scheme to
3858 3890 // call the regular clock method
3859 3891 _words_scanned = 0;
3860 3892 _refs_reached = 0;
3861 3893 recalculate_limits();
3862 3894
3863 3895 // clear all flags
3864 3896 clear_has_aborted();
3865 3897 _has_timed_out = false;
3866 3898 _draining_satb_buffers = false;
3867 3899
3868 3900 ++_calls;
3869 3901
3870 3902 if (_cm->verbose_low()) {
3871 3903 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3872 3904 "target = %1.2lfms >>>>>>>>>>",
3873 3905 _task_id, _calls, _time_target_ms);
3874 3906 }
3875 3907
3876 3908 // Set up the bitmap and oop closures. Anything that uses them is
3877 3909 // eventually called from this method, so it is OK to allocate these
3878 3910 // statically.
3879 3911 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3880 3912 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
3881 3913 set_cm_oop_closure(&cm_oop_closure);
3882 3914
3883 3915 if (_cm->has_overflown()) {
3884 3916 // This can happen if the mark stack overflows during a GC pause
3885 3917 // and this task, after a yield point, restarts. We have to abort
3886 3918 // as we need to get into the overflow protocol which happens
3887 3919 // right at the end of this task.
3888 3920 set_has_aborted();
3889 3921 }
3890 3922
3891 3923 // First drain any available SATB buffers. After this, we will not
3892 3924 // look at SATB buffers before the next invocation of this method.
3893 3925 // If enough completed SATB buffers are queued up, the regular clock
3894 3926 // will abort this task so that it restarts.
3895 3927 drain_satb_buffers();
3896 3928 // ...then partially drain the local queue and the global stack
3897 3929 drain_local_queue(true);
3898 3930 drain_global_stack(true);
3899 3931
3900 3932 do {
3901 3933 if (!has_aborted() && _curr_region != NULL) {
3902 3934 // This means that we're already holding on to a region.
3903 3935 assert(_finger != NULL, "if region is not NULL, then the finger "
3904 3936 "should not be NULL either");
3905 3937
3906 3938 // We might have restarted this task after an evacuation pause
3907 3939 // which might have evacuated the region we're holding on to
3908 3940 // underneath our feet. Let's read its limit again to make sure
3909 3941 // that we do not iterate over a region of the heap that
3910 3942 // contains garbage (update_region_limit() will also move
3911 3943 // _finger to the start of the region if it is found empty).
3912 3944 update_region_limit();
3913 3945 // We will start from _finger not from the start of the region,
3914 3946 // as we might be restarting this task after aborting half-way
3915 3947 // through scanning this region. In this case, _finger points to
3916 3948 // the address where we last found a marked object. If this is a
3917 3949 // fresh region, _finger points to start().
3918 3950 MemRegion mr = MemRegion(_finger, _region_limit);
3919 3951
3920 3952 if (_cm->verbose_low()) {
3921 3953 gclog_or_tty->print_cr("[%d] we're scanning part "
3922 3954 "["PTR_FORMAT", "PTR_FORMAT") "
3923 3955 "of region "PTR_FORMAT,
3924 3956 _task_id, _finger, _region_limit, _curr_region);
3925 3957 }
3926 3958
3927 3959 // Let's iterate over the bitmap of the part of the
3928 3960 // region that is left.
3929 3961 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3930 3962 // We successfully completed iterating over the region. Now,
3931 3963 // let's give up the region.
3932 3964 giveup_current_region();
3933 3965 regular_clock_call();
3934 3966 } else {
3935 3967 assert(has_aborted(), "currently the only way to do so");
3936 3968 // The only way to abort the bitmap iteration is to return
3937 3969 // false from the do_bit() method. However, inside the
3938 3970 // do_bit() method we move the _finger to point to the
3939 3971 // object currently being looked at. So, if we bail out, we
3940 3972 // have definitely set _finger to something non-null.
3941 3973 assert(_finger != NULL, "invariant");
3942 3974
3943 3975 // Region iteration was actually aborted. So now _finger
3944 3976 // points to the address of the object we last scanned. If we
3945 3977 // leave it there, when we restart this task, we will rescan
3946 3978 // the object. It is easy to avoid this. We move the finger by
3947 3979 // enough to point to the next possible object header (the
3948 3980 // bitmap knows by how much we need to move it as it knows its
3949 3981 // granularity).
3950 3982 assert(_finger < _region_limit, "invariant");
3951 3983 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3952 3984 // Check if bitmap iteration was aborted while scanning the last object
3953 3985 if (new_finger >= _region_limit) {
3954 3986 giveup_current_region();
3955 3987 } else {
3956 3988 move_finger_to(new_finger);
3957 3989 }
3958 3990 }
3959 3991 }
3960 3992 // At this point we have either completed iterating over the
3961 3993 // region we were holding on to, or we have aborted.
3962 3994
3963 3995 // We then partially drain the local queue and the global stack.
3964 3996 // (Do we really need this?)
3965 3997 drain_local_queue(true);
3966 3998 drain_global_stack(true);
3967 3999
3968 4000 // Read the note on the claim_region() method on why it might
3969 4001 // return NULL with potentially more regions available for
3970 4002 // claiming and why we have to check out_of_regions() to determine
3971 4003 // whether we're done or not.
3972 4004 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3973 4005 // We are going to try to claim a new region. We should have
3974 4006 // given up on the previous one.
3975 4007 // Separated the asserts so that we know which one fires.
3976 4008 assert(_curr_region == NULL, "invariant");
3977 4009 assert(_finger == NULL, "invariant");
3978 4010 assert(_region_limit == NULL, "invariant");
3979 4011 if (_cm->verbose_low()) {
3980 4012 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3981 4013 }
3982 4014 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3983 4015 if (claimed_region != NULL) {
3984 4016 // Yes, we managed to claim one
3985 4017 statsOnly( ++_regions_claimed );
3986 4018
3987 4019 if (_cm->verbose_low()) {
3988 4020 gclog_or_tty->print_cr("[%d] we successfully claimed "
3989 4021 "region "PTR_FORMAT,
3990 4022 _task_id, claimed_region);
3991 4023 }
3992 4024
3993 4025 setup_for_region(claimed_region);
3994 4026 assert(_curr_region == claimed_region, "invariant");
3995 4027 }
3996 4028 // It is important to call the regular clock here. It might take
3997 4029 // a while to claim a region if, for example, we hit a large
3998 4030 // block of empty regions. So we need to call the regular clock
3999 4031 // method once round the loop to make sure it's called
4000 4032 // frequently enough.
4001 4033 regular_clock_call();
4002 4034 }
4003 4035
4004 4036 if (!has_aborted() && _curr_region == NULL) {
4005 4037 assert(_cm->out_of_regions(),
4006 4038 "at this point we should be out of regions");
4007 4039 }
4008 4040 } while ( _curr_region != NULL && !has_aborted());
4009 4041
4010 4042 if (!has_aborted()) {
4011 4043 // We cannot check whether the global stack is empty, since other
4012 4044 // tasks might be pushing objects to it concurrently.
4013 4045 assert(_cm->out_of_regions(),
4014 4046 "at this point we should be out of regions");
4015 4047
4016 4048 if (_cm->verbose_low()) {
4017 4049 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4018 4050 }
4019 4051
4020 4052 // Try to reduce the number of available SATB buffers so that
4021 4053 // remark has less work to do.
4022 4054 drain_satb_buffers();
4023 4055 }
4024 4056
4025 4057 // Since we've done everything else, we can now totally drain the
4026 4058 // local queue and global stack.
4027 4059 drain_local_queue(false);
4028 4060 drain_global_stack(false);
4029 4061
4030 4062 // Attempt at work stealing from other task's queues.
4031 4063 if (do_stealing && !has_aborted()) {
4032 4064 // We have not aborted. This means that we have finished all that
4033 4065 // we could. Let's try to do some stealing...
4034 4066
4035 4067 // We cannot check whether the global stack is empty, since other
4036 4068 // tasks might be pushing objects to it concurrently.
4037 4069 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4038 4070 "only way to reach here");
4039 4071
4040 4072 if (_cm->verbose_low()) {
4041 4073 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4042 4074 }
4043 4075
4044 4076 while (!has_aborted()) {
4045 4077 oop obj;
4046 4078 statsOnly( ++_steal_attempts );
4047 4079
4048 4080 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4049 4081 if (_cm->verbose_medium()) {
4050 4082 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4051 4083 _task_id, (void*) obj);
4052 4084 }
4053 4085
4054 4086 statsOnly( ++_steals );
4055 4087
4056 4088 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4057 4089 "any stolen object should be marked");
4058 4090 scan_object(obj);
4059 4091
4060 4092 // And since we're towards the end, let's totally drain the
4061 4093 // local queue and global stack.
4062 4094 drain_local_queue(false);
4063 4095 drain_global_stack(false);
4064 4096 } else {
4065 4097 break;
4066 4098 }
4067 4099 }
4068 4100 }
4069 4101
4070 4102 // If we are about to wrap up and go into termination, check if we
4071 4103 // should raise the overflow flag.
4072 4104 if (do_termination && !has_aborted()) {
4073 4105 if (_cm->force_overflow()->should_force()) {
4074 4106 _cm->set_has_overflown();
4075 4107 regular_clock_call();
4076 4108 }
4077 4109 }
4078 4110
4079 4111 // We still haven't aborted. Now, let's try to get into the
4080 4112 // termination protocol.
4081 4113 if (do_termination && !has_aborted()) {
4082 4114 // We cannot check whether the global stack is empty, since other
4083 4115 // tasks might be concurrently pushing objects on it.
4084 4116 // Separated the asserts so that we know which one fires.
4085 4117 assert(_cm->out_of_regions(), "only way to reach here");
4086 4118 assert(_task_queue->size() == 0, "only way to reach here");
4087 4119
4088 4120 if (_cm->verbose_low()) {
4089 4121 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4090 4122 }
4091 4123
4092 4124 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4093 4125 // The CMTask class also extends the TerminatorTerminator class,
4094 4126 // hence its should_exit_termination() method will also decide
4095 4127 // whether to exit the termination protocol or not.
4096 4128 bool finished = _cm->terminator()->offer_termination(this);
4097 4129 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4098 4130 _termination_time_ms +=
4099 4131 termination_end_time_ms - _termination_start_time_ms;
4100 4132
4101 4133 if (finished) {
4102 4134 // We're all done.
4103 4135
4104 4136 if (_task_id == 0) {
4105 4137 // let's allow task 0 to do this
4106 4138 if (concurrent()) {
4107 4139 assert(_cm->concurrent_marking_in_progress(), "invariant");
4108 4140 // we need to set this to false before the next
4109 4141 // safepoint. This way we ensure that the marking phase
4110 4142 // doesn't observe any more heap expansions.
4111 4143 _cm->clear_concurrent_marking_in_progress();
4112 4144 }
4113 4145 }
4114 4146
4115 4147 // We can now guarantee that the global stack is empty, since
4116 4148 // all other tasks have finished. We separated the guarantees so
4117 4149 // that, if a condition is false, we can immediately find out
4118 4150 // which one.
4119 4151 guarantee(_cm->out_of_regions(), "only way to reach here");
4120 4152 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4121 4153 guarantee(_task_queue->size() == 0, "only way to reach here");
4122 4154 guarantee(!_cm->has_overflown(), "only way to reach here");
4123 4155 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4124 4156
4125 4157 if (_cm->verbose_low()) {
4126 4158 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4127 4159 }
4128 4160 } else {
4129 4161 // Apparently there's more work to do. Let's abort this task. It
4130 4162 // will be restarted and we can hopefully find more things to do.
4131 4163
4132 4164 if (_cm->verbose_low()) {
4133 4165 gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4134 4166 _task_id);
4135 4167 }
4136 4168
4137 4169 set_has_aborted();
4138 4170 statsOnly( ++_aborted_termination );
4139 4171 }
4140 4172 }
4141 4173
4142 4174 // Mainly for debugging purposes to make sure that a pointer to the
4143 4175 // closure which was statically allocated in this frame doesn't
4144 4176 // escape it by accident.
4145 4177 set_cm_oop_closure(NULL);
4146 4178 double end_time_ms = os::elapsedVTime() * 1000.0;
4147 4179 double elapsed_time_ms = end_time_ms - _start_time_ms;
4148 4180 // Update the step history.
4149 4181 _step_times_ms.add(elapsed_time_ms);
4150 4182
4151 4183 if (has_aborted()) {
4152 4184 // The task was aborted for some reason.
4153 4185
4154 4186 statsOnly( ++_aborted );
4155 4187
4156 4188 if (_has_timed_out) {
4157 4189 double diff_ms = elapsed_time_ms - _time_target_ms;
4158 4190 // Keep statistics of how well we did with respect to hitting
4159 4191 // our target only if we actually timed out (if we aborted for
4160 4192 // other reasons, then the results might get skewed).
4161 4193 _marking_step_diffs_ms.add(diff_ms);
4162 4194 }
4163 4195
4164 4196 if (_cm->has_overflown()) {
4165 4197 // This is the interesting one. We aborted because a global
4166 4198 // overflow was raised. This means we have to restart the
4167 4199 // marking phase and start iterating over regions. However, in
4168 4200 // order to do this we have to make sure that all tasks stop
4169 4201 // what they are doing and re-initialise in a safe manner. We
4170 4202 // will achieve this with the use of two barrier sync points.
4171 4203
4172 4204 if (_cm->verbose_low()) {
4173 4205 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4174 4206 }
4175 4207
4176 4208 _cm->enter_first_sync_barrier(_task_id);
4177 4209 // When we exit this sync barrier we know that all tasks have
4178 4210 // stopped doing marking work. So, it's now safe to
4179 4211 // re-initialise our data structures. At the end of this method,
4180 4212 // task 0 will clear the global data structures.
4181 4213
4182 4214 statsOnly( ++_aborted_overflow );
4183 4215
4184 4216 // We clear the local state of this task...
4185 4217 clear_region_fields();
4186 4218
4187 4219 // ...and enter the second barrier.
4188 4220 _cm->enter_second_sync_barrier(_task_id);
4189 4221 // At this point everything has been re-initialised and we're
4190 4222 // ready to restart.
4191 4223 }
4192 4224
4193 4225 if (_cm->verbose_low()) {
4194 4226 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4195 4227 "elapsed = %1.2lfms <<<<<<<<<<",
4196 4228 _task_id, _time_target_ms, elapsed_time_ms);
4197 4229 if (_cm->has_aborted()) {
4198 4230 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4199 4231 _task_id);
4200 4232 }
4201 4233 }
4202 4234 } else {
4203 4235 if (_cm->verbose_low()) {
4204 4236 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4205 4237 "elapsed = %1.2lfms <<<<<<<<<<",
4206 4238 _task_id, _time_target_ms, elapsed_time_ms);
4207 4239 }
4208 4240 }
4209 4241
4210 4242 _claimed = false;
4211 4243 }
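// [Editor's sketch; not part of this patch.] The overflow path in the method
// above relies on two rendezvous points: after the first barrier every task
// has stopped doing marking work, so per-task state can be reset safely;
// after the second barrier the shared state has been re-initialised and every
// task may restart. The same shape in standalone form, using std::barrier
// purely as a stand-in for the VM's own barrier synchronisation (the names
// below are illustrative, not HotSpot APIs):

#include <barrier>
#include <thread>
#include <vector>

void overflow_restart_demo(int num_tasks) {
  std::barrier<> first_sync(num_tasks);    // "all tasks have stopped marking"
  std::barrier<> second_sync(num_tasks);   // "global state has been re-initialised"

  auto task = [&](int id) {
    // ... abort the current marking step ...
    first_sync.arrive_and_wait();          // wait until every task has stopped
    // Each task clears its local fields here; one designated task (task 0 in
    // the code above) would also reset the shared data structures.
    (void) id;
    second_sync.arrive_and_wait();         // wait until re-initialisation is done
    // ... the marking step can now be restarted ...
  };

  std::vector<std::thread> workers;
  for (int i = 0; i < num_tasks; ++i) {
    workers.emplace_back(task, i);
  }
  for (std::thread& t : workers) {
    t.join();
  }
}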
4212 4244
4213 4245 CMTask::CMTask(int task_id,
4214 4246 ConcurrentMark* cm,
4215 4247 size_t* marked_bytes,
4216 4248 BitMap* card_bm,
4217 4249 CMTaskQueue* task_queue,
4218 4250 CMTaskQueueSet* task_queues)
4219 4251 : _g1h(G1CollectedHeap::heap()),
4220 4252 _task_id(task_id), _cm(cm),
4221 4253 _claimed(false),
4222 4254 _nextMarkBitMap(NULL), _hash_seed(17),
4223 4255 _task_queue(task_queue),
4224 4256 _task_queues(task_queues),
4225 4257 _cm_oop_closure(NULL),
4226 4258 _marked_bytes_array(marked_bytes),
4227 4259 _card_bm(card_bm) {
4228 4260 guarantee(task_queue != NULL, "invariant");
4229 4261 guarantee(task_queues != NULL, "invariant");
4230 4262
4231 4263 statsOnly( _clock_due_to_scanning = 0;
4232 4264 _clock_due_to_marking = 0 );
4233 4265
4234 4266 _marking_step_diffs_ms.add(0.5);
4235 4267 }
4236 4268
4237 4269 // These are formatting macros that are used below to ensure
4238 4270 // consistent formatting. The *_H_* versions are used to format the
4239 4271 // header for a particular value and they should be kept consistent
4240 4272 // with the corresponding macro. Also note that most of the macros add
4241 4273 // the necessary white space (as a prefix) which makes them a bit
4242 4274 // easier to compose.
4243 4275
4244 4276 // All the output lines are prefixed with this string to be able to
4245 4277 // identify them easily in a large log file.
4246 4278 #define G1PPRL_LINE_PREFIX "###"
4247 4279
4248 4280 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4249 4281 #ifdef _LP64
4250 4282 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4251 4283 #else // _LP64
4252 4284 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4253 4285 #endif // _LP64
4254 4286
4255 4287 // For per-region info
4256 4288 #define G1PPRL_TYPE_FORMAT " %-4s"
4257 4289 #define G1PPRL_TYPE_H_FORMAT " %4s"
4258 4290 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4259 4291 #define G1PPRL_BYTE_H_FORMAT " %9s"
4260 4292 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4261 4293 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4262 4294
4263 4295 // For summary info
4264 4296 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4265 4297 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4266 4298 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4267 4299 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
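// [Editor's sketch; not part of this patch.] The G1PPRL_* macros above rely on
// C/C++ adjacent string-literal concatenation: each macro expands to a quoted
// fragment, and fragments placed side by side are joined into a single
// printf-style format string at compile time. A minimal standalone
// illustration of the same technique (the COL_* names are made up):

#include <cstddef>
#include <cstdio>

#define COL_TYPE_FORMAT  " %-4s"
#define COL_BYTES_FORMAT " %9zu"

int main() {
  // " %-4s" " %9zu" "\n" is concatenated into " %-4s %9zu\n" before the call.
  std::printf(COL_TYPE_FORMAT COL_BYTES_FORMAT "\n", "OLD", (size_t) 123456);
  return 0;
}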
4268 4300
4269 4301 G1PrintRegionLivenessInfoClosure::
4270 4302 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4271 4303 : _out(out),
4272 4304 _total_used_bytes(0), _total_capacity_bytes(0),
4273 4305 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4274 4306 _hum_used_bytes(0), _hum_capacity_bytes(0),
4275 4307 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4276 4308 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4277 4309 MemRegion g1_committed = g1h->g1_committed();
4278 4310 MemRegion g1_reserved = g1h->g1_reserved();
4279 4311 double now = os::elapsedTime();
4280 4312
4281 4313 // Print the header of the output.
4282 4314 _out->cr();
4283 4315 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4284 4316 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4285 4317 G1PPRL_SUM_ADDR_FORMAT("committed")
4286 4318 G1PPRL_SUM_ADDR_FORMAT("reserved")
4287 4319 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4288 4320 g1_committed.start(), g1_committed.end(),
4289 4321 g1_reserved.start(), g1_reserved.end(),
4290 4322 HeapRegion::GrainBytes);
4291 4323 _out->print_cr(G1PPRL_LINE_PREFIX);
4292 4324 _out->print_cr(G1PPRL_LINE_PREFIX
4293 4325 G1PPRL_TYPE_H_FORMAT
4294 4326 G1PPRL_ADDR_BASE_H_FORMAT
4295 4327 G1PPRL_BYTE_H_FORMAT
4296 4328 G1PPRL_BYTE_H_FORMAT
4297 4329 G1PPRL_BYTE_H_FORMAT
4298 4330 G1PPRL_DOUBLE_H_FORMAT,
4299 4331 "type", "address-range",
4300 4332 "used", "prev-live", "next-live", "gc-eff");
4301 4333 _out->print_cr(G1PPRL_LINE_PREFIX
4302 4334 G1PPRL_TYPE_H_FORMAT
4303 4335 G1PPRL_ADDR_BASE_H_FORMAT
4304 4336 G1PPRL_BYTE_H_FORMAT
4305 4337 G1PPRL_BYTE_H_FORMAT
4306 4338 G1PPRL_BYTE_H_FORMAT
4307 4339 G1PPRL_DOUBLE_H_FORMAT,
4308 4340 "", "",
4309 4341 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4310 4342 }
4311 4343
4312 4344 // It takes as a parameter a pointer to one of the _hum_* fields,
4313 4345 // deduces the corresponding value for a region in a humongous region
4314 4346 // series (either the region size, or what's left if the _hum_* field
4315 4347 // is < the region size), and updates the _hum_* field accordingly.
4316 4348 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4317 4349 size_t bytes = 0;
4318 4350 // The > 0 check is to deal with the prev and next live bytes which
4319 4351 // could be 0.
4320 4352 if (*hum_bytes > 0) {
4321 4353 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4322 4354 *hum_bytes -= bytes;
4323 4355 }
4324 4356 return bytes;
4325 4357 }
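// [Editor's sketch; not part of this patch. The helper and variable names and
// the 1 MB region size below are made up for illustration.] Walking a
// humongous series in address order, the helper above hands out at most one
// region's worth of bytes per region until the recorded total is exhausted,
// so an object spanning 2.5 regions reports 1.0, 1.0 and 0.5 regions of
// "used" bytes:

#include <cstddef>
#include <cstdio>

static size_t take_hum_bytes(size_t* remaining, size_t region_bytes) {
  size_t bytes = 0;
  if (*remaining > 0) {
    bytes = (*remaining < region_bytes) ? *remaining : region_bytes;
    *remaining -= bytes;
  }
  return bytes;
}

int main() {
  const size_t region_bytes = 1024 * 1024;                 // hypothetical 1 MB regions
  size_t remaining = 2 * region_bytes + region_bytes / 2;  // 2.5 regions of used bytes
  for (int i = 0; i < 3; i++) {
    std::printf("region %d: %zu bytes\n", i, take_hum_bytes(&remaining, region_bytes));
  }
  return 0;
}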
4326 4358
4327 4359 // It deduces the values for a region in a humongous region series
4328 4360 // from the _hum_* fields and updates those accordingly. It assumes
4329 4361 // that the _hum_* fields have already been set up from the "starts
4330 4362 // humongous" region and that we visit the regions in address order.
4331 4363 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4332 4364 size_t* capacity_bytes,
4333 4365 size_t* prev_live_bytes,
4334 4366 size_t* next_live_bytes) {
4335 4367 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4336 4368 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4337 4369 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4338 4370 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4339 4371 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4340 4372 }
4341 4373
4342 4374 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4343 4375 const char* type = "";
4344 4376 HeapWord* bottom = r->bottom();
4345 4377 HeapWord* end = r->end();
4346 4378 size_t capacity_bytes = r->capacity();
4347 4379 size_t used_bytes = r->used();
4348 4380 size_t prev_live_bytes = r->live_bytes();
4349 4381 size_t next_live_bytes = r->next_live_bytes();
4350 4382 double gc_eff = r->gc_efficiency();
4351 4383 if (r->used() == 0) {
4352 4384 type = "FREE";
4353 4385 } else if (r->is_survivor()) {
4354 4386 type = "SURV";
4355 4387 } else if (r->is_young()) {
4356 4388 type = "EDEN";
4357 4389 } else if (r->startsHumongous()) {
4358 4390 type = "HUMS";
4359 4391
4360 4392 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4361 4393 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4362 4394 "they should have been zeroed after the last time we used them");
4363 4395 // Set up the _hum_* fields.
4364 4396 _hum_capacity_bytes = capacity_bytes;
4365 4397 _hum_used_bytes = used_bytes;
4366 4398 _hum_prev_live_bytes = prev_live_bytes;
4367 4399 _hum_next_live_bytes = next_live_bytes;
4368 4400 get_hum_bytes(&used_bytes, &capacity_bytes,
4369 4401 &prev_live_bytes, &next_live_bytes);
4370 4402 end = bottom + HeapRegion::GrainWords;
4371 4403 } else if (r->continuesHumongous()) {
4372 4404 type = "HUMC";
4373 4405 get_hum_bytes(&used_bytes, &capacity_bytes,
4374 4406 &prev_live_bytes, &next_live_bytes);
4375 4407 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4376 4408 } else {
4377 4409 type = "OLD";
4378 4410 }
4379 4411
4380 4412 _total_used_bytes += used_bytes;
4381 4413 _total_capacity_bytes += capacity_bytes;
4382 4414 _total_prev_live_bytes += prev_live_bytes;
4383 4415 _total_next_live_bytes += next_live_bytes;
4384 4416
4385 4417 // Print a line for this particular region.
4386 4418 _out->print_cr(G1PPRL_LINE_PREFIX
4387 4419 G1PPRL_TYPE_FORMAT
4388 4420 G1PPRL_ADDR_BASE_FORMAT
4389 4421 G1PPRL_BYTE_FORMAT
4390 4422 G1PPRL_BYTE_FORMAT
4391 4423 G1PPRL_BYTE_FORMAT
4392 4424 G1PPRL_DOUBLE_FORMAT,
4393 4425 type, bottom, end,
4394 4426 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4395 4427
4396 4428 return false;
4397 4429 }
4398 4430
4399 4431 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4400 4432 // Print the footer of the output.
4401 4433 _out->print_cr(G1PPRL_LINE_PREFIX);
4402 4434 _out->print_cr(G1PPRL_LINE_PREFIX
4403 4435 " SUMMARY"
4404 4436 G1PPRL_SUM_MB_FORMAT("capacity")
4405 4437 G1PPRL_SUM_MB_PERC_FORMAT("used")
4406 4438 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4407 4439 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4408 4440 bytes_to_mb(_total_capacity_bytes),
4409 4441 bytes_to_mb(_total_used_bytes),
4410 4442 perc(_total_used_bytes, _total_capacity_bytes),
4411 4443 bytes_to_mb(_total_prev_live_bytes),
4412 4444 perc(_total_prev_live_bytes, _total_capacity_bytes),
4413 4445 bytes_to_mb(_total_next_live_bytes),
4414 4446 perc(_total_next_live_bytes, _total_capacity_bytes));
4415 4447 _out->cr();
4416 4448 }
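// [Editor's sketch; not part of this patch. to_mb() and percent_of() below are
// illustrative stand-ins for the closure's bytes_to_mb() and perc() helpers,
// whose definitions are not shown in this hunk.] The summary line above turns
// the accumulated byte totals into megabytes and into percentages of the total
// capacity, along these lines:

#include <cstddef>
#include <cstdio>

static double to_mb(size_t bytes) {
  return (double) bytes / (1024.0 * 1024.0);
}

static double percent_of(size_t part, size_t total) {
  return (total == 0) ? 0.0 : 100.0 * (double) part / (double) total;
}

int main() {
  size_t capacity_bytes = 512u * 1024 * 1024;   // hypothetical totals
  size_t used_bytes     = 384u * 1024 * 1024;
  std::printf(" SUMMARY capacity: %1.2f MB used: %1.2f MB / %1.2f %%\n",
              to_mb(capacity_bytes), to_mb(used_bytes),
              percent_of(used_bytes, capacity_bytes));
  return 0;
}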
1451 lines elided