rev 2896 : 7121547: G1: High number of mispredicted branches while iterating over the marking bitmap
Summary: There is a high number of mispredicted branches associated with calling BitMap::iterate() from within CMBitMapRO::iterate(). Implement a version of CMBitMapRO::iterate() directly using inline-able routines.
Reviewed-by:
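
Note: the replacement iterate() itself is not visible in this file's diff (it would live in concurrentMark.inline.hpp, which is not shown here). Below is a rough, hypothetical sketch of the direction the summary describes -- implementing CMBitMapRO::iterate() directly on top of the inline-able BitMap::get_next_one_offset() instead of calling the out-of-line BitMap::iterate() -- using only the CMBitMapRO members that appear in this file. It is not the reviewed patch.

    // Hedged sketch only -- not the actual change under review. Assumes the
    // CMBitMapRO members visible in this diff (_bm, _bmStartWord, _bmWordSize,
    // heapWordToOffset) and the usual HotSpot BitMap/BitMapClosure interfaces.
    inline bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
      // Clamp the region to the part of the heap the bitmap covers.
      HeapWord* left  = MAX2(_bmStartWord, mr.start());
      HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
      if (right > left) {
        // Right-open interval [leftOffset, rightOffset): visit each set bit
        // via the inline-able get_next_one_offset() rather than delegating
        // to the generic, out-of-line BitMap::iterate().
        BitMap::idx_t limit = heapWordToOffset(right);
        for (BitMap::idx_t cur = _bm.get_next_one_offset(heapWordToOffset(left), limit);
             cur < limit;
             cur = _bm.get_next_one_offset(cur + 1, limit)) {
          if (!cl->do_bit(cur)) {
            return false;   // the closure asked to abort the iteration early
          }
        }
      }
      return true;
    }

The clamping and the right-open interval mirror the version being removed below; only the inner loop changes, so the observable behavior (including early abort when do_bit() returns false) stays the same.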
--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
32 32 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
33 33 #include "gc_implementation/g1/g1RemSet.hpp"
34 34 #include "gc_implementation/g1/heapRegionRemSet.hpp"
35 35 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
36 36 #include "gc_implementation/shared/vmGCOperations.hpp"
37 37 #include "memory/genOopClosures.inline.hpp"
38 38 #include "memory/referencePolicy.hpp"
39 39 #include "memory/resourceArea.hpp"
40 40 #include "oops/oop.inline.hpp"
41 41 #include "runtime/handles.inline.hpp"
42 42 #include "runtime/java.hpp"
43 43
44 44 //
45 45 // CMS Bit Map Wrapper
46 46
47 47 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
48 48 _bm((uintptr_t*)NULL,0),
49 49 _shifter(shifter) {
50 50 _bmStartWord = (HeapWord*)(rs.base());
51 51 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
52 52 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
53 53 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
54 54
55 55 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
56 56 // For now we'll just commit all of the bit map up front.
57 57 // Later on we'll try to be more parsimonious with swap.
58 58 guarantee(_virtual_space.initialize(brs, brs.size()),
59 59 "couldn't reserve backing store for CMS bit map");
60 60 assert(_virtual_space.committed_size() == brs.size(),
61 61 "didn't reserve backing store for all of CMS bit map?");
62 62 _bm.set_map((uintptr_t*)_virtual_space.low());
63 63 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
64 64 _bmWordSize, "inconsistency in bit map sizing");
65 65 _bm.set_size(_bmWordSize >> _shifter);
66 66 }
67 67
68 68 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
69 69 HeapWord* limit) const {
70 70 // First we must round addr *up* to a possible object boundary.
71 71 addr = (HeapWord*)align_size_up((intptr_t)addr,
72 72 HeapWordSize << _shifter);
73 73 size_t addrOffset = heapWordToOffset(addr);
74 74 if (limit == NULL) {
75 75 limit = _bmStartWord + _bmWordSize;
76 76 }
77 77 size_t limitOffset = heapWordToOffset(limit);
78 78 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
79 79 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
80 80 assert(nextAddr >= addr, "get_next_one postcondition");
81 81 assert(nextAddr == limit || isMarked(nextAddr),
82 82 "get_next_one postcondition");
83 83 return nextAddr;
84 84 }
85 85
86 86 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
87 87 HeapWord* limit) const {
88 88 size_t addrOffset = heapWordToOffset(addr);
89 89 if (limit == NULL) {
90 90 limit = _bmStartWord + _bmWordSize;
91 91 }
92 92 size_t limitOffset = heapWordToOffset(limit);
93 93 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
94 94 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
95 95 assert(nextAddr >= addr, "get_next_one postcondition");
96 96 assert(nextAddr == limit || !isMarked(nextAddr),
97 97 "get_next_one postcondition");
98 98 return nextAddr;
99 99 }
100 100
101 101 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
102 102 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
103 103 return (int) (diff >> _shifter);
104 104 }
105 105
106 -bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
107 - HeapWord* left = MAX2(_bmStartWord, mr.start());
108 - HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
109 - if (right > left) {
110 - // Right-open interval [leftOffset, rightOffset).
111 - return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
112 - } else {
113 - return true;
114 - }
115 -}
116 -
117 106 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
118 107 size_t from_start_index,
119 108 HeapWord* to_start_word,
120 109 size_t word_num) {
121 110 _bm.mostly_disjoint_range_union(from_bitmap,
122 111 from_start_index,
123 112 heapWordToOffset(to_start_word),
124 113 word_num);
125 114 }
126 115
127 116 #ifndef PRODUCT
128 117 bool CMBitMapRO::covers(ReservedSpace rs) const {
129 118 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
130 119 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
131 120 "size inconsistency");
132 121 return _bmStartWord == (HeapWord*)(rs.base()) &&
133 122 _bmWordSize == rs.size()>>LogHeapWordSize;
134 123 }
135 124 #endif
136 125
137 126 void CMBitMap::clearAll() {
138 127 _bm.clear();
139 128 return;
140 129 }
141 130
142 131 void CMBitMap::markRange(MemRegion mr) {
143 132 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
144 133 assert(!mr.is_empty(), "unexpected empty region");
145 134 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
146 135 ((HeapWord *) mr.end())),
147 136 "markRange memory region end is not card aligned");
148 137 // convert address range into offset range
149 138 _bm.at_put_range(heapWordToOffset(mr.start()),
150 139 heapWordToOffset(mr.end()), true);
151 140 }
152 141
153 142 void CMBitMap::clearRange(MemRegion mr) {
154 143 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
155 144 assert(!mr.is_empty(), "unexpected empty region");
156 145 // convert address range into offset range
157 146 _bm.at_put_range(heapWordToOffset(mr.start()),
158 147 heapWordToOffset(mr.end()), false);
159 148 }
160 149
161 150 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
162 151 HeapWord* end_addr) {
163 152 HeapWord* start = getNextMarkedWordAddress(addr);
164 153 start = MIN2(start, end_addr);
165 154 HeapWord* end = getNextUnmarkedWordAddress(start);
166 155 end = MIN2(end, end_addr);
167 156 assert(start <= end, "Consistency check");
168 157 MemRegion mr(start, end);
169 158 if (!mr.is_empty()) {
170 159 clearRange(mr);
171 160 }
172 161 return mr;
173 162 }
174 163
175 164 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
176 165 _base(NULL), _cm(cm)
177 166 #ifdef ASSERT
178 167 , _drain_in_progress(false)
179 168 , _drain_in_progress_yields(false)
180 169 #endif
181 170 {}
182 171
183 172 void CMMarkStack::allocate(size_t size) {
184 173 _base = NEW_C_HEAP_ARRAY(oop, size);
185 174 if (_base == NULL) {
186 175 vm_exit_during_initialization("Failed to allocate "
187 176 "CM region mark stack");
188 177 }
189 178 _index = 0;
190 179 _capacity = (jint) size;
191 180 _oops_do_bound = -1;
192 181 NOT_PRODUCT(_max_depth = 0);
193 182 }
194 183
195 184 CMMarkStack::~CMMarkStack() {
196 185 if (_base != NULL) {
197 186 FREE_C_HEAP_ARRAY(oop, _base);
198 187 }
199 188 }
200 189
201 190 void CMMarkStack::par_push(oop ptr) {
202 191 while (true) {
203 192 if (isFull()) {
204 193 _overflow = true;
205 194 return;
206 195 }
207 196 // Otherwise...
208 197 jint index = _index;
209 198 jint next_index = index+1;
210 199 jint res = Atomic::cmpxchg(next_index, &_index, index);
211 200 if (res == index) {
212 201 _base[index] = ptr;
213 202 // Note that we don't maintain this atomically. We could, but it
214 203 // doesn't seem necessary.
215 204 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
216 205 return;
217 206 }
218 207 // Otherwise, we need to try again.
219 208 }
220 209 }
221 210
222 211 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
223 212 while (true) {
224 213 if (isFull()) {
225 214 _overflow = true;
226 215 return;
227 216 }
228 217 // Otherwise...
229 218 jint index = _index;
230 219 jint next_index = index + n;
231 220 if (next_index > _capacity) {
232 221 _overflow = true;
233 222 return;
234 223 }
235 224 jint res = Atomic::cmpxchg(next_index, &_index, index);
236 225 if (res == index) {
237 226 for (int i = 0; i < n; i++) {
238 227 int ind = index + i;
239 228 assert(ind < _capacity, "By overflow test above.");
240 229 _base[ind] = ptr_arr[i];
241 230 }
242 231 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
243 232 return;
244 233 }
245 234 // Otherwise, we need to try again.
246 235 }
247 236 }
248 237
249 238
250 239 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
251 240 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
252 241 jint start = _index;
253 242 jint next_index = start + n;
254 243 if (next_index > _capacity) {
255 244 _overflow = true;
256 245 return;
257 246 }
258 247 // Otherwise.
259 248 _index = next_index;
260 249 for (int i = 0; i < n; i++) {
261 250 int ind = start + i;
262 251 assert(ind < _capacity, "By overflow test above.");
263 252 _base[ind] = ptr_arr[i];
264 253 }
265 254 }
266 255
267 256
268 257 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
269 258 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
270 259 jint index = _index;
271 260 if (index == 0) {
272 261 *n = 0;
273 262 return false;
274 263 } else {
275 264 int k = MIN2(max, index);
276 265 jint new_ind = index - k;
277 266 for (int j = 0; j < k; j++) {
278 267 ptr_arr[j] = _base[new_ind + j];
279 268 }
280 269 _index = new_ind;
281 270 *n = k;
282 271 return true;
283 272 }
284 273 }
285 274
286 275
287 276 CMRegionStack::CMRegionStack() : _base(NULL) {}
288 277
289 278 void CMRegionStack::allocate(size_t size) {
290 279 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
291 280 if (_base == NULL) {
292 281 vm_exit_during_initialization("Failed to allocate CM region mark stack");
293 282 }
294 283 _index = 0;
295 284 _capacity = (jint) size;
296 285 }
297 286
298 287 CMRegionStack::~CMRegionStack() {
299 288 if (_base != NULL) {
300 289 FREE_C_HEAP_ARRAY(oop, _base);
301 290 }
302 291 }
303 292
304 293 void CMRegionStack::push_lock_free(MemRegion mr) {
305 294 assert(mr.word_size() > 0, "Precondition");
306 295 while (true) {
307 296 jint index = _index;
308 297
309 298 if (index >= _capacity) {
310 299 _overflow = true;
311 300 return;
312 301 }
313 302 // Otherwise...
314 303 jint next_index = index+1;
315 304 jint res = Atomic::cmpxchg(next_index, &_index, index);
316 305 if (res == index) {
317 306 _base[index] = mr;
318 307 return;
319 308 }
320 309 // Otherwise, we need to try again.
321 310 }
322 311 }
323 312
324 313 // Lock-free pop of the region stack. Called during the concurrent
325 314 // marking / remark phases. Should only be called in tandem with
326 315 // other lock-free pops.
327 316 MemRegion CMRegionStack::pop_lock_free() {
328 317 while (true) {
329 318 jint index = _index;
330 319
331 320 if (index == 0) {
332 321 return MemRegion();
333 322 }
334 323 // Otherwise...
335 324 jint next_index = index-1;
336 325 jint res = Atomic::cmpxchg(next_index, &_index, index);
337 326 if (res == index) {
338 327 MemRegion mr = _base[next_index];
339 328 if (mr.start() != NULL) {
340 329 assert(mr.end() != NULL, "invariant");
341 330 assert(mr.word_size() > 0, "invariant");
342 331 return mr;
343 332 } else {
344 333 // that entry was invalidated... let's skip it
345 334 assert(mr.end() == NULL, "invariant");
346 335 }
347 336 }
348 337 // Otherwise, we need to try again.
349 338 }
350 339 }
351 340
352 341 #if 0
353 342 // The routines that manipulate the region stack with a lock are
354 343 // not currently used. They should be retained, however, as a
355 344 // diagnostic aid.
356 345
357 346 void CMRegionStack::push_with_lock(MemRegion mr) {
358 347 assert(mr.word_size() > 0, "Precondition");
359 348 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
360 349
361 350 if (isFull()) {
362 351 _overflow = true;
363 352 return;
364 353 }
365 354
366 355 _base[_index] = mr;
367 356 _index += 1;
368 357 }
369 358
370 359 MemRegion CMRegionStack::pop_with_lock() {
371 360 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
372 361
373 362 while (true) {
374 363 if (_index == 0) {
375 364 return MemRegion();
376 365 }
377 366 _index -= 1;
378 367
379 368 MemRegion mr = _base[_index];
380 369 if (mr.start() != NULL) {
381 370 assert(mr.end() != NULL, "invariant");
382 371 assert(mr.word_size() > 0, "invariant");
383 372 return mr;
384 373 } else {
385 374 // that entry was invalidated... let's skip it
386 375 assert(mr.end() == NULL, "invariant");
387 376 }
388 377 }
389 378 }
390 379 #endif
391 380
392 381 bool CMRegionStack::invalidate_entries_into_cset() {
393 382 bool result = false;
394 383 G1CollectedHeap* g1h = G1CollectedHeap::heap();
395 384 for (int i = 0; i < _oops_do_bound; ++i) {
396 385 MemRegion mr = _base[i];
397 386 if (mr.start() != NULL) {
398 387 assert(mr.end() != NULL, "invariant");
399 388 assert(mr.word_size() > 0, "invariant");
400 389 HeapRegion* hr = g1h->heap_region_containing(mr.start());
401 390 assert(hr != NULL, "invariant");
402 391 if (hr->in_collection_set()) {
403 392 // The region points into the collection set
404 393 _base[i] = MemRegion();
405 394 result = true;
406 395 }
407 396 } else {
408 397 // that entry was invalidated... let's skip it
409 398 assert(mr.end() == NULL, "invariant");
410 399 }
411 400 }
412 401 return result;
413 402 }
414 403
415 404 template<class OopClosureClass>
416 405 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
417 406 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
418 407 || SafepointSynchronize::is_at_safepoint(),
419 408 "Drain recursion must be yield-safe.");
420 409 bool res = true;
421 410 debug_only(_drain_in_progress = true);
422 411 debug_only(_drain_in_progress_yields = yield_after);
423 412 while (!isEmpty()) {
424 413 oop newOop = pop();
425 414 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
426 415 assert(newOop->is_oop(), "Expected an oop");
427 416 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
428 417 "only grey objects on this stack");
429 418 // iterate over the oops in this oop, marking and pushing
430 419 // the ones in CMS generation.
431 420 newOop->oop_iterate(cl);
432 421 if (yield_after && _cm->do_yield_check()) {
433 422 res = false;
434 423 break;
435 424 }
436 425 }
437 426 debug_only(_drain_in_progress = false);
438 427 return res;
439 428 }
440 429
441 430 void CMMarkStack::oops_do(OopClosure* f) {
442 431 if (_index == 0) return;
443 432 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
444 433 "Bound must be set.");
445 434 for (int i = 0; i < _oops_do_bound; i++) {
446 435 f->do_oop(&_base[i]);
447 436 }
448 437 _oops_do_bound = -1;
449 438 }
450 439
451 440 bool ConcurrentMark::not_yet_marked(oop obj) const {
452 441 return (_g1h->is_obj_ill(obj)
453 442 || (_g1h->is_in_permanent(obj)
454 443 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
455 444 }
456 445
457 446 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
458 447 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
459 448 #endif // _MSC_VER
460 449
461 450 size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
462 451 return MAX2((n_par_threads + 2) / 4, (size_t)1);
463 452 }
464 453
465 454 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
466 455 int max_regions) :
467 456 _markBitMap1(rs, MinObjAlignment - 1),
468 457 _markBitMap2(rs, MinObjAlignment - 1),
469 458
470 459 _parallel_marking_threads(0),
471 460 _max_parallel_marking_threads(0),
472 461 _sleep_factor(0.0),
473 462 _marking_task_overhead(1.0),
474 463 _cleanup_sleep_factor(0.0),
475 464 _cleanup_task_overhead(1.0),
476 465 _cleanup_list("Cleanup List"),
477 466 _region_bm(max_regions, false /* in_resource_area*/),
478 467 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
479 468 CardTableModRefBS::card_shift,
480 469 false /* in_resource_area*/),
481 470 _prevMarkBitMap(&_markBitMap1),
482 471 _nextMarkBitMap(&_markBitMap2),
483 472 _at_least_one_mark_complete(false),
484 473
485 474 _markStack(this),
486 475 _regionStack(),
487 476 // _finger set in set_non_marking_state
488 477
489 478 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
490 479 // _active_tasks set in set_non_marking_state
491 480 // _tasks set inside the constructor
492 481 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
493 482 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
494 483
495 484 _has_overflown(false),
496 485 _concurrent(false),
497 486 _has_aborted(false),
498 487 _restart_for_overflow(false),
499 488 _concurrent_marking_in_progress(false),
500 489 _should_gray_objects(false),
501 490
502 491 // _verbose_level set below
503 492
504 493 _init_times(),
505 494 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
506 495 _cleanup_times(),
507 496 _total_counting_time(0.0),
508 497 _total_rs_scrub_time(0.0),
509 498
510 499 _parallel_workers(NULL) {
511 500 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
512 501 if (verbose_level < no_verbose) {
513 502 verbose_level = no_verbose;
514 503 }
515 504 if (verbose_level > high_verbose) {
516 505 verbose_level = high_verbose;
517 506 }
518 507 _verbose_level = verbose_level;
519 508
520 509 if (verbose_low()) {
521 510 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
522 511 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
523 512 }
524 513
525 514 _markStack.allocate(MarkStackSize);
526 515 _regionStack.allocate(G1MarkRegionStackSize);
527 516
528 517 // Create & start a ConcurrentMark thread.
529 518 _cmThread = new ConcurrentMarkThread(this);
530 519 assert(cmThread() != NULL, "CM Thread should have been created");
531 520 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
532 521
533 522 _g1h = G1CollectedHeap::heap();
534 523 assert(CGC_lock != NULL, "Where's the CGC_lock?");
535 524 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
536 525 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
537 526
538 527 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
539 528 satb_qs.set_buffer_size(G1SATBBufferSize);
540 529
541 530 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
542 531 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
543 532
544 533 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
545 534 _active_tasks = _max_task_num;
546 535 for (int i = 0; i < (int) _max_task_num; ++i) {
547 536 CMTaskQueue* task_queue = new CMTaskQueue();
548 537 task_queue->initialize();
549 538 _task_queues->register_queue(i, task_queue);
550 539
551 540 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
552 541 _accum_task_vtime[i] = 0.0;
553 542 }
554 543
555 544 if (ConcGCThreads > ParallelGCThreads) {
556 545 vm_exit_during_initialization("Can't have more ConcGCThreads "
557 546 "than ParallelGCThreads.");
558 547 }
559 548 if (ParallelGCThreads == 0) {
560 549 // if we are not running with any parallel GC threads we will not
561 550 // spawn any marking threads either
562 551 _parallel_marking_threads = 0;
563 552 _max_parallel_marking_threads = 0;
564 553 _sleep_factor = 0.0;
565 554 _marking_task_overhead = 1.0;
566 555 } else {
567 556 if (ConcGCThreads > 0) {
568 557 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
569 558 // if both are set
570 559
571 560 _parallel_marking_threads = ConcGCThreads;
572 561 _max_parallel_marking_threads = _parallel_marking_threads;
573 562 _sleep_factor = 0.0;
574 563 _marking_task_overhead = 1.0;
575 564 } else if (G1MarkingOverheadPercent > 0) {
576 565 // we will calculate the number of parallel marking threads
577 566 // based on a target overhead with respect to the soft real-time
578 567 // goal
579 568
580 569 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
581 570 double overall_cm_overhead =
582 571 (double) MaxGCPauseMillis * marking_overhead /
583 572 (double) GCPauseIntervalMillis;
584 573 double cpu_ratio = 1.0 / (double) os::processor_count();
585 574 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
586 575 double marking_task_overhead =
587 576 overall_cm_overhead / marking_thread_num *
588 577 (double) os::processor_count();
589 578 double sleep_factor =
590 579 (1.0 - marking_task_overhead) / marking_task_overhead;
591 580
592 581 _parallel_marking_threads = (size_t) marking_thread_num;
593 582 _max_parallel_marking_threads = _parallel_marking_threads;
594 583 _sleep_factor = sleep_factor;
595 584 _marking_task_overhead = marking_task_overhead;
596 585 } else {
597 586 _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
598 587 _max_parallel_marking_threads = _parallel_marking_threads;
599 588 _sleep_factor = 0.0;
600 589 _marking_task_overhead = 1.0;
601 590 }
602 591
603 592 if (parallel_marking_threads() > 1) {
604 593 _cleanup_task_overhead = 1.0;
605 594 } else {
606 595 _cleanup_task_overhead = marking_task_overhead();
607 596 }
608 597 _cleanup_sleep_factor =
609 598 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
610 599
611 600 #if 0
612 601 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
613 602 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
614 603 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
615 604 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
616 605 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
617 606 #endif
618 607
619 608 guarantee(parallel_marking_threads() > 0, "peace of mind");
620 609 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
621 610 (int) _max_parallel_marking_threads, false, true);
622 611 if (_parallel_workers == NULL) {
623 612 vm_exit_during_initialization("Failed necessary allocation.");
624 613 } else {
625 614 _parallel_workers->initialize_workers();
626 615 }
627 616 }
628 617
629 618 // so that the call below can read a sensible value
630 619 _heap_start = (HeapWord*) rs.base();
631 620 set_non_marking_state();
632 621 }
633 622
634 623 void ConcurrentMark::update_g1_committed(bool force) {
635 624 // If concurrent marking is not in progress, then we do not need to
636 625 // update _heap_end. This has a subtle and important
637 626 // side-effect. Imagine that two evacuation pauses happen between
638 627 // marking completion and remark. The first one can grow the
639 628 // heap (hence now the finger is below the heap end). Then, the
640 629 // second one could unnecessarily push regions on the region
641 630 // stack. This causes the invariant that the region stack is empty
642 631 // at the beginning of remark to be false. By ensuring that we do
643 632 // not observe heap expansions after marking is complete, then we do
644 633 // not have this problem.
645 634 if (!concurrent_marking_in_progress() && !force) return;
646 635
647 636 MemRegion committed = _g1h->g1_committed();
648 637 assert(committed.start() == _heap_start, "start shouldn't change");
649 638 HeapWord* new_end = committed.end();
650 639 if (new_end > _heap_end) {
651 640 // The heap has been expanded.
652 641
653 642 _heap_end = new_end;
654 643 }
655 644 // Notice that the heap can also shrink. However, this only happens
656 645 // during a Full GC (at least currently) and the entire marking
657 646 // phase will bail out and the task will not be restarted. So, let's
658 647 // do nothing.
659 648 }
660 649
661 650 void ConcurrentMark::reset() {
662 651 // Starting values for these two. This should be called in a STW
663 652 // phase. CM will be notified of any future g1_committed expansions
664 653 // at the end of evacuation pauses, when tasks are
665 654 // inactive.
666 655 MemRegion committed = _g1h->g1_committed();
667 656 _heap_start = committed.start();
668 657 _heap_end = committed.end();
669 658
670 659 // Separated the asserts so that we know which one fires.
671 660 assert(_heap_start != NULL, "heap bounds should look ok");
672 661 assert(_heap_end != NULL, "heap bounds should look ok");
673 662 assert(_heap_start < _heap_end, "heap bounds should look ok");
674 663
675 664 // reset all the marking data structures and any necessary flags
676 665 clear_marking_state();
677 666
678 667 if (verbose_low()) {
679 668 gclog_or_tty->print_cr("[global] resetting");
680 669 }
681 670
682 671 // We do reset all of them, since different phases will use
683 672 // a different number of active threads. So, it's easiest to have all
684 673 // of them ready.
685 674 for (int i = 0; i < (int) _max_task_num; ++i) {
686 675 _tasks[i]->reset(_nextMarkBitMap);
687 676 }
688 677
689 678 // we need this to make sure that the flag is on during the evac
690 679 // pause with initial mark piggy-backed
691 680 set_concurrent_marking_in_progress();
692 681 }
693 682
694 683 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
695 684 assert(active_tasks <= _max_task_num, "we should not have more");
696 685
697 686 _active_tasks = active_tasks;
698 687 // Need to update the three data structures below according to the
699 688 // number of active threads for this phase.
700 689 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
701 690 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
702 691 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
703 692
704 693 _concurrent = concurrent;
705 694 // We propagate this to all tasks, not just the active ones.
706 695 for (int i = 0; i < (int) _max_task_num; ++i)
707 696 _tasks[i]->set_concurrent(concurrent);
708 697
709 698 if (concurrent) {
710 699 set_concurrent_marking_in_progress();
711 700 } else {
712 701 // We currently assume that the concurrent flag has been set to
713 702 // false before we start remark. At this point we should also be
714 703 // in a STW phase.
715 704 assert(!concurrent_marking_in_progress(), "invariant");
716 705 assert(_finger == _heap_end, "only way to get here");
717 706 update_g1_committed(true);
718 707 }
719 708 }
720 709
721 710 void ConcurrentMark::set_non_marking_state() {
722 711 // We set the global marking state to some default values when we're
723 712 // not doing marking.
724 713 clear_marking_state();
725 714 _active_tasks = 0;
726 715 clear_concurrent_marking_in_progress();
727 716 }
728 717
729 718 ConcurrentMark::~ConcurrentMark() {
730 719 for (int i = 0; i < (int) _max_task_num; ++i) {
731 720 delete _task_queues->queue(i);
732 721 delete _tasks[i];
733 722 }
734 723 delete _task_queues;
735 724 FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
736 725 }
737 726
738 727 // This closure is used to mark refs into the g1 generation
739 728 // from external roots in the CMS bit map.
740 729 // Called at the first checkpoint.
741 730 //
742 731
743 732 void ConcurrentMark::clearNextBitmap() {
744 733 G1CollectedHeap* g1h = G1CollectedHeap::heap();
745 734 G1CollectorPolicy* g1p = g1h->g1_policy();
746 735
747 736 // Make sure that the concurrent mark thread looks to still be in
748 737 // the current cycle.
749 738 guarantee(cmThread()->during_cycle(), "invariant");
750 739
751 740 // We are finishing up the current cycle by clearing the next
752 741 // marking bitmap and getting it ready for the next cycle. During
753 742 // this time no other cycle can start. So, let's make sure that this
754 743 // is the case.
755 744 guarantee(!g1h->mark_in_progress(), "invariant");
756 745
757 746 // clear the mark bitmap (no grey objects to start with).
758 747 // We need to do this in chunks and offer to yield in between
759 748 // each chunk.
760 749 HeapWord* start = _nextMarkBitMap->startWord();
761 750 HeapWord* end = _nextMarkBitMap->endWord();
762 751 HeapWord* cur = start;
763 752 size_t chunkSize = M;
764 753 while (cur < end) {
765 754 HeapWord* next = cur + chunkSize;
766 755 if (next > end) {
767 756 next = end;
768 757 }
769 758 MemRegion mr(cur,next);
770 759 _nextMarkBitMap->clearRange(mr);
771 760 cur = next;
772 761 do_yield_check();
773 762
774 763 // Repeat the asserts from above. We'll do them as asserts here to
775 764 // minimize their overhead on the product. However, we'll have
776 765 // them as guarantees at the beginning / end of the bitmap
777 766 // clearing to get some checking in the product.
778 767 assert(cmThread()->during_cycle(), "invariant");
779 768 assert(!g1h->mark_in_progress(), "invariant");
780 769 }
781 770
782 771 // Repeat the asserts from above.
783 772 guarantee(cmThread()->during_cycle(), "invariant");
784 773 guarantee(!g1h->mark_in_progress(), "invariant");
785 774 }
786 775
787 776 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
788 777 public:
789 778 bool doHeapRegion(HeapRegion* r) {
790 779 if (!r->continuesHumongous()) {
791 780 r->note_start_of_marking(true);
792 781 }
793 782 return false;
794 783 }
795 784 };
796 785
797 786 void ConcurrentMark::checkpointRootsInitialPre() {
798 787 G1CollectedHeap* g1h = G1CollectedHeap::heap();
799 788 G1CollectorPolicy* g1p = g1h->g1_policy();
800 789
801 790 _has_aborted = false;
802 791
803 792 #ifndef PRODUCT
804 793 if (G1PrintReachableAtInitialMark) {
805 794 print_reachable("at-cycle-start",
806 795 VerifyOption_G1UsePrevMarking, true /* all */);
807 796 }
808 797 #endif
809 798
810 799 // Initialise marking structures. This has to be done in a STW phase.
811 800 reset();
812 801 }
813 802
814 803
815 804 void ConcurrentMark::checkpointRootsInitialPost() {
816 805 G1CollectedHeap* g1h = G1CollectedHeap::heap();
817 806
818 807 // If we force an overflow during remark, the remark operation will
819 808 // actually abort and we'll restart concurrent marking. If we always
820 809 // force an overflow during remark we'll never actually complete the
821 810 // marking phase. So, we initialize this here, at the start of the
822 811 // cycle, so that the remaining overflow number will decrease at
823 812 // every remark and we'll eventually not need to cause one.
824 813 force_overflow_stw()->init();
825 814
826 815 // For each region note start of marking.
827 816 NoteStartOfMarkHRClosure startcl;
828 817 g1h->heap_region_iterate(&startcl);
829 818
830 819 // Start Concurrent Marking weak-reference discovery.
831 820 ReferenceProcessor* rp = g1h->ref_processor_cm();
832 821 // enable ("weak") refs discovery
833 822 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
834 823 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
835 824
836 825 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
837 826 // This is the start of the marking cycle; we expect all
838 827 // threads to have SATB queues with active set to false.
839 828 satb_mq_set.set_active_all_threads(true, /* new active value */
840 829 false /* expected_active */);
841 830
842 831 // update_g1_committed() will be called at the end of an evac pause
843 832 // when marking is on. So, it's also called at the end of the
844 833 // initial-mark pause to update the heap end, if the heap expands
845 834 // during it. No need to call it here.
846 835 }
847 836
848 837 /*
849 838 * Notice that in the next two methods, we actually leave the STS
850 839 * during the barrier sync and join it immediately afterwards. If we
851 840 * do not do this, the following deadlock can occur: one thread could
852 841 * be in the barrier sync code, waiting for the other thread to also
853 842 * sync up, whereas another one could be trying to yield, while also
854 843 * waiting for the other threads to sync up too.
855 844 *
856 845 * Note, however, that this code is also used during remark and in
857 846 * this case we should not attempt to leave / enter the STS, otherwise
858 847 * we'll either hit an assert (debug / fastdebug) or deadlock
859 848 * (product). So we should only leave / enter the STS if we are
860 849 * operating concurrently.
861 850 *
862 851 * Because the thread that does the sync barrier has left the STS, it
863 852 * is possible that a Full GC or an evacuation pause could occur
864 853 * while it is suspended. This is actually safe, since entering the
865 854 * sync barrier is one of the last things do_marking_step() does, and
866 855 * it doesn't manipulate any data structures afterwards.
867 856 */
868 857
869 858 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
870 859 if (verbose_low()) {
871 860 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
872 861 }
873 862
874 863 if (concurrent()) {
875 864 ConcurrentGCThread::stsLeave();
876 865 }
877 866 _first_overflow_barrier_sync.enter();
878 867 if (concurrent()) {
879 868 ConcurrentGCThread::stsJoin();
880 869 }
881 870 // at this point everyone should have synced up and not be doing any
882 871 // more work
883 872
884 873 if (verbose_low()) {
885 874 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
886 875 }
887 876
888 877 // let task 0 do this
889 878 if (task_num == 0) {
890 879 // task 0 is responsible for clearing the global data structures
891 880 // We should be here because of an overflow. During STW we should
892 881 // not clear the overflow flag since we rely on it being true when
893 882 // we exit this method to abort the pause and restart concurrent
894 883 // marking.
895 884 clear_marking_state(concurrent() /* clear_overflow */);
896 885 force_overflow()->update();
897 886
898 887 if (PrintGC) {
899 888 gclog_or_tty->date_stamp(PrintGCDateStamps);
900 889 gclog_or_tty->stamp(PrintGCTimeStamps);
901 890 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
902 891 }
903 892 }
904 893
905 894 // after this, each task should reset its own data structures and
906 895 // then go into the second barrier
907 896 }
908 897
909 898 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
910 899 if (verbose_low()) {
911 900 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
912 901 }
913 902
914 903 if (concurrent()) {
915 904 ConcurrentGCThread::stsLeave();
916 905 }
917 906 _second_overflow_barrier_sync.enter();
918 907 if (concurrent()) {
919 908 ConcurrentGCThread::stsJoin();
920 909 }
921 910 // at this point everything should be re-initialised and ready to go
922 911
923 912 if (verbose_low()) {
924 913 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
925 914 }
926 915 }
927 916
928 917 #ifndef PRODUCT
929 918 void ForceOverflowSettings::init() {
930 919 _num_remaining = G1ConcMarkForceOverflow;
931 920 _force = false;
932 921 update();
933 922 }
934 923
935 924 void ForceOverflowSettings::update() {
936 925 if (_num_remaining > 0) {
937 926 _num_remaining -= 1;
938 927 _force = true;
939 928 } else {
940 929 _force = false;
941 930 }
942 931 }
943 932
944 933 bool ForceOverflowSettings::should_force() {
945 934 if (_force) {
946 935 _force = false;
947 936 return true;
948 937 } else {
949 938 return false;
950 939 }
951 940 }
952 941 #endif // !PRODUCT
953 942
954 943 void ConcurrentMark::grayRoot(oop p) {
955 944 HeapWord* addr = (HeapWord*) p;
956 945 // We can't really check against _heap_start and _heap_end, since it
957 946 // is possible during an evacuation pause with piggy-backed
958 947 // initial-mark that the committed space is expanded during the
959 948 // pause without CM observing this change. So the assertion below
960 949 // is a bit conservative, but better than nothing.
961 950 assert(_g1h->g1_committed().contains(addr),
962 951 "address should be within the heap bounds");
963 952
964 953 if (!_nextMarkBitMap->isMarked(addr)) {
965 954 _nextMarkBitMap->parMark(addr);
966 955 }
967 956 }
968 957
969 958 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
970 959 // The objects on the region have already been marked "in bulk" by
971 960 // the caller. We only need to decide whether to push the region on
972 961 // the region stack or not.
973 962
974 963 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
975 964 // We're done with marking and waiting for remark. We do not need to
976 965 // push anything else on the region stack.
977 966 return;
978 967 }
979 968
980 969 HeapWord* finger = _finger;
981 970
982 971 if (verbose_low()) {
983 972 gclog_or_tty->print_cr("[global] attempting to push "
984 973 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
985 974 PTR_FORMAT, mr.start(), mr.end(), finger);
986 975 }
987 976
988 977 if (mr.start() < finger) {
989 978 // The finger is always heap region aligned and it is not possible
990 979 // for mr to span heap regions.
991 980 assert(mr.end() <= finger, "invariant");
992 981
993 982 // Separated the asserts so that we know which one fires.
994 983 assert(mr.start() <= mr.end(),
995 984 "region boundaries should fall within the committed space");
996 985 assert(_heap_start <= mr.start(),
997 986 "region boundaries should fall within the committed space");
998 987 assert(mr.end() <= _heap_end,
999 988 "region boundaries should fall within the committed space");
1000 989 if (verbose_low()) {
1001 990 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1002 991 "below the finger, pushing it",
1003 992 mr.start(), mr.end());
1004 993 }
1005 994
1006 995 if (!region_stack_push_lock_free(mr)) {
1007 996 if (verbose_low()) {
1008 997 gclog_or_tty->print_cr("[global] region stack has overflown.");
1009 998 }
1010 999 }
1011 1000 }
1012 1001 }
1013 1002
1014 1003 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1015 1004 // The object is not marked by the caller. We need to at least mark
1016 1005 // it and maybe push it on the stack.
1017 1006
1018 1007 HeapWord* addr = (HeapWord*)p;
1019 1008 if (!_nextMarkBitMap->isMarked(addr)) {
1020 1009 // We definitely need to mark it, irrespective of whether we bail out
1021 1010 // because we're done with marking.
1022 1011 if (_nextMarkBitMap->parMark(addr)) {
1023 1012 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1024 1013 // If we're done with concurrent marking and we're waiting for
1025 1014 // remark, then we're not pushing anything on the stack.
1026 1015 return;
1027 1016 }
1028 1017
1029 1018 // No OrderAccess::store_load() is needed. It is implicit in the
1030 1019 // CAS done in parMark(addr) above
1031 1020 HeapWord* finger = _finger;
1032 1021
1033 1022 if (addr < finger) {
1034 1023 if (!mark_stack_push(oop(addr))) {
1035 1024 if (verbose_low()) {
1036 1025 gclog_or_tty->print_cr("[global] global stack overflow "
1037 1026 "during parMark");
1038 1027 }
1039 1028 }
1040 1029 }
1041 1030 }
1042 1031 }
1043 1032 }
1044 1033
1045 1034 class CMConcurrentMarkingTask: public AbstractGangTask {
1046 1035 private:
1047 1036 ConcurrentMark* _cm;
1048 1037 ConcurrentMarkThread* _cmt;
1049 1038
1050 1039 public:
1051 1040 void work(int worker_i) {
1052 1041 assert(Thread::current()->is_ConcurrentGC_thread(),
1053 1042 "this should only be done by a conc GC thread");
1054 1043 ResourceMark rm;
1055 1044
1056 1045 double start_vtime = os::elapsedVTime();
1057 1046
1058 1047 ConcurrentGCThread::stsJoin();
1059 1048
1060 1049 assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1061 1050 CMTask* the_task = _cm->task(worker_i);
1062 1051 the_task->record_start_time();
1063 1052 if (!_cm->has_aborted()) {
1064 1053 do {
1065 1054 double start_vtime_sec = os::elapsedVTime();
1066 1055 double start_time_sec = os::elapsedTime();
1067 1056 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1068 1057
1069 1058 the_task->do_marking_step(mark_step_duration_ms,
1070 1059 true /* do_stealing */,
1071 1060 true /* do_termination */);
1072 1061
1073 1062 double end_time_sec = os::elapsedTime();
1074 1063 double end_vtime_sec = os::elapsedVTime();
1075 1064 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1076 1065 double elapsed_time_sec = end_time_sec - start_time_sec;
1077 1066 _cm->clear_has_overflown();
1078 1067
1079 1068 bool ret = _cm->do_yield_check(worker_i);
1080 1069
1081 1070 jlong sleep_time_ms;
1082 1071 if (!_cm->has_aborted() && the_task->has_aborted()) {
1083 1072 sleep_time_ms =
1084 1073 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1085 1074 ConcurrentGCThread::stsLeave();
1086 1075 os::sleep(Thread::current(), sleep_time_ms, false);
1087 1076 ConcurrentGCThread::stsJoin();
1088 1077 }
1089 1078 double end_time2_sec = os::elapsedTime();
1090 1079 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1091 1080
1092 1081 #if 0
1093 1082 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1094 1083 "overhead %1.4lf",
1095 1084 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1096 1085 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1097 1086 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1098 1087 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1099 1088 #endif
1100 1089 } while (!_cm->has_aborted() && the_task->has_aborted());
1101 1090 }
1102 1091 the_task->record_end_time();
1103 1092 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1104 1093
1105 1094 ConcurrentGCThread::stsLeave();
1106 1095
1107 1096 double end_vtime = os::elapsedVTime();
1108 1097 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1109 1098 }
1110 1099
1111 1100 CMConcurrentMarkingTask(ConcurrentMark* cm,
1112 1101 ConcurrentMarkThread* cmt) :
1113 1102 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1114 1103
1115 1104 ~CMConcurrentMarkingTask() { }
1116 1105 };
1117 1106
1118 1107 // Calculates the number of active workers for a concurrent
1119 1108 // phase.
1120 1109 size_t ConcurrentMark::calc_parallel_marking_threads() {
1121 1110 if (G1CollectedHeap::use_parallel_gc_threads()) {
1122 1111 size_t n_conc_workers = 0;
1123 1112 if (!UseDynamicNumberOfGCThreads ||
1124 1113 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1125 1114 !ForceDynamicNumberOfGCThreads)) {
1126 1115 n_conc_workers = max_parallel_marking_threads();
1127 1116 } else {
1128 1117 n_conc_workers =
1129 1118 AdaptiveSizePolicy::calc_default_active_workers(
1130 1119 max_parallel_marking_threads(),
1131 1120 1, /* Minimum workers */
1132 1121 parallel_marking_threads(),
1133 1122 Threads::number_of_non_daemon_threads());
1134 1123 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1135 1124 // that scaling has already gone into "_max_parallel_marking_threads".
1136 1125 }
1137 1126 assert(n_conc_workers > 0, "Always need at least 1");
1138 1127 return n_conc_workers;
1139 1128 }
1140 1129 // If we are not running with any parallel GC threads we will not
1141 1130 // have spawned any marking threads either. Hence the number of
1142 1131 // concurrent workers should be 0.
1143 1132 return 0;
1144 1133 }
1145 1134
1146 1135 void ConcurrentMark::markFromRoots() {
1147 1136 // we might be tempted to assert that:
1148 1137 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1149 1138 // "inconsistent argument?");
1150 1139 // However that wouldn't be right, because it's possible that
1151 1140 // a safepoint is indeed in progress as a younger generation
1152 1141 // stop-the-world GC happens even as we mark in this generation.
1153 1142
1154 1143 _restart_for_overflow = false;
1155 1144 force_overflow_conc()->init();
1156 1145
1157 1146 // _g1h has _n_par_threads
1158 1147 _parallel_marking_threads = calc_parallel_marking_threads();
1159 1148 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1160 1149 "Maximum number of marking threads exceeded");
1161 1150
1162 1151 size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1163 1152
1164 1153 // Parallel task terminator is set in "set_phase()"
1165 1154 set_phase(active_workers, true /* concurrent */);
1166 1155
1167 1156 CMConcurrentMarkingTask markingTask(this, cmThread());
1168 1157 if (parallel_marking_threads() > 0) {
1169 1158 _parallel_workers->set_active_workers((int)active_workers);
1170 1159 // Don't set _n_par_threads because it affects MT in process_strong_roots()
1171 1160 // and the decisions on that MT processing are made elsewhere.
1172 1161 assert(_parallel_workers->active_workers() > 0, "Should have been set");
1173 1162 _parallel_workers->run_task(&markingTask);
1174 1163 } else {
1175 1164 markingTask.work(0);
1176 1165 }
1177 1166 print_stats();
1178 1167 }
1179 1168
1180 1169 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1181 1170 // world is stopped at this checkpoint
1182 1171 assert(SafepointSynchronize::is_at_safepoint(),
1183 1172 "world should be stopped");
1184 1173
1185 1174 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1186 1175
1187 1176 // If a full collection has happened, we shouldn't do this.
1188 1177 if (has_aborted()) {
1189 1178 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1190 1179 return;
1191 1180 }
1192 1181
1193 1182 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1194 1183
1195 1184 if (VerifyDuringGC) {
1196 1185 HandleMark hm; // handle scope
1197 1186 gclog_or_tty->print(" VerifyDuringGC:(before)");
1198 1187 Universe::heap()->prepare_for_verify();
1199 1188 Universe::verify(/* allow dirty */ true,
1200 1189 /* silent */ false,
1201 1190 /* option */ VerifyOption_G1UsePrevMarking);
1202 1191 }
1203 1192
1204 1193 G1CollectorPolicy* g1p = g1h->g1_policy();
1205 1194 g1p->record_concurrent_mark_remark_start();
1206 1195
1207 1196 double start = os::elapsedTime();
1208 1197
1209 1198 checkpointRootsFinalWork();
1210 1199
1211 1200 double mark_work_end = os::elapsedTime();
1212 1201
1213 1202 weakRefsWork(clear_all_soft_refs);
1214 1203
1215 1204 if (has_overflown()) {
1216 1205 // Oops. We overflowed. Restart concurrent marking.
1217 1206 _restart_for_overflow = true;
1218 1207 // Clear the flag. We do not need it any more.
1219 1208 clear_has_overflown();
1220 1209 if (G1TraceMarkStackOverflow) {
1221 1210 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1222 1211 }
1223 1212 } else {
1224 1213 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1225 1214 // We're done with marking.
1226 1215 // This is the end of the marking cycle; we expect all
1227 1216 // threads to have SATB queues with active set to true.
1228 1217 satb_mq_set.set_active_all_threads(false, /* new active value */
1229 1218 true /* expected_active */);
1230 1219
1231 1220 if (VerifyDuringGC) {
1232 1221
1233 1222 HandleMark hm; // handle scope
1234 1223 gclog_or_tty->print(" VerifyDuringGC:(after)");
1235 1224 Universe::heap()->prepare_for_verify();
1236 1225 Universe::verify(/* allow dirty */ true,
1237 1226 /* silent */ false,
1238 1227 /* option */ VerifyOption_G1UseNextMarking);
1239 1228 }
1240 1229 assert(!restart_for_overflow(), "sanity");
1241 1230 }
1242 1231
1243 1232 // Reset the marking state if marking completed
1244 1233 if (!restart_for_overflow()) {
1245 1234 set_non_marking_state();
1246 1235 }
1247 1236
1248 1237 #if VERIFY_OBJS_PROCESSED
1249 1238 _scan_obj_cl.objs_processed = 0;
1250 1239 ThreadLocalObjQueue::objs_enqueued = 0;
1251 1240 #endif
1252 1241
1253 1242 // Statistics
1254 1243 double now = os::elapsedTime();
1255 1244 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1256 1245 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1257 1246 _remark_times.add((now - start) * 1000.0);
1258 1247
1259 1248 g1p->record_concurrent_mark_remark_end();
1260 1249 }
1261 1250
1262 1251 #define CARD_BM_TEST_MODE 0
1263 1252
1264 1253 class CalcLiveObjectsClosure: public HeapRegionClosure {
1265 1254
1266 1255 CMBitMapRO* _bm;
1267 1256 ConcurrentMark* _cm;
1268 1257 bool _changed;
1269 1258 bool _yield;
1270 1259 size_t _words_done;
1271 1260 size_t _tot_live;
1272 1261 size_t _tot_used;
1273 1262 size_t _regions_done;
1274 1263 double _start_vtime_sec;
1275 1264
1276 1265 BitMap* _region_bm;
1277 1266 BitMap* _card_bm;
1278 1267 intptr_t _bottom_card_num;
1279 1268 bool _final;
1280 1269
1281 1270 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1282 1271 for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1283 1272 #if CARD_BM_TEST_MODE
1284 1273 guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1285 1274 #else
1286 1275 _card_bm->par_at_put(i - _bottom_card_num, 1);
1287 1276 #endif
1288 1277 }
1289 1278 }
1290 1279
1291 1280 public:
1292 1281 CalcLiveObjectsClosure(bool final,
1293 1282 CMBitMapRO *bm, ConcurrentMark *cm,
1294 1283 BitMap* region_bm, BitMap* card_bm) :
1295 1284 _bm(bm), _cm(cm), _changed(false), _yield(true),
1296 1285 _words_done(0), _tot_live(0), _tot_used(0),
1297 1286 _region_bm(region_bm), _card_bm(card_bm),_final(final),
1298 1287 _regions_done(0), _start_vtime_sec(0.0)
1299 1288 {
1300 1289 _bottom_card_num =
1301 1290 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1302 1291 CardTableModRefBS::card_shift);
1303 1292 }
1304 1293
1305 1294 // It takes a region that's not empty (i.e., it has at least one
1306 1295 // live object in it) and sets its corresponding bit on the region
1307 1296 // bitmap to 1. If the region is "starts humongous" it will also set
1308 1297 // to 1 the bits on the region bitmap that correspond to its
1309 1298 // associated "continues humongous" regions.
1310 1299 void set_bit_for_region(HeapRegion* hr) {
1311 1300 assert(!hr->continuesHumongous(), "should have filtered those out");
1312 1301
1313 1302 size_t index = hr->hrs_index();
1314 1303 if (!hr->startsHumongous()) {
1315 1304 // Normal (non-humongous) case: just set the bit.
1316 1305 _region_bm->par_at_put((BitMap::idx_t) index, true);
1317 1306 } else {
1318 1307 // Starts humongous case: calculate how many regions are part of
1319 1308 // this humongous region and then set the bit range. It might
1320 1309 // have been a bit more efficient to look at the object that
1321 1310 // spans these humongous regions to calculate their number from
1322 1311 // the object's size. However, it's a good idea to calculate
1323 1312 // this based on the metadata itself, and not the region
1324 1313 // contents, so that this code is not aware of what goes into
1325 1314 // the humongous regions (in case this changes in the future).
1326 1315 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1327 1316 size_t end_index = index + 1;
1328 1317 while (end_index < g1h->n_regions()) {
1329 1318 HeapRegion* chr = g1h->region_at(end_index);
1330 1319 if (!chr->continuesHumongous()) break;
1331 1320 end_index += 1;
1332 1321 }
1333 1322 _region_bm->par_at_put_range((BitMap::idx_t) index,
1334 1323 (BitMap::idx_t) end_index, true);
1335 1324 }
1336 1325 }
1337 1326
1338 1327 bool doHeapRegion(HeapRegion* hr) {
1339 1328 if (!_final && _regions_done == 0) {
1340 1329 _start_vtime_sec = os::elapsedVTime();
1341 1330 }
1342 1331
1343 1332 if (hr->continuesHumongous()) {
1344 1333 // We will ignore these here and process them when their
1345 1334 // associated "starts humongous" region is processed (see
1346 1335 // set_bit_for_heap_region()). Note that we cannot rely on their
1347 1336 // associated "starts humongous" region to have their bit set to
1348 1337 // 1 since, due to the region chunking in the parallel region
1349 1338 // iteration, a "continues humongous" region might be visited
1350 1339 // before its associated "starts humongous".
1351 1340 return false;
1352 1341 }
1353 1342
1354 1343 HeapWord* nextTop = hr->next_top_at_mark_start();
1355 1344 HeapWord* start = hr->top_at_conc_mark_count();
1356 1345 assert(hr->bottom() <= start && start <= hr->end() &&
1357 1346 hr->bottom() <= nextTop && nextTop <= hr->end() &&
1358 1347 start <= nextTop,
1359 1348 "Preconditions.");
1360 1349 // Otherwise, record the number of words we'll examine.
1361 1350 size_t words_done = (nextTop - start);
1362 1351 // Find the first marked object at or after "start".
1363 1352 start = _bm->getNextMarkedWordAddress(start, nextTop);
1364 1353 size_t marked_bytes = 0;
1365 1354
1366 1355 // Below, the term "card num" means the result of shifting an address
1367 1356 // by the card shift -- address 0 corresponds to card number 0. One
1368 1357 // must subtract the card num of the bottom of the heap to obtain a
1369 1358 // card table index.
1370 1359 // The first card num of the sequence of live cards currently being
1371 1360 // constructed. -1 ==> no sequence.
1372 1361 intptr_t start_card_num = -1;
1373 1362 // The last card num of the sequence of live cards currently being
1374 1363 // constructed. -1 ==> no sequence.
1375 1364 intptr_t last_card_num = -1;
1376 1365
1377 1366 while (start < nextTop) {
1378 1367 if (_yield && _cm->do_yield_check()) {
1379 1368 // We yielded. It might be for a full collection, in which case
1380 1369 // all bets are off; terminate the traversal.
1381 1370 if (_cm->has_aborted()) {
1382 1371 _changed = false;
1383 1372 return true;
1384 1373 } else {
1385 1374 // Otherwise, it might be a collection pause, and the region
1386 1375 // we're looking at might be in the collection set. We'll
1387 1376 // abandon this region.
1388 1377 return false;
1389 1378 }
1390 1379 }
1391 1380 oop obj = oop(start);
1392 1381 int obj_sz = obj->size();
1393 1382 // The card num of the start of the current object.
1394 1383 intptr_t obj_card_num =
1395 1384 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1396 1385
1397 1386 HeapWord* obj_last = start + obj_sz - 1;
1398 1387 intptr_t obj_last_card_num =
1399 1388 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1400 1389
1401 1390 if (obj_card_num != last_card_num) {
1402 1391 if (start_card_num == -1) {
1403 1392 assert(last_card_num == -1, "Both or neither.");
1404 1393 start_card_num = obj_card_num;
1405 1394 } else {
1406 1395 assert(last_card_num != -1, "Both or neither.");
1407 1396 assert(obj_card_num >= last_card_num, "Inv");
1408 1397 if ((obj_card_num - last_card_num) > 1) {
1409 1398 // Mark the last run, and start a new one.
1410 1399 mark_card_num_range(start_card_num, last_card_num);
1411 1400 start_card_num = obj_card_num;
1412 1401 }
1413 1402 }
1414 1403 #if CARD_BM_TEST_MODE
1415 1404 /*
1416 1405 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1417 1406 obj_card_num - _bottom_card_num,
1418 1407 obj_last_card_num - _bottom_card_num);
1419 1408 */
1420 1409 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1421 1410 _card_bm->par_at_put(j - _bottom_card_num, 1);
1422 1411 }
1423 1412 #endif
1424 1413 }
1425 1414 // In any case, we set the last card num.
1426 1415 last_card_num = obj_last_card_num;
1427 1416
1428 1417 marked_bytes += (size_t)obj_sz * HeapWordSize;
1429 1418 // Find the next marked object after this one.
1430 1419 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1431 1420 _changed = true;
1432 1421 }
1433 1422 // Handle the last range, if any.
1434 1423 if (start_card_num != -1) {
1435 1424 mark_card_num_range(start_card_num, last_card_num);
1436 1425 }
1437 1426 if (_final) {
1438 1427 // Mark the allocated-since-marking portion...
1439 1428 HeapWord* tp = hr->top();
1440 1429 if (nextTop < tp) {
1441 1430 start_card_num =
1442 1431 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1443 1432 last_card_num =
1444 1433 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1445 1434 mark_card_num_range(start_card_num, last_card_num);
1446 1435 // This definitely means the region has live objects.
1447 1436 set_bit_for_region(hr);
1448 1437 }
1449 1438 }
1450 1439
1451 1440 hr->add_to_marked_bytes(marked_bytes);
1452 1441 // Update the live region bitmap.
1453 1442 if (marked_bytes > 0) {
1454 1443 set_bit_for_region(hr);
1455 1444 }
1456 1445 hr->set_top_at_conc_mark_count(nextTop);
1457 1446 _tot_live += hr->next_live_bytes();
1458 1447 _tot_used += hr->used();
1459 1448 _words_done = words_done;
1460 1449
1461 1450 if (!_final) {
1462 1451 ++_regions_done;
1463 1452 if (_regions_done % 10 == 0) {
1464 1453 double end_vtime_sec = os::elapsedVTime();
1465 1454 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1466 1455 if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1467 1456 jlong sleep_time_ms =
1468 1457 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1469 1458 os::sleep(Thread::current(), sleep_time_ms, false);
1470 1459 _start_vtime_sec = end_vtime_sec;
1471 1460 }
1472 1461 }
1473 1462 }
1474 1463
1475 1464 return false;
1476 1465 }
1477 1466
1478 1467 bool changed() { return _changed; }
1479 1468 void reset() { _changed = false; _words_done = 0; }
1480 1469 void no_yield() { _yield = false; }
1481 1470 size_t words_done() { return _words_done; }
1482 1471 size_t tot_live() { return _tot_live; }
1483 1472 size_t tot_used() { return _tot_used; }
1484 1473 };
1485 1474
1486 1475
1487 1476 void ConcurrentMark::calcDesiredRegions() {
1488 1477 _region_bm.clear();
1489 1478 _card_bm.clear();
1490 1479 CalcLiveObjectsClosure calccl(false /*final*/,
1491 1480 nextMarkBitMap(), this,
1492 1481 &_region_bm, &_card_bm);
1493 1482 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1494 1483 g1h->heap_region_iterate(&calccl);
1495 1484
1496 1485 do {
1497 1486 calccl.reset();
1498 1487 g1h->heap_region_iterate(&calccl);
1499 1488 } while (calccl.changed());
1500 1489 }
1501 1490
1502 1491 class G1ParFinalCountTask: public AbstractGangTask {
1503 1492 protected:
1504 1493 G1CollectedHeap* _g1h;
1505 1494 CMBitMap* _bm;
1506 1495 size_t _n_workers;
1507 1496 size_t *_live_bytes;
1508 1497 size_t *_used_bytes;
1509 1498 BitMap* _region_bm;
1510 1499 BitMap* _card_bm;
1511 1500 public:
1512 1501 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1513 1502 BitMap* region_bm, BitMap* card_bm)
1514 1503 : AbstractGangTask("G1 final counting"), _g1h(g1h),
1515 1504 _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
1516 1505 _n_workers(0)
1517 1506 {
1518 1507 // Use the value already set as the number of active threads
1519 1508 // in the call to run_task(). Needed for the allocation of
1520 1509 // _live_bytes and _used_bytes.
1521 1510 if (G1CollectedHeap::use_parallel_gc_threads()) {
1522 1511 assert( _g1h->workers()->active_workers() > 0,
1523 1512 "Should have been previously set");
1524 1513 _n_workers = _g1h->workers()->active_workers();
1525 1514 } else {
1526 1515 _n_workers = 1;
1527 1516 }
1528 1517
1529 1518 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1530 1519 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1531 1520 }
1532 1521
1533 1522 ~G1ParFinalCountTask() {
1534 1523 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1535 1524 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1536 1525 }
1537 1526
1538 1527 void work(int i) {
1539 1528 CalcLiveObjectsClosure calccl(true /*final*/,
1540 1529 _bm, _g1h->concurrent_mark(),
1541 1530 _region_bm, _card_bm);
1542 1531 calccl.no_yield();
1543 1532 if (G1CollectedHeap::use_parallel_gc_threads()) {
1544 1533 _g1h->heap_region_par_iterate_chunked(&calccl, i,
1545 1534 (int) _n_workers,
1546 1535 HeapRegion::FinalCountClaimValue);
1547 1536 } else {
1548 1537 _g1h->heap_region_iterate(&calccl);
1549 1538 }
1550 1539 assert(calccl.complete(), "Shouldn't have yielded!");
1551 1540
1552 1541 assert((size_t) i < _n_workers, "invariant");
1553 1542 _live_bytes[i] = calccl.tot_live();
1554 1543 _used_bytes[i] = calccl.tot_used();
1555 1544 }
1556 1545 size_t live_bytes() {
1557 1546 size_t live_bytes = 0;
1558 1547 for (size_t i = 0; i < _n_workers; ++i)
1559 1548 live_bytes += _live_bytes[i];
1560 1549 return live_bytes;
1561 1550 }
1562 1551 size_t used_bytes() {
1563 1552 size_t used_bytes = 0;
1564 1553 for (size_t i = 0; i < _n_workers; ++i)
1565 1554 used_bytes += _used_bytes[i];
1566 1555 return used_bytes;
1567 1556 }
1568 1557 };
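// Illustrative sketch, not part of this change: G1ParFinalCountTask uses a
// common parallel-reduction shape. Each of the N workers writes its totals
// into its own slot of a pre-sized array (one writer per slot, so no
// locking is needed), and the slots are folded into a single total only
// after all the workers have finished:
//
//   size_t* partial = NEW_C_HEAP_ARRAY(size_t, n_workers);
//   // ... worker i runs and performs:  partial[i] = <worker i's count> ...
//   size_t total = 0;
//   for (size_t i = 0; i < n_workers; ++i) {
//     total += partial[i];
//   }
//   FREE_C_HEAP_ARRAY(size_t, partial);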
1569 1558
1570 1559 class G1ParNoteEndTask;
1571 1560
1572 1561 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1573 1562 G1CollectedHeap* _g1;
1574 1563 int _worker_num;
1575 1564 size_t _max_live_bytes;
1576 1565 size_t _regions_claimed;
1577 1566 size_t _freed_bytes;
1578 1567 FreeRegionList* _local_cleanup_list;
1579 1568 OldRegionSet* _old_proxy_set;
1580 1569 HumongousRegionSet* _humongous_proxy_set;
1581 1570 HRRSCleanupTask* _hrrs_cleanup_task;
1582 1571 double _claimed_region_time;
1583 1572 double _max_region_time;
1584 1573
1585 1574 public:
1586 1575 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1587 1576 int worker_num,
1588 1577 FreeRegionList* local_cleanup_list,
1589 1578 OldRegionSet* old_proxy_set,
1590 1579 HumongousRegionSet* humongous_proxy_set,
1591 1580 HRRSCleanupTask* hrrs_cleanup_task) :
1592 1581 _g1(g1), _worker_num(worker_num),
1593 1582 _max_live_bytes(0), _regions_claimed(0),
1594 1583 _freed_bytes(0),
1595 1584 _claimed_region_time(0.0), _max_region_time(0.0),
1596 1585 _local_cleanup_list(local_cleanup_list),
1597 1586 _old_proxy_set(old_proxy_set),
1598 1587 _humongous_proxy_set(humongous_proxy_set),
1599 1588 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1600 1589
1601 1590 size_t freed_bytes() { return _freed_bytes; }
1602 1591
1603 1592 bool doHeapRegion(HeapRegion *hr) {
1604 1593 // We use a claim value of zero here because all regions
1605 1594 // were claimed with value 1 in the FinalCount task.
1606 1595 hr->reset_gc_time_stamp();
1607 1596 if (!hr->continuesHumongous()) {
1608 1597 double start = os::elapsedTime();
1609 1598 _regions_claimed++;
1610 1599 hr->note_end_of_marking();
1611 1600 _max_live_bytes += hr->max_live_bytes();
1612 1601 _g1->free_region_if_empty(hr,
1613 1602 &_freed_bytes,
1614 1603 _local_cleanup_list,
1615 1604 _old_proxy_set,
1616 1605 _humongous_proxy_set,
1617 1606 _hrrs_cleanup_task,
1618 1607 true /* par */);
1619 1608 double region_time = (os::elapsedTime() - start);
1620 1609 _claimed_region_time += region_time;
1621 1610 if (region_time > _max_region_time) {
1622 1611 _max_region_time = region_time;
1623 1612 }
1624 1613 }
1625 1614 return false;
1626 1615 }
1627 1616
1628 1617 size_t max_live_bytes() { return _max_live_bytes; }
1629 1618 size_t regions_claimed() { return _regions_claimed; }
1630 1619 double claimed_region_time_sec() { return _claimed_region_time; }
1631 1620 double max_region_time_sec() { return _max_region_time; }
1632 1621 };
1633 1622
1634 1623 class G1ParNoteEndTask: public AbstractGangTask {
1635 1624 friend class G1NoteEndOfConcMarkClosure;
1636 1625
1637 1626 protected:
1638 1627 G1CollectedHeap* _g1h;
1639 1628 size_t _max_live_bytes;
1640 1629 size_t _freed_bytes;
1641 1630 FreeRegionList* _cleanup_list;
1642 1631
1643 1632 public:
1644 1633 G1ParNoteEndTask(G1CollectedHeap* g1h,
1645 1634 FreeRegionList* cleanup_list) :
1646 1635 AbstractGangTask("G1 note end"), _g1h(g1h),
1647 1636 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1648 1637
1649 1638 void work(int i) {
1650 1639 double start = os::elapsedTime();
1651 1640 FreeRegionList local_cleanup_list("Local Cleanup List");
1652 1641 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1653 1642 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1654 1643 HRRSCleanupTask hrrs_cleanup_task;
1655 1644 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1656 1645 &old_proxy_set,
1657 1646 &humongous_proxy_set,
1658 1647 &hrrs_cleanup_task);
1659 1648 if (G1CollectedHeap::use_parallel_gc_threads()) {
1660 1649 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1661 1650 _g1h->workers()->active_workers(),
1662 1651 HeapRegion::NoteEndClaimValue);
1663 1652 } else {
1664 1653 _g1h->heap_region_iterate(&g1_note_end);
1665 1654 }
1666 1655 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1667 1656
1668 1657 // Now update the lists
1669 1658 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1670 1659 NULL /* free_list */,
1671 1660 &old_proxy_set,
1672 1661 &humongous_proxy_set,
1673 1662 true /* par */);
1674 1663 {
1675 1664 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1676 1665 _max_live_bytes += g1_note_end.max_live_bytes();
1677 1666 _freed_bytes += g1_note_end.freed_bytes();
1678 1667
1679 1668 // If we iterate over the global cleanup list at the end of
1680 1669 // cleanup to do this printing, we cannot guarantee that we only
1681 1670 // generate output for the newly-reclaimed regions (the list
1682 1671 // might not be empty at the beginning of cleanup; we might
1683 1672 // still be working on its previous contents). So we do the
1684 1673 // printing here, before we append the new regions to the global
1685 1674 // cleanup list.
1686 1675
1687 1676 G1HRPrinter* hr_printer = _g1h->hr_printer();
1688 1677 if (hr_printer->is_active()) {
1689 1678 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1690 1679 while (iter.more_available()) {
1691 1680 HeapRegion* hr = iter.get_next();
1692 1681 hr_printer->cleanup(hr);
1693 1682 }
1694 1683 }
1695 1684
1696 1685 _cleanup_list->add_as_tail(&local_cleanup_list);
1697 1686 assert(local_cleanup_list.is_empty(), "post-condition");
1698 1687
1699 1688 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1700 1689 }
1701 1690 double end = os::elapsedTime();
1702 1691 if (G1PrintParCleanupStats) {
1703 1692 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1704 1693 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1705 1694 i, start, end, (end-start)*1000.0,
1706 1695 g1_note_end.regions_claimed(),
1707 1696 g1_note_end.claimed_region_time_sec()*1000.0,
1708 1697 g1_note_end.max_region_time_sec()*1000.0);
1709 1698 }
1710 1699 }
1711 1700 size_t max_live_bytes() { return _max_live_bytes; }
1712 1701 size_t freed_bytes() { return _freed_bytes; }
1713 1702 };
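// Illustrative sketch, not part of this change: G1ParNoteEndTask::work()
// above follows the "accumulate locally, publish once under a lock"
// pattern. Every per-region update goes to thread-local lists and
// counters; the shared totals are then touched exactly once per worker:
//
//   {
//     MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
//     _max_live_bytes += g1_note_end.max_live_bytes();  // once per worker,
//     _freed_bytes    += g1_note_end.freed_bytes();     // not once per region
//   }
//
// so the lock hold time (and hence contention) is proportional to the
// number of workers rather than the number of regions.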
1714 1703
1715 1704 class G1ParScrubRemSetTask: public AbstractGangTask {
1716 1705 protected:
1717 1706 G1RemSet* _g1rs;
1718 1707 BitMap* _region_bm;
1719 1708 BitMap* _card_bm;
1720 1709 public:
1721 1710 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1722 1711 BitMap* region_bm, BitMap* card_bm) :
1723 1712 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1724 1713 _region_bm(region_bm), _card_bm(card_bm)
1725 1714 {}
1726 1715
1727 1716 void work(int i) {
1728 1717 if (G1CollectedHeap::use_parallel_gc_threads()) {
1729 1718 _g1rs->scrub_par(_region_bm, _card_bm, i,
1730 1719 HeapRegion::ScrubRemSetClaimValue);
1731 1720 } else {
1732 1721 _g1rs->scrub(_region_bm, _card_bm);
1733 1722 }
1734 1723 }
1735 1724
1736 1725 };
1737 1726
1738 1727 void ConcurrentMark::cleanup() {
1739 1728 // world is stopped at this checkpoint
1740 1729 assert(SafepointSynchronize::is_at_safepoint(),
1741 1730 "world should be stopped");
1742 1731 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1743 1732
1744 1733 // If a full collection has happened, we shouldn't do this.
1745 1734 if (has_aborted()) {
1746 1735 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1747 1736 return;
1748 1737 }
1749 1738
1750 1739 HRSPhaseSetter x(HRSPhaseCleanup);
1751 1740 g1h->verify_region_sets_optional();
1752 1741
1753 1742 if (VerifyDuringGC) {
1754 1743 HandleMark hm; // handle scope
1755 1744 gclog_or_tty->print(" VerifyDuringGC:(before)");
1756 1745 Universe::heap()->prepare_for_verify();
1757 1746 Universe::verify(/* allow dirty */ true,
1758 1747 /* silent */ false,
1759 1748 /* option */ VerifyOption_G1UsePrevMarking);
1760 1749 }
1761 1750
1762 1751 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1763 1752 g1p->record_concurrent_mark_cleanup_start();
1764 1753
1765 1754 double start = os::elapsedTime();
1766 1755
1767 1756 HeapRegionRemSet::reset_for_cleanup_tasks();
1768 1757
1769 1758 size_t n_workers;
1770 1759
1771 1760 // Do counting once more with the world stopped for good measure.
1772 1761 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1773 1762 &_region_bm, &_card_bm);
1774 1763 if (G1CollectedHeap::use_parallel_gc_threads()) {
1775 1764 assert(g1h->check_heap_region_claim_values(
1776 1765 HeapRegion::InitialClaimValue),
1777 1766 "sanity check");
1778 1767
1779 1768 g1h->set_par_threads();
1780 1769 n_workers = g1h->n_par_threads();
1781 1770 assert(g1h->n_par_threads() == (int) n_workers,
1782 1771 "Should not have been reset");
1783 1772 g1h->workers()->run_task(&g1_par_count_task);
1784 1773 // Done with the parallel phase so reset to 0.
1785 1774 g1h->set_par_threads(0);
1786 1775
1787 1776 assert(g1h->check_heap_region_claim_values(
1788 1777 HeapRegion::FinalCountClaimValue),
1789 1778 "sanity check");
1790 1779 } else {
1791 1780 n_workers = 1;
1792 1781 g1_par_count_task.work(0);
1793 1782 }
1794 1783
1795 1784 size_t known_garbage_bytes =
1796 1785 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1797 1786 g1p->set_known_garbage_bytes(known_garbage_bytes);
1798 1787
1799 1788 size_t start_used_bytes = g1h->used();
1800 1789 _at_least_one_mark_complete = true;
1801 1790 g1h->set_marking_complete();
1802 1791
1803 1792 ergo_verbose4(ErgoConcCycles,
1804 1793 "finish cleanup",
1805 1794 ergo_format_byte("occupancy")
1806 1795 ergo_format_byte("capacity")
1807 1796 ergo_format_byte_perc("known garbage"),
1808 1797 start_used_bytes, g1h->capacity(),
1809 1798 known_garbage_bytes,
1810 1799 ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
1811 1800
1812 1801 double count_end = os::elapsedTime();
1813 1802 double this_final_counting_time = (count_end - start);
1814 1803 if (G1PrintParCleanupStats) {
1815 1804 gclog_or_tty->print_cr("Cleanup:");
1816 1805 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1817 1806 this_final_counting_time*1000.0);
1818 1807 }
1819 1808 _total_counting_time += this_final_counting_time;
1820 1809
1821 1810 if (G1PrintRegionLivenessInfo) {
1822 1811 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1823 1812 _g1h->heap_region_iterate(&cl);
1824 1813 }
1825 1814
1826 1815 // Install the newly created mark bitmap as "prev".
1827 1816 swapMarkBitMaps();
1828 1817
1829 1818 g1h->reset_gc_time_stamp();
1830 1819
1831 1820 // Note end of marking in all heap regions.
1832 1821 double note_end_start = os::elapsedTime();
1833 1822 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1834 1823 if (G1CollectedHeap::use_parallel_gc_threads()) {
1835 1824 g1h->set_par_threads((int)n_workers);
1836 1825 g1h->workers()->run_task(&g1_par_note_end_task);
1837 1826 g1h->set_par_threads(0);
1838 1827
1839 1828 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1840 1829 "sanity check");
1841 1830 } else {
1842 1831 g1_par_note_end_task.work(0);
1843 1832 }
1844 1833
1845 1834 if (!cleanup_list_is_empty()) {
1846 1835 // The cleanup list is not empty, so we'll have to process it
1847 1836 // concurrently. Notify anyone else that might be wanting free
1848 1837 // regions that there will be more free regions coming soon.
1849 1838 g1h->set_free_regions_coming();
1850 1839 }
1851 1840 double note_end_end = os::elapsedTime();
1852 1841 if (G1PrintParCleanupStats) {
1853 1842 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1854 1843 (note_end_end - note_end_start)*1000.0);
1855 1844 }
1856 1845
1857 1846 // Now we scrub the remembered sets; this must be done before the call
1858 1847 // below, since it affects the metric by which we sort the heap regions.
1859 1848 if (G1ScrubRemSets) {
1860 1849 double rs_scrub_start = os::elapsedTime();
1861 1850 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1862 1851 if (G1CollectedHeap::use_parallel_gc_threads()) {
1863 1852 g1h->set_par_threads((int)n_workers);
1864 1853 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1865 1854 g1h->set_par_threads(0);
1866 1855
1867 1856 assert(g1h->check_heap_region_claim_values(
1868 1857 HeapRegion::ScrubRemSetClaimValue),
1869 1858 "sanity check");
1870 1859 } else {
1871 1860 g1_par_scrub_rs_task.work(0);
1872 1861 }
1873 1862
1874 1863 double rs_scrub_end = os::elapsedTime();
1875 1864 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1876 1865 _total_rs_scrub_time += this_rs_scrub_time;
1877 1866 }
1878 1867
1879 1868 // this will also free any regions totally full of garbage objects,
1880 1869 // and sort the regions.
1881 1870 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1882 1871
1883 1872 // Statistics.
1884 1873 double end = os::elapsedTime();
1885 1874 _cleanup_times.add((end - start) * 1000.0);
1886 1875
1887 1876 // G1CollectedHeap::heap()->print();
1888 1877 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1889 1878 // G1CollectedHeap::heap()->get_gc_time_stamp());
1890 1879
1891 1880 if (PrintGC || PrintGCDetails) {
1892 1881 g1h->print_size_transition(gclog_or_tty,
1893 1882 start_used_bytes,
1894 1883 g1h->used(),
1895 1884 g1h->capacity());
1896 1885 }
1897 1886
1898 1887 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1899 1888 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1900 1889
1901 1890 // Cleanup will have freed any regions completely full of garbage.
1902 1891 // Update the soft reference policy with the new heap occupancy.
1903 1892 Universe::update_heap_info_at_gc();
1904 1893
1905 1894 // We need to make this be a "collection" so any collection pause that
1906 1895 // races with it goes around and waits for completeCleanup to finish.
1907 1896 g1h->increment_total_collections();
1908 1897
1909 1898 if (VerifyDuringGC) {
1910 1899 HandleMark hm; // handle scope
1911 1900 gclog_or_tty->print(" VerifyDuringGC:(after)");
1912 1901 Universe::heap()->prepare_for_verify();
1913 1902 Universe::verify(/* allow dirty */ true,
1914 1903 /* silent */ false,
1915 1904 /* option */ VerifyOption_G1UsePrevMarking);
1916 1905 }
1917 1906
1918 1907 g1h->verify_region_sets_optional();
1919 1908 }
1920 1909
1921 1910 void ConcurrentMark::completeCleanup() {
1922 1911 if (has_aborted()) return;
1923 1912
1924 1913 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1925 1914
1926 1915 _cleanup_list.verify_optional();
1927 1916 FreeRegionList tmp_free_list("Tmp Free List");
1928 1917
1929 1918 if (G1ConcRegionFreeingVerbose) {
1930 1919 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1931 1920 "cleanup list has "SIZE_FORMAT" entries",
1932 1921 _cleanup_list.length());
1933 1922 }
1934 1923
1935 1924 // No one else should be accessing the _cleanup_list at this point,
1936 1925 // so it's not necessary to take any locks.
1937 1926 while (!_cleanup_list.is_empty()) {
1938 1927 HeapRegion* hr = _cleanup_list.remove_head();
1939 1928 assert(hr != NULL, "the list was not empty");
1940 1929 hr->par_clear();
1941 1930 tmp_free_list.add_as_tail(hr);
1942 1931
1943 1932 // Instead of adding one region at a time to the secondary_free_list,
1944 1933 // we accumulate them in the local list and move them a few at a
1945 1934 // time. This also cuts down on the number of notify_all() calls
1946 1935 // we do during this process. We'll also append the local list when
1947 1936 // _cleanup_list is empty (which means we just removed the last
1948 1937 // region from the _cleanup_list).
1949 1938 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1950 1939 _cleanup_list.is_empty()) {
1951 1940 if (G1ConcRegionFreeingVerbose) {
1952 1941 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1953 1942 "appending "SIZE_FORMAT" entries to the "
1954 1943 "secondary_free_list, clean list still has "
1955 1944 SIZE_FORMAT" entries",
1956 1945 tmp_free_list.length(),
1957 1946 _cleanup_list.length());
1958 1947 }
1959 1948
1960 1949 {
1961 1950 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1962 1951 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1963 1952 SecondaryFreeList_lock->notify_all();
1964 1953 }
1965 1954
1966 1955 if (G1StressConcRegionFreeing) {
1967 1956 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1968 1957 os::sleep(Thread::current(), (jlong) 1, false);
1969 1958 }
1970 1959 }
1971 1960 }
1972 1961 }
1973 1962 assert(tmp_free_list.is_empty(), "post-condition");
1974 1963 }
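// Illustrative sketch, not part of this change: the batching condition in
// completeCleanup() above,
//
//   if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
//       _cleanup_list.is_empty()) { ... }
//
// hands regions over in groups of G1SecondaryFreeListAppendLength (plus a
// final, possibly partial, group when the cleanup list empties), so
// SecondaryFreeList_lock is acquired, and notify_all() issued, once per
// batch rather than once per freed region.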
1975 1964
1976 1965 // Support closures for reference processing in G1
1977 1966
1978 1967 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1979 1968 HeapWord* addr = (HeapWord*)obj;
1980 1969 return addr != NULL &&
1981 1970 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1982 1971 }
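// In other words (clarifying note, based on is_obj_ill()'s definition): an
// oop is treated as live unless it lies inside the G1-reserved heap *and*
// is "ill", i.e. it was allocated before the current marking cycle started
// and has not been marked on the next bitmap. Anything outside the
// G1-reserved range is unconditionally considered live here.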
1983 1972
1984 1973 class G1CMKeepAliveClosure: public OopClosure {
1985 1974 G1CollectedHeap* _g1;
1986 1975 ConcurrentMark* _cm;
1987 1976 CMBitMap* _bitMap;
1988 1977 public:
1989 1978 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1990 1979 CMBitMap* bitMap) :
1991 1980 _g1(g1), _cm(cm),
1992 1981 _bitMap(bitMap) {}
1993 1982
1994 1983 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1995 1984 virtual void do_oop( oop* p) { do_oop_work(p); }
1996 1985
1997 1986 template <class T> void do_oop_work(T* p) {
1998 1987 oop obj = oopDesc::load_decode_heap_oop(p);
1999 1988 HeapWord* addr = (HeapWord*)obj;
2000 1989
2001 1990 if (_cm->verbose_high()) {
2002 1991 gclog_or_tty->print_cr("\t[0] we're looking at location "
2003 1992 "*"PTR_FORMAT" = "PTR_FORMAT,
2004 1993 p, (void*) obj);
2005 1994 }
2006 1995
2007 1996 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2008 1997 _bitMap->mark(addr);
2009 1998 _cm->mark_stack_push(obj);
2010 1999 }
2011 2000 }
2012 2001 };
2013 2002
2014 2003 class G1CMDrainMarkingStackClosure: public VoidClosure {
2015 2004 CMMarkStack* _markStack;
2016 2005 CMBitMap* _bitMap;
2017 2006 G1CMKeepAliveClosure* _oopClosure;
2018 2007 public:
2019 2008 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
2020 2009 G1CMKeepAliveClosure* oopClosure) :
2021 2010 _bitMap(bitMap),
2022 2011 _markStack(markStack),
2023 2012 _oopClosure(oopClosure)
2024 2013 {}
2025 2014
2026 2015 void do_void() {
2027 2016 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
2028 2017 }
2029 2018 };
2030 2019
2031 2020 // 'Keep Alive' closure used by parallel reference processing.
2032 2021 // An instance of this closure is used in the parallel reference processing
2033 2022 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2034 2023 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
2035 2024 // only placed onto discovered ref lists once, so we can mark and push with
2036 2025 // no need to check whether the object has already been marked. Using the
2037 2026 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2038 2027 // operating on the global mark stack. This means that an individual
2039 2028 // worker would be doing lock-free pushes while it processes its own
2040 2029 // discovered ref list followed by a drain call. If the discovered ref lists
2041 2030 // are unbalanced then this could cause interference with the other
2042 2031 // workers. Using a CMTask (and its embedded local data structures)
2043 2032 // avoids that potential interference.
2044 2033 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2045 2034 ConcurrentMark* _cm;
2046 2035 CMTask* _task;
2047 2036 int _ref_counter_limit;
2048 2037 int _ref_counter;
2049 2038 public:
2050 2039 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2051 2040 _cm(cm), _task(task),
2052 2041 _ref_counter_limit(G1RefProcDrainInterval) {
2053 2042 assert(_ref_counter_limit > 0, "sanity");
2054 2043 _ref_counter = _ref_counter_limit;
2055 2044 }
2056 2045
2057 2046 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2058 2047 virtual void do_oop( oop* p) { do_oop_work(p); }
2059 2048
2060 2049 template <class T> void do_oop_work(T* p) {
2061 2050 if (!_cm->has_overflown()) {
2062 2051 oop obj = oopDesc::load_decode_heap_oop(p);
2063 2052 if (_cm->verbose_high()) {
2064 2053 gclog_or_tty->print_cr("\t[%d] we're looking at location "
2065 2054 "*"PTR_FORMAT" = "PTR_FORMAT,
2066 2055 _task->task_id(), p, (void*) obj);
2067 2056 }
2068 2057
2069 2058 _task->deal_with_reference(obj);
2070 2059 _ref_counter--;
2071 2060
2072 2061 if (_ref_counter == 0) {
2073 2062 // We have dealt with _ref_counter_limit references, pushing them and objects
2074 2063 // reachable from them onto the local stack (and possibly the global stack).
2075 2064 // Call do_marking_step() to process these entries. We call the routine in a
2076 2065 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2077 2066 // with the entries that we've pushed as a result of the deal_with_reference
2078 2067 // calls above) or we overflow.
2079 2068 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2080 2069 // while there may still be some work to do. (See the comment at the
2081 2070 // beginning of CMTask::do_marking_step() for those conditions - one of which
2082 2071 // is reaching the specified time target.) It is only when
2083 2072 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2084 2073 // that the marking has completed.
2085 2074 do {
2086 2075 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2087 2076 _task->do_marking_step(mark_step_duration_ms,
2088 2077 false /* do_stealing */,
2089 2078 false /* do_termination */);
2090 2079 } while (_task->has_aborted() && !_cm->has_overflown());
2091 2080 _ref_counter = _ref_counter_limit;
2092 2081 }
2093 2082 } else {
2094 2083 if (_cm->verbose_high()) {
2095 2084 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2096 2085 }
2097 2086 }
2098 2087 }
2099 2088 };
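// Illustrative sketch, not part of this change: the closure above is a
// "count down, then flush" loop. After every G1RefProcDrainInterval
// references it pauses to drain what it has pushed, bounding the growth of
// the task-local data structures (ref_counter stands in for the
// _ref_counter member):
//
//   if (--ref_counter == 0) {
//     do {
//       task->do_marking_step(G1ConcMarkStepDurationMillis,
//                             false /* do_stealing */,
//                             false /* do_termination */);
//     } while (task->has_aborted() && !cm->has_overflown());
//     ref_counter = ref_counter_limit;  // re-arm for the next batch
//   }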
2100 2089
2101 2090 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2102 2091 ConcurrentMark* _cm;
2103 2092 CMTask* _task;
2104 2093 public:
2105 2094 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2106 2095 _cm(cm), _task(task)
2107 2096 {}
2108 2097
2109 2098 void do_void() {
2110 2099 do {
2111 2100 if (_cm->verbose_high()) {
2112 2101 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2113 2102 _task->task_id());
2114 2103 }
2115 2104
2116 2105 // We call CMTask::do_marking_step() to completely drain the local and
2117 2106 // global marking stacks. The routine is called in a loop, which we'll
2118 2107 // exit if there's nothing more to do (i.e. we've completely drained the
2119 2108 // entries that were pushed as a result of applying the
2120 2109 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2121 2110 // lists above) or we overflow the global marking stack.
2122 2111 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2123 2112 // while there may still be some work to do. (See the comment at the
2124 2113 // beginning of CMTask::do_marking_step() for those conditions - one of which
2125 2114 // is reaching the specified time target.) It is only when
2126 2115 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2127 2116 // that the marking has completed.
2128 2117
2129 2118 _task->do_marking_step(1000000000.0 /* something very large */,
2130 2119 true /* do_stealing */,
2131 2120 true /* do_termination */);
2132 2121 } while (_task->has_aborted() && !_cm->has_overflown());
2133 2122 }
2134 2123 };
2135 2124
2136 2125 // Implementation of AbstractRefProcTaskExecutor for parallel
2137 2126 // reference processing at the end of G1 concurrent marking
2138 2127
2139 2128 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2140 2129 private:
2141 2130 G1CollectedHeap* _g1h;
2142 2131 ConcurrentMark* _cm;
2143 2132 WorkGang* _workers;
2144 2133 int _active_workers;
2145 2134
2146 2135 public:
2147 2136 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2148 2137 ConcurrentMark* cm,
2149 2138 WorkGang* workers,
2150 2139 int n_workers) :
2151 2140 _g1h(g1h), _cm(cm),
2152 2141 _workers(workers), _active_workers(n_workers) { }
2153 2142
2154 2143 // Executes the given task using concurrent marking worker threads.
2155 2144 virtual void execute(ProcessTask& task);
2156 2145 virtual void execute(EnqueueTask& task);
2157 2146 };
2158 2147
2159 2148 class G1CMRefProcTaskProxy: public AbstractGangTask {
2160 2149 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2161 2150 ProcessTask& _proc_task;
2162 2151 G1CollectedHeap* _g1h;
2163 2152 ConcurrentMark* _cm;
2164 2153
2165 2154 public:
2166 2155 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2167 2156 G1CollectedHeap* g1h,
2168 2157 ConcurrentMark* cm) :
2169 2158 AbstractGangTask("Process reference objects in parallel"),
2170 2159 _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2171 2160
2172 2161 virtual void work(int i) {
2173 2162 CMTask* marking_task = _cm->task(i);
2174 2163 G1CMIsAliveClosure g1_is_alive(_g1h);
2175 2164 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2176 2165 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2177 2166
2178 2167 _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2179 2168 }
2180 2169 };
2181 2170
2182 2171 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2183 2172 assert(_workers != NULL, "Need parallel worker threads.");
2184 2173
2185 2174 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2186 2175
2187 2176 // We need to reset the phase for each task execution so that
2188 2177 // the termination protocol of CMTask::do_marking_step works.
2189 2178 _cm->set_phase(_active_workers, false /* concurrent */);
2190 2179 _g1h->set_par_threads(_active_workers);
2191 2180 _workers->run_task(&proc_task_proxy);
2192 2181 _g1h->set_par_threads(0);
2193 2182 }
2194 2183
2195 2184 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2196 2185 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2197 2186 EnqueueTask& _enq_task;
2198 2187
2199 2188 public:
2200 2189 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2201 2190 AbstractGangTask("Enqueue reference objects in parallel"),
2202 2191 _enq_task(enq_task) { }
2203 2192
2204 2193 virtual void work(int i) {
2205 2194 _enq_task.work(i);
2206 2195 }
2207 2196 };
2208 2197
2209 2198 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2210 2199 assert(_workers != NULL, "Need parallel worker threads.");
2211 2200
2212 2201 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2213 2202
2214 2203 _g1h->set_par_threads(_active_workers);
2215 2204 _workers->run_task(&enq_task_proxy);
2216 2205 _g1h->set_par_threads(0);
2217 2206 }
2218 2207
2219 2208 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2220 2209 ResourceMark rm;
2221 2210 HandleMark hm;
2222 2211
2223 2212 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2224 2213
2225 2214 // Is alive closure.
2226 2215 G1CMIsAliveClosure g1_is_alive(g1h);
2227 2216
2228 2217 // Inner scope to exclude the cleaning of the string and symbol
2229 2218 // tables from the displayed time.
2230 2219 {
2231 2220 bool verbose = PrintGC && PrintGCDetails;
2232 2221 if (verbose) {
2233 2222 gclog_or_tty->put(' ');
2234 2223 }
2235 2224 TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);
2236 2225
2237 2226 ReferenceProcessor* rp = g1h->ref_processor_cm();
2238 2227
2239 2228 // See the comment in G1CollectedHeap::ref_processing_init()
2240 2229 // about how reference processing currently works in G1.
2241 2230
2242 2231 // Process weak references.
2243 2232 rp->setup_policy(clear_all_soft_refs);
2244 2233 assert(_markStack.isEmpty(), "mark stack should be empty");
2245 2234
2246 2235 G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2247 2236 G1CMDrainMarkingStackClosure
2248 2237 g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2249 2238
2250 2239 // We use the work gang from the G1CollectedHeap and we utilize all
2251 2240 // the worker threads.
2252 2241 int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
2253 2242 active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2254 2243
2255 2244 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2256 2245 g1h->workers(), active_workers);
2257 2246
2258 2247 if (rp->processing_is_mt()) {
2259 2248 // Set the degree of MT here. If the discovery is done MT, there
2260 2249 // may have been a different number of threads doing the discovery
2261 2250 // and a different number of discovered lists may have Ref objects.
2262 2251 // That is OK as long as the Reference lists are balanced (see
2263 2252 // balance_all_queues() and balance_queues()).
2264 2253 rp->set_active_mt_degree(active_workers);
2265 2254
2266 2255 rp->process_discovered_references(&g1_is_alive,
2267 2256 &g1_keep_alive,
2268 2257 &g1_drain_mark_stack,
2269 2258 &par_task_executor);
2270 2259
2271 2260 // The work routines of the parallel keep_alive and drain_marking_stack
2272 2261 // will set the has_overflown flag if we overflow the global marking
2273 2262 // stack.
2274 2263 } else {
2275 2264 rp->process_discovered_references(&g1_is_alive,
2276 2265 &g1_keep_alive,
2277 2266 &g1_drain_mark_stack,
2278 2267 NULL);
2279 2268 }
2280 2269
2281 2270 assert(_markStack.overflow() || _markStack.isEmpty(),
2282 2271 "mark stack should be empty (unless it overflowed)");
2283 2272 if (_markStack.overflow()) {
2284 2273 // Should have been done already when we tried to push an
2285 2274 // entry on to the global mark stack. But let's do it again.
2286 2275 set_has_overflown();
2287 2276 }
2288 2277
2289 2278 if (rp->processing_is_mt()) {
2290 2279 assert(rp->num_q() == active_workers, "why not");
2291 2280 rp->enqueue_discovered_references(&par_task_executor);
2292 2281 } else {
2293 2282 rp->enqueue_discovered_references();
2294 2283 }
2295 2284
2296 2285 rp->verify_no_references_recorded();
2297 2286 assert(!rp->discovery_enabled(), "Post condition");
2298 2287 }
2299 2288
2300 2289 // Now clean up stale oops in StringTable
2301 2290 StringTable::unlink(&g1_is_alive);
2302 2291 // Clean up unreferenced symbols in symbol table.
2303 2292 SymbolTable::unlink();
2304 2293 }
2305 2294
2306 2295 void ConcurrentMark::swapMarkBitMaps() {
2307 2296 CMBitMapRO* temp = _prevMarkBitMap;
2308 2297 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2309 2298 _nextMarkBitMap = (CMBitMap*) temp;
2310 2299 }
2311 2300
2312 2301 class CMRemarkTask: public AbstractGangTask {
2313 2302 private:
2314 2303 ConcurrentMark *_cm;
2315 2304
2316 2305 public:
2317 2306 void work(int worker_i) {
2318 2307 // Since all available tasks are actually started, we should
2319 2308 // only proceed if we're supposed to be active.
2320 2309 if ((size_t)worker_i < _cm->active_tasks()) {
2321 2310 CMTask* task = _cm->task(worker_i);
2322 2311 task->record_start_time();
2323 2312 do {
2324 2313 task->do_marking_step(1000000000.0 /* something very large */,
2325 2314 true /* do_stealing */,
2326 2315 true /* do_termination */);
2327 2316 } while (task->has_aborted() && !_cm->has_overflown());
2328 2317 // If we overflow, then we do not want to restart. We instead
2329 2318 // want to abort remark and do concurrent marking again.
2330 2319 task->record_end_time();
2331 2320 }
2332 2321 }
2333 2322
2334 2323 CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2335 2324 AbstractGangTask("Par Remark"), _cm(cm) {
2336 2325 _cm->terminator()->reset_for_reuse(active_workers);
2337 2326 }
2338 2327 };
2339 2328
2340 2329 void ConcurrentMark::checkpointRootsFinalWork() {
2341 2330 ResourceMark rm;
2342 2331 HandleMark hm;
2343 2332 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2344 2333
2345 2334 g1h->ensure_parsability(false);
2346 2335
2347 2336 if (G1CollectedHeap::use_parallel_gc_threads()) {
2348 2337 G1CollectedHeap::StrongRootsScope srs(g1h);
2349 2338 // this is remark, so we'll use up all active threads
2350 2339 int active_workers = g1h->workers()->active_workers();
2351 2340 if (active_workers == 0) {
2352 2341 assert(active_workers > 0, "Should have been set earlier");
2353 2342 active_workers = ParallelGCThreads;
2354 2343 g1h->workers()->set_active_workers(active_workers);
2355 2344 }
2356 2345 set_phase(active_workers, false /* concurrent */);
2357 2346 // Leave _parallel_marking_threads at its
2358 2347 // value originally calculated in the ConcurrentMark
2359 2348 // constructor and pass values of the active workers
2360 2349 // through the gang in the task.
2361 2350
2362 2351 CMRemarkTask remarkTask(this, active_workers);
2363 2352 g1h->set_par_threads(active_workers);
2364 2353 g1h->workers()->run_task(&remarkTask);
2365 2354 g1h->set_par_threads(0);
2366 2355 } else {
2367 2356 G1CollectedHeap::StrongRootsScope srs(g1h);
2368 2357 // this is remark, so we'll use up all available threads
2369 2358 int active_workers = 1;
2370 2359 set_phase(active_workers, false /* concurrent */);
2371 2360
2372 2361 CMRemarkTask remarkTask(this, active_workers);
2373 2362 // We will start all available threads, even if we decide that the
2374 2363 // active_workers will be fewer. The extra ones will just bail out
2375 2364 // immediately.
2376 2365 remarkTask.work(0);
2377 2366 }
2378 2367 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2379 2368 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2380 2369
2381 2370 print_stats();
2382 2371
2383 2372 #if VERIFY_OBJS_PROCESSED
2384 2373 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2385 2374 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2386 2375 _scan_obj_cl.objs_processed,
2387 2376 ThreadLocalObjQueue::objs_enqueued);
2388 2377 guarantee(_scan_obj_cl.objs_processed ==
2389 2378 ThreadLocalObjQueue::objs_enqueued,
2390 2379 "Different number of objs processed and enqueued.");
2391 2380 }
2392 2381 #endif
2393 2382 }
2394 2383
2395 2384 #ifndef PRODUCT
2396 2385
2397 2386 class PrintReachableOopClosure: public OopClosure {
2398 2387 private:
2399 2388 G1CollectedHeap* _g1h;
2400 2389 outputStream* _out;
2401 2390 VerifyOption _vo;
2402 2391 bool _all;
2403 2392
2404 2393 public:
2405 2394 PrintReachableOopClosure(outputStream* out,
2406 2395 VerifyOption vo,
2407 2396 bool all) :
2408 2397 _g1h(G1CollectedHeap::heap()),
2409 2398 _out(out), _vo(vo), _all(all) { }
2410 2399
2411 2400 void do_oop(narrowOop* p) { do_oop_work(p); }
2412 2401 void do_oop( oop* p) { do_oop_work(p); }
2413 2402
2414 2403 template <class T> void do_oop_work(T* p) {
2415 2404 oop obj = oopDesc::load_decode_heap_oop(p);
2416 2405 const char* str = NULL;
2417 2406 const char* str2 = "";
2418 2407
2419 2408 if (obj == NULL) {
2420 2409 str = "";
2421 2410 } else if (!_g1h->is_in_g1_reserved(obj)) {
2422 2411 str = " O";
2423 2412 } else {
2424 2413 HeapRegion* hr = _g1h->heap_region_containing(obj);
2425 2414 guarantee(hr != NULL, "invariant");
2426 2415 bool over_tams = false;
2427 2416 bool marked = false;
2428 2417
2429 2418 switch (_vo) {
2430 2419 case VerifyOption_G1UsePrevMarking:
2431 2420 over_tams = hr->obj_allocated_since_prev_marking(obj);
2432 2421 marked = _g1h->isMarkedPrev(obj);
2433 2422 break;
2434 2423 case VerifyOption_G1UseNextMarking:
2435 2424 over_tams = hr->obj_allocated_since_next_marking(obj);
2436 2425 marked = _g1h->isMarkedNext(obj);
2437 2426 break;
2438 2427 case VerifyOption_G1UseMarkWord:
2439 2428 marked = obj->is_gc_marked();
2440 2429 break;
2441 2430 default:
2442 2431 ShouldNotReachHere();
2443 2432 }
2444 2433
2445 2434 if (over_tams) {
2446 2435 str = " >";
2447 2436 if (marked) {
2448 2437 str2 = " AND MARKED";
2449 2438 }
2450 2439 } else if (marked) {
2451 2440 str = " M";
2452 2441 } else {
2453 2442 str = " NOT";
2454 2443 }
2455 2444 }
2456 2445
2457 2446 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2458 2447 p, (void*) obj, str, str2);
2459 2448 }
2460 2449 };
2461 2450
2462 2451 class PrintReachableObjectClosure : public ObjectClosure {
2463 2452 private:
2464 2453 G1CollectedHeap* _g1h;
2465 2454 outputStream* _out;
2466 2455 VerifyOption _vo;
2467 2456 bool _all;
2468 2457 HeapRegion* _hr;
2469 2458
2470 2459 public:
2471 2460 PrintReachableObjectClosure(outputStream* out,
2472 2461 VerifyOption vo,
2473 2462 bool all,
2474 2463 HeapRegion* hr) :
2475 2464 _g1h(G1CollectedHeap::heap()),
2476 2465 _out(out), _vo(vo), _all(all), _hr(hr) { }
2477 2466
2478 2467 void do_object(oop o) {
2479 2468 bool over_tams = false;
2480 2469 bool marked = false;
2481 2470
2482 2471 switch (_vo) {
2483 2472 case VerifyOption_G1UsePrevMarking:
2484 2473 over_tams = _hr->obj_allocated_since_prev_marking(o);
2485 2474 marked = _g1h->isMarkedPrev(o);
2486 2475 break;
2487 2476 case VerifyOption_G1UseNextMarking:
2488 2477 over_tams = _hr->obj_allocated_since_next_marking(o);
2489 2478 marked = _g1h->isMarkedNext(o);
2490 2479 break;
2491 2480 case VerifyOption_G1UseMarkWord:
2492 2481 marked = o->is_gc_marked();
2493 2482 break;
2494 2483 default:
2495 2484 ShouldNotReachHere();
2496 2485 }
2497 2486 bool print_it = _all || over_tams || marked;
2498 2487
2499 2488 if (print_it) {
2500 2489 _out->print_cr(" "PTR_FORMAT"%s",
2501 2490 o, (over_tams) ? " >" : (marked) ? " M" : "");
2502 2491 PrintReachableOopClosure oopCl(_out, _vo, _all);
2503 2492 o->oop_iterate(&oopCl);
2504 2493 }
2505 2494 }
2506 2495 };
2507 2496
2508 2497 class PrintReachableRegionClosure : public HeapRegionClosure {
2509 2498 private:
2510 2499 outputStream* _out;
2511 2500 VerifyOption _vo;
2512 2501 bool _all;
2513 2502
2514 2503 public:
2515 2504 bool doHeapRegion(HeapRegion* hr) {
2516 2505 HeapWord* b = hr->bottom();
2517 2506 HeapWord* e = hr->end();
2518 2507 HeapWord* t = hr->top();
2519 2508 HeapWord* p = NULL;
2520 2509
2521 2510 switch (_vo) {
2522 2511 case VerifyOption_G1UsePrevMarking:
2523 2512 p = hr->prev_top_at_mark_start();
2524 2513 break;
2525 2514 case VerifyOption_G1UseNextMarking:
2526 2515 p = hr->next_top_at_mark_start();
2527 2516 break;
2528 2517 case VerifyOption_G1UseMarkWord:
2529 2518 // When we are verifying marking using the mark word
2530 2519 // TAMS has no relevance.
2531 2520 assert(p == NULL, "post-condition");
2532 2521 break;
2533 2522 default:
2534 2523 ShouldNotReachHere();
2535 2524 }
2536 2525 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2537 2526 "TAMS: "PTR_FORMAT, b, e, t, p);
2538 2527 _out->cr();
2539 2528
2540 2529 HeapWord* from = b;
2541 2530 HeapWord* to = t;
2542 2531
2543 2532 if (to > from) {
2544 2533 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2545 2534 _out->cr();
2546 2535 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2547 2536 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2548 2537 _out->cr();
2549 2538 }
2550 2539
2551 2540 return false;
2552 2541 }
2553 2542
2554 2543 PrintReachableRegionClosure(outputStream* out,
2555 2544 VerifyOption vo,
2556 2545 bool all) :
2557 2546 _out(out), _vo(vo), _all(all) { }
2558 2547 };
2559 2548
2560 2549 static const char* verify_option_to_tams(VerifyOption vo) {
2561 2550 switch (vo) {
2562 2551 case VerifyOption_G1UsePrevMarking:
2563 2552 return "PTAMS";
2564 2553 case VerifyOption_G1UseNextMarking:
2565 2554 return "NTAMS";
2566 2555 default:
2567 2556 return "NONE";
2568 2557 }
2569 2558 }
2570 2559
2571 2560 void ConcurrentMark::print_reachable(const char* str,
2572 2561 VerifyOption vo,
2573 2562 bool all) {
2574 2563 gclog_or_tty->cr();
2575 2564 gclog_or_tty->print_cr("== Doing heap dump... ");
2576 2565
2577 2566 if (G1PrintReachableBaseFile == NULL) {
2578 2567 gclog_or_tty->print_cr(" #### error: no base file defined");
2579 2568 return;
2580 2569 }
2581 2570
2582 2571 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2583 2572 (JVM_MAXPATHLEN - 1)) {
2584 2573 gclog_or_tty->print_cr(" #### error: file name too long");
2585 2574 return;
2586 2575 }
2587 2576
2588 2577 char file_name[JVM_MAXPATHLEN];
2589 2578 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2590 2579 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2591 2580
2592 2581 fileStream fout(file_name);
2593 2582 if (!fout.is_open()) {
2594 2583 gclog_or_tty->print_cr(" #### error: could not open file");
2595 2584 return;
2596 2585 }
2597 2586
2598 2587 outputStream* out = &fout;
2599 2588 out->print_cr("-- USING %s", verify_option_to_tams(vo));
2600 2589 out->cr();
2601 2590
2602 2591 out->print_cr("--- ITERATING OVER REGIONS");
2603 2592 out->cr();
2604 2593 PrintReachableRegionClosure rcl(out, vo, all);
2605 2594 _g1h->heap_region_iterate(&rcl);
2606 2595 out->cr();
2607 2596
2608 2597 gclog_or_tty->print_cr(" done");
2609 2598 gclog_or_tty->flush();
2610 2599 }
2611 2600
2612 2601 #endif // PRODUCT
2613 2602
2614 2603 // This note is for drainAllSATBBuffers and the code in between.
2615 2604 // In the future we could reuse a task to do this work during an
2616 2605 // evacuation pause (since now tasks are not active and can be claimed
2617 2606 // during an evacuation pause). This was a late change to the code and
2618 2607 // is currently not being taken advantage of.
2619 2608
2620 2609 class CMGlobalObjectClosure : public ObjectClosure {
2621 2610 private:
2622 2611 ConcurrentMark* _cm;
2623 2612
2624 2613 public:
2625 2614 void do_object(oop obj) {
2626 2615 _cm->deal_with_reference(obj);
2627 2616 }
2628 2617
2629 2618 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2630 2619 };
2631 2620
2632 2621 void ConcurrentMark::deal_with_reference(oop obj) {
2633 2622 if (verbose_high()) {
2634 2623 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2635 2624 (void*) obj);
2636 2625 }
2637 2626
2638 2627 HeapWord* objAddr = (HeapWord*) obj;
2639 2628 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2640 2629 if (_g1h->is_in_g1_reserved(objAddr)) {
2641 2630 assert(obj != NULL, "null check is implicit");
2642 2631 if (!_nextMarkBitMap->isMarked(objAddr)) {
2643 2632 // Only get the containing region if the object is not marked on the
2644 2633 // bitmap (otherwise, it's a waste of time since we won't do
2645 2634 // anything with it).
2646 2635 HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2647 2636 if (!hr->obj_allocated_since_next_marking(obj)) {
2648 2637 if (verbose_high()) {
2649 2638 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2650 2639 "marked", (void*) obj);
2651 2640 }
2652 2641
2653 2642 // we need to mark it first
2654 2643 if (_nextMarkBitMap->parMark(objAddr)) {
2655 2644 // No OrderAccess::store_load() is needed. It is implicit in the
2656 2645 // CAS done in parMark(objAddr) above
2657 2646 HeapWord* finger = _finger;
2658 2647 if (objAddr < finger) {
2659 2648 if (verbose_high()) {
2660 2649 gclog_or_tty->print_cr("[global] below the global finger "
2661 2650 "("PTR_FORMAT"), pushing it", finger);
2662 2651 }
2663 2652 if (!mark_stack_push(obj)) {
2664 2653 if (verbose_low()) {
2665 2654 gclog_or_tty->print_cr("[global] global stack overflow during "
2666 2655 "deal_with_reference");
2667 2656 }
2668 2657 }
2669 2658 }
2670 2659 }
2671 2660 }
2672 2661 }
2673 2662 }
2674 2663 }
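// Illustrative sketch, not part of this change: the essential shape of
// deal_with_reference() above. Objects already marked, or allocated since
// marking started, need no work; otherwise one thread wins the parMark()
// CAS and, if the object lies below the global finger (i.e. in the part of
// the heap the marking threads have already claimed), it must be pushed
// explicitly or it would never be scanned:
//
//   if (_nextMarkBitMap->parMark(objAddr)) {  // we won the marking race
//     if (objAddr < _finger) {
//       mark_stack_push(obj);  // the finger has passed it, so push it
//     }                        // else: the finger will reach it anyway
//   }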
2675 2664
2676 2665 void ConcurrentMark::drainAllSATBBuffers() {
2677 2666 CMGlobalObjectClosure oc(this);
2678 2667 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2679 2668 satb_mq_set.set_closure(&oc);
2680 2669
2681 2670 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2682 2671 if (verbose_medium()) {
2683 2672 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2684 2673 }
2685 2674 }
2686 2675
2687 2676 // no need to check whether we should do this, as this is only
2688 2677 // called during an evacuation pause
2689 2678 satb_mq_set.iterate_closure_all_threads();
2690 2679
2691 2680 satb_mq_set.set_closure(NULL);
2692 2681 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2693 2682 }
2694 2683
2695 2684 void ConcurrentMark::markPrev(oop p) {
2696 2685 // Note we are overriding the read-only view of the prev map here, via
2697 2686 // the cast.
2698 2687 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2699 2688 }
2700 2689
2701 2690 void ConcurrentMark::clear(oop p) {
2702 2691 assert(p != NULL && p->is_oop(), "expected an oop");
2703 2692 HeapWord* addr = (HeapWord*)p;
2704 2693 assert(addr >= _nextMarkBitMap->startWord() &&
2705 2694 addr < _nextMarkBitMap->endWord(), "in a region");
2706 2695
2707 2696 _nextMarkBitMap->clear(addr);
2708 2697 }
2709 2698
2710 2699 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2711 2700 // Note we are overriding the read-only view of the prev map here, via
2712 2701 // the cast.
2713 2702 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2714 2703 _nextMarkBitMap->clearRange(mr);
2715 2704 }
2716 2705
2717 2706 HeapRegion*
2718 2707 ConcurrentMark::claim_region(int task_num) {
2719 2708 // "checkpoint" the finger
2720 2709 HeapWord* finger = _finger;
2721 2710
2722 2711 // _heap_end will not change underneath our feet; it only changes at
2723 2712 // yield points.
2724 2713 while (finger < _heap_end) {
2725 2714 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2726 2715
2727 2716 // Note on how this code handles humongous regions. In the
2728 2717 // normal case the finger will reach the start of a "starts
2729 2718 // humongous" (SH) region. Its end will either be the end of the
2730 2719 // last "continues humongous" (CH) region in the sequence, or the
2731 2720 // standard end of the SH region (if the SH is the only region in
2732 2721 // the sequence). That way claim_region() will skip over the CH
2733 2722 // regions. However, there is a subtle race between a CM thread
2734 2723 // executing this method and a mutator thread doing a humongous
2735 2724 // object allocation. The two are not mutually exclusive as the CM
2736 2725 // thread does not need to hold the Heap_lock when it gets
2737 2726 // here. So there is a chance that claim_region() will come across
2738 2727 // a free region that's in the process of becoming a SH or a CH
2739 2728 // region. In the former case, it will either
2740 2729 // a) Miss the update to the region's end, in which case it will
2741 2730 // visit every subsequent CH region, will find their bitmaps
2742 2731 // empty, and do nothing, or
2743 2732 // b) Will observe the update of the region's end (in which case
2744 2733 // it will skip the subsequent CH regions).
2745 2734 // If it comes across a region that suddenly becomes CH, the
2746 2735 // scenario will be similar to b). So, the race between
2747 2736 // claim_region() and a humongous object allocation might force us
2748 2737 // to do a bit of unnecessary work (due to some unnecessary bitmap
2749 2738 // iterations) but it should not introduce any correctness issues.
2750 2739 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2751 2740 HeapWord* bottom = curr_region->bottom();
2752 2741 HeapWord* end = curr_region->end();
2753 2742 HeapWord* limit = curr_region->next_top_at_mark_start();
2754 2743
2755 2744 if (verbose_low()) {
2756 2745 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2757 2746 "["PTR_FORMAT", "PTR_FORMAT"), "
2758 2747 "limit = "PTR_FORMAT,
2759 2748 task_num, curr_region, bottom, end, limit);
2760 2749 }
2761 2750
2762 2751 // Is the gap between reading the finger and doing the CAS too long?
2763 2752 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2764 2753 if (res == finger) {
2765 2754 // we succeeded
2766 2755
2767 2756 // notice that _finger == end cannot be guaranteed here since
2768 2757 // someone else might have moved the finger even further
2769 2758 assert(_finger >= end, "the finger should have moved forward");
2770 2759
2771 2760 if (verbose_low()) {
2772 2761 gclog_or_tty->print_cr("[%d] we were successful with region = "
2773 2762 PTR_FORMAT, task_num, curr_region);
2774 2763 }
2775 2764
2776 2765 if (limit > bottom) {
2777 2766 if (verbose_low()) {
2778 2767 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2779 2768 "returning it ", task_num, curr_region);
2780 2769 }
2781 2770 return curr_region;
2782 2771 } else {
2783 2772 assert(limit == bottom,
2784 2773 "the region limit should be at bottom");
2785 2774 if (verbose_low()) {
2786 2775 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2787 2776 "returning NULL", task_num, curr_region);
2788 2777 }
2789 2778 // we return NULL and the caller should try calling
2790 2779 // claim_region() again.
2791 2780 return NULL;
2792 2781 }
2793 2782 } else {
2794 2783 assert(_finger > finger, "the finger should have moved forward");
2795 2784 if (verbose_low()) {
2796 2785 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2797 2786 "global finger = "PTR_FORMAT", "
2798 2787 "our finger = "PTR_FORMAT,
2799 2788 task_num, _finger, finger);
2800 2789 }
2801 2790
2802 2791 // read it again
2803 2792 finger = _finger;
2804 2793 }
2805 2794 }
2806 2795
2807 2796 return NULL;
2808 2797 }
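// Illustrative sketch, not part of this change: stripped of logging and
// the humongous-region caveats, claim_region() is a classic
// compare-and-swap claim loop on the shared finger:
//
//   while (finger < _heap_end) {
//     HeapRegion* hr  = _g1h->heap_region_containing_raw(finger);
//     HeapWord*   end = hr->end();
//     if (Atomic::cmpxchg_ptr(end, &_finger, finger) == finger) {
//       return hr;       // we advanced the finger: the region is ours
//     }
//     finger = _finger;  // lost the race: re-read the finger and retry
//   }
//   return NULL;
//
// (The real code above additionally returns NULL for a successfully
// claimed but empty region, leaving the caller to call claim_region()
// again.)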
2809 2798
2810 2799 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2811 2800 bool result = false;
2812 2801 for (int i = 0; i < (int)_max_task_num; ++i) {
2813 2802 CMTask* the_task = _tasks[i];
2814 2803 MemRegion mr = the_task->aborted_region();
2815 2804 if (mr.start() != NULL) {
2816 2805 assert(mr.end() != NULL, "invariant");
2817 2806 assert(mr.word_size() > 0, "invariant");
2818 2807 HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2819 2808 assert(hr != NULL, "invariant");
2820 2809 if (hr->in_collection_set()) {
2821 2810 // The region points into the collection set
2822 2811 the_task->set_aborted_region(MemRegion());
2823 2812 result = true;
2824 2813 }
2825 2814 }
2826 2815 }
2827 2816 return result;
2828 2817 }
2829 2818
2830 2819 bool ConcurrentMark::has_aborted_regions() {
2831 2820 for (int i = 0; i < (int)_max_task_num; ++i) {
2832 2821 CMTask* the_task = _tasks[i];
2833 2822 MemRegion mr = the_task->aborted_region();
2834 2823 if (mr.start() != NULL) {
2835 2824 assert(mr.end() != NULL, "invariant");
2836 2825 assert(mr.word_size() > 0, "invariant");
2837 2826 return true;
2838 2827 }
2839 2828 }
2840 2829 return false;
2841 2830 }
2842 2831
2843 2832 void ConcurrentMark::oops_do(OopClosure* cl) {
2844 2833 if (_markStack.size() > 0 && verbose_low()) {
2845 2834 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2846 2835 "size = %d", _markStack.size());
2847 2836 }
2848 2837 // we first iterate over the contents of the mark stack...
2849 2838 _markStack.oops_do(cl);
2850 2839
2851 2840 for (int i = 0; i < (int)_max_task_num; ++i) {
2852 2841 OopTaskQueue* queue = _task_queues->queue((int)i);
2853 2842
2854 2843 if (queue->size() > 0 && verbose_low()) {
2855 2844 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2856 2845 "size = %d", i, queue->size());
2857 2846 }
2858 2847
2859 2848 // ...then over the contents of all the task queues.
2860 2849 queue->oops_do(cl);
2861 2850 }
2862 2851
2863 2852 // Invalidate any entries in the region stack that point
2864 2853 // into the collection set.
2865 2854 if (_regionStack.invalidate_entries_into_cset()) {
2866 2855 // otherwise, any gray objects copied during the evacuation pause
2867 2856 // might not be visited.
2868 2857 assert(_should_gray_objects, "invariant");
2869 2858 }
2870 2859
2871 2860 // Invalidate any aborted regions, recorded in the individual CM
2872 2861 // tasks, that point into the collection set.
2873 2862 if (invalidate_aborted_regions_in_cset()) {
2874 2863 // otherwise, any gray objects copied during the evacuation pause
2875 2864 // might not be visited.
2876 2865 assert(_should_gray_objects, "invariant");
2877 2866 }
2878 2867
2879 2868 }
2880 2869
2881 2870 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2882 2871 _markStack.setEmpty();
2883 2872 _markStack.clear_overflow();
2884 2873 _regionStack.setEmpty();
2885 2874 _regionStack.clear_overflow();
2886 2875 if (clear_overflow) {
2887 2876 clear_has_overflown();
2888 2877 } else {
2889 2878 assert(has_overflown(), "pre-condition");
2890 2879 }
2891 2880 _finger = _heap_start;
2892 2881
2893 2882 for (int i = 0; i < (int)_max_task_num; ++i) {
2894 2883 OopTaskQueue* queue = _task_queues->queue(i);
2895 2884 queue->set_empty();
2896 2885 // Clear any partial regions from the CMTasks
2897 2886 _tasks[i]->clear_aborted_region();
2898 2887 }
2899 2888 }
2900 2889
2901 2890 void ConcurrentMark::print_stats() {
2902 2891 if (verbose_stats()) {
2903 2892 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2904 2893 for (size_t i = 0; i < _active_tasks; ++i) {
2905 2894 _tasks[i]->print_stats();
2906 2895 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2907 2896 }
2908 2897 }
2909 2898 }
2910 2899
2911 2900 // Closures used by ConcurrentMark::complete_marking_in_collection_set().
2912 2901
2913 2902 class CSetMarkOopClosure: public OopClosure {
2914 2903 friend class CSetMarkBitMapClosure;
2915 2904
2916 2905 G1CollectedHeap* _g1h;
2917 2906 CMBitMap* _bm;
2918 2907 ConcurrentMark* _cm;
2919 2908 oop* _ms;
2920 2909 jint* _array_ind_stack;
2921 2910 int _ms_size;
2922 2911 int _ms_ind;
2923 2912 int _array_increment;
2924 2913 int _worker_i;
2925 2914
2926 2915 bool push(oop obj, int arr_ind = 0) {
2927 2916 if (_ms_ind == _ms_size) {
2928 2917 gclog_or_tty->print_cr("Mark stack is full.");
2929 2918 return false;
2930 2919 }
2931 2920 _ms[_ms_ind] = obj;
2932 2921 if (obj->is_objArray()) {
2933 2922 _array_ind_stack[_ms_ind] = arr_ind;
2934 2923 }
2935 2924 _ms_ind++;
2936 2925 return true;
2937 2926 }
2938 2927
2939 2928 oop pop() {
2940 2929 if (_ms_ind == 0) {
2941 2930 return NULL;
2942 2931 } else {
2943 2932 _ms_ind--;
2944 2933 return _ms[_ms_ind];
2945 2934 }
2946 2935 }
2947 2936
2948 2937 template <class T> bool drain() {
2949 2938 while (_ms_ind > 0) {
2950 2939 oop obj = pop();
2951 2940 assert(obj != NULL, "Since index was non-zero.");
2952 2941 if (obj->is_objArray()) {
2953 2942 jint arr_ind = _array_ind_stack[_ms_ind];
2954 2943 objArrayOop aobj = objArrayOop(obj);
2955 2944 jint len = aobj->length();
2956 2945 jint next_arr_ind = arr_ind + _array_increment;
2957 2946 if (next_arr_ind < len) {
2958 2947 push(obj, next_arr_ind);
2959 2948 }
2960 2949 // Now process this portion of the array.
2961 2950 int lim = MIN2(next_arr_ind, len);
2962 2951 for (int j = arr_ind; j < lim; j++) {
2963 2952 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2964 2953 }
2965 2954 } else {
2966 2955 obj->oop_iterate(this);
2967 2956 }
2968 2957 if (abort()) return false;
2969 2958 }
2970 2959 return true;
2971 2960 }
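// Editor's note (worked example, values illustrative): with
// _array_increment == 16, an objArray of length 40 is drained in
// chunks [0,16), [16,32) and [32,40). The continuation (obj,
// next_arr_ind) is pushed before the current chunk is scanned, so
// the remainder is revisited later and per-pop work stays bounded.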
2972 2961
2973 2962 public:
2974 2963 CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
2975 2964 _g1h(G1CollectedHeap::heap()),
2976 2965 _cm(cm),
2977 2966 _bm(cm->nextMarkBitMap()),
2978 2967 _ms_size(ms_size), _ms_ind(0),
2979 2968 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2980 2969 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2981 2970 _array_increment(MAX2(ms_size/8, 16)),
2982 2971 _worker_i(worker_i) { }
2983 2972
2984 2973 ~CSetMarkOopClosure() {
2985 2974 FREE_C_HEAP_ARRAY(oop, _ms);
2986 2975 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2987 2976 }
2988 2977
2989 2978 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2990 2979 virtual void do_oop( oop* p) { do_oop_work(p); }
2991 2980
2992 2981 template <class T> void do_oop_work(T* p) {
2993 2982 T heap_oop = oopDesc::load_heap_oop(p);
2994 2983 if (oopDesc::is_null(heap_oop)) return;
2995 2984 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2996 2985 if (obj->is_forwarded()) {
2997 2986 // If the object has already been forwarded, we have to make sure
2998 2987 // that it's marked. So follow the forwarding pointer. Note that
2999 2988 // this does the right thing for self-forwarding pointers in the
3000 2989 // evacuation failure case.
3001 2990 obj = obj->forwardee();
3002 2991 }
3003 2992 HeapRegion* hr = _g1h->heap_region_containing(obj);
3004 2993 if (hr != NULL) {
3005 2994 if (hr->in_collection_set()) {
3006 2995 if (_g1h->is_obj_ill(obj)) {
3007 2996 if (_bm->parMark((HeapWord*)obj)) {
3008 2997 if (!push(obj)) {
3009 2998 gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed.");
3010 2999 set_abort();
3011 3000 }
3012 3001 }
3013 3002 }
3014 3003 } else {
3015 3004 // Outside the collection set; we need to gray it
3016 3005 _cm->deal_with_reference(obj);
3017 3006 }
3018 3007 }
3019 3008 }
3020 3009 };
3021 3010
3022 3011 class CSetMarkBitMapClosure: public BitMapClosure {
3023 3012 G1CollectedHeap* _g1h;
3024 3013 CMBitMap* _bitMap;
3025 3014 ConcurrentMark* _cm;
3026 3015 CSetMarkOopClosure _oop_cl;
3027 3016 int _worker_i;
3028 3017
3029 3018 public:
3030 3019 CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
3031 3020 _g1h(G1CollectedHeap::heap()),
3032 3021 _bitMap(cm->nextMarkBitMap()),
3033 3022 _oop_cl(cm, ms_size, worker_i),
3034 3023 _worker_i(worker_i) { }
3035 3024
3036 3025 bool do_bit(size_t offset) {
3037 3026 // convert offset into a HeapWord*
3038 3027 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
3039 3028 assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
3040 3029 "address out of range");
3041 3030 assert(_bitMap->isMarked(addr), "tautology");
3042 3031 oop obj = oop(addr);
3043 3032 if (!obj->is_forwarded()) {
3044 3033 if (!_oop_cl.push(obj)) return false;
3045 3034 if (UseCompressedOops) {
3046 3035 if (!_oop_cl.drain<narrowOop>()) return false;
3047 3036 } else {
3048 3037 if (!_oop_cl.drain<oop>()) return false;
3049 3038 }
3050 3039 }
3051 3040 // Otherwise...
3052 3041 return true;
3053 3042 }
3054 3043 };
3055 3044
3056 3045 class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
3057 3046 CMBitMap* _bm;
3058 3047 CSetMarkBitMapClosure _bit_cl;
3059 3048 int _worker_i;
3060 3049
3061 3050 enum SomePrivateConstants {
3062 3051 MSSize = 1000
3063 3052 };
3064 3053
3065 3054 public:
3066 3055 CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) :
3067 3056 _bm(cm->nextMarkBitMap()),
3068 3057 _bit_cl(cm, MSSize, worker_i),
3069 3058 _worker_i(worker_i) { }
3070 3059
3071 3060 bool doHeapRegion(HeapRegion* hr) {
3072 3061 if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
3073 3062 // The current worker has successfully claimed the region.
3074 3063 if (!hr->evacuation_failed()) {
3075 3064 MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start());
3076 3065 if (!mr.is_empty()) {
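// Editor's note: iterate() returns false when the closure aborts
// (e.g. its mark stack fills up and the push fails), so the pass
// is retried until the whole range completes without aborting.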
3077 3066 bool done = false;
3078 3067 while (!done) {
3079 3068 done = _bm->iterate(&_bit_cl, mr);
3080 3069 }
3081 3070 }
3082 3071 }
3083 3072 }
3084 3073 return false;
3085 3074 }
3086 3075 };
3087 3076
3088 3077 class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
3089 3078 jint _claim_value;
3090 3079
3091 3080 public:
3092 3081 SetClaimValuesInCSetHRClosure(jint claim_value) :
3093 3082 _claim_value(claim_value) { }
3094 3083
3095 3084 bool doHeapRegion(HeapRegion* hr) {
3096 3085 hr->set_claim_value(_claim_value);
3097 3086 return false;
3098 3087 }
3099 3088 };
3100 3089
3101 3090 class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
3102 3091 protected:
3103 3092 G1CollectedHeap* _g1h;
3104 3093 ConcurrentMark* _cm;
3105 3094
3106 3095 public:
3107 3096 G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h,
3108 3097 ConcurrentMark* cm) :
3109 3098 AbstractGangTask("Complete Mark in CSet"),
3110 3099 _g1h(g1h), _cm(cm) { }
3111 3100
3112 3101 void work(int worker_i) {
3113 3102 CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i);
3114 3103 HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i);
3115 3104 _g1h->collection_set_iterate_from(hr, &cmplt);
3116 3105 }
3117 3106 };
3118 3107
3119 3108 void ConcurrentMark::complete_marking_in_collection_set() {
3120 3109 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3121 3110
3122 3111 if (!g1h->mark_in_progress()) {
3123 3112 g1h->g1_policy()->record_mark_closure_time(0.0);
3124 3113 return;
3125 3114 }
3126 3115
3127 3116 double start = os::elapsedTime();
3128 3117 G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this);
3129 3118
3130 3119 assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
3131 3120
3132 3121 if (G1CollectedHeap::use_parallel_gc_threads()) {
3133 3122 int n_workers = g1h->workers()->active_workers();
3134 3123 g1h->set_par_threads(n_workers);
3135 3124 g1h->workers()->run_task(&complete_mark_task);
3136 3125 g1h->set_par_threads(0);
3137 3126 } else {
3138 3127 complete_mark_task.work(0);
3139 3128 }
3140 3129
3141 3130 assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
3142 3131
3143 3132 // Now reset the claim values in the regions in the collection set.
3144 3133 SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
3145 3134 g1h->collection_set_iterate(&set_cv_cl);
3146 3135
3147 3136 assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
3148 3137
3149 3138 double end_time = os::elapsedTime();
3150 3139 double elapsed_time_ms = (end_time - start) * 1000.0;
3151 3140 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3152 3141 }
3153 3142
3154 3143 // The next two methods deal with the following optimisation. Some
3155 3144 // objects are gray by being marked and located above the finger. If
3156 3145 // they are copied, during an evacuation pause, below the finger then
3157 3146 // they need to be pushed on the stack. The observation is that, if
3158 3147 // there are no regions in the collection set located above the
3159 3148 // finger, then the above cannot happen, hence we do not need to
3160 3149 // explicitly gray any objects when copying them to below the
3161 3150 // finger. The global stack will be scanned to ensure that, if it
3162 3151 // points to objects being copied, it will update their
3163 3152 // location. There is a tricky situation with the gray objects in
3164 3153 // the region stack that are being copied, however. See the comment in
3165 3154 // newCSet().
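// For a concrete instance of the problem: an object that is marked
// and sits above the global finger is gray purely by position; if an
// evacuation pause copies it below the finger, no task would visit
// it again unless it is explicitly re-grayed at copy time.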
3166 3155
3167 3156 void ConcurrentMark::newCSet() {
3168 3157 if (!concurrent_marking_in_progress()) {
3169 3158 // nothing to do if marking is not in progress
3170 3159 return;
3171 3160 }
3172 3161
3173 3162 // find what the lowest finger is among the global and local fingers
3174 3163 _min_finger = _finger;
3175 3164 for (int i = 0; i < (int)_max_task_num; ++i) {
3176 3165 CMTask* task = _tasks[i];
3177 3166 HeapWord* task_finger = task->finger();
3178 3167 if (task_finger != NULL && task_finger < _min_finger) {
3179 3168 _min_finger = task_finger;
3180 3169 }
3181 3170 }
3182 3171
3183 3172 _should_gray_objects = false;
3184 3173
3185 3174 // This fixes a very subtle and frustrating bug. It might be the case
3186 3175 // that, during an evacuation pause, heap regions that contain
3187 3176 // objects that are gray (by being in regions contained in the
3188 3177 // region stack) are included in the collection set. Since such gray
3189 3178 // objects will be moved, and because it's not easy to redirect
3190 3179 // region stack entries to point to a new location (because objects
3191 3180 // in one region might be scattered to multiple regions after they
3192 3181 // are copied), one option is to ensure that all marked objects
3193 3182 // copied during a pause are pushed on the stack. Notice, however,
3194 3183 // that this problem can only happen when the region stack is not
3195 3184 // empty during an evacuation pause. So, we make the fix a bit less
3196 3185 // conservative and ensure that regions are pushed on the stack,
3197 3186 // irrespective of whether all collection set regions are below the
3198 3187 // finger, if the region stack is not empty. This is expected to be
3199 3188 // a rare case, so I don't think it's necessary to be smarter about it.
3200 3189 if (!region_stack_empty() || has_aborted_regions()) {
3201 3190 _should_gray_objects = true;
3202 3191 }
3203 3192 }
3204 3193
3205 3194 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3206 3195 if (!concurrent_marking_in_progress()) return;
3207 3196
3208 3197 HeapWord* region_end = hr->end();
3209 3198 if (region_end > _min_finger) {
3210 3199 _should_gray_objects = true;
3211 3200 }
3212 3201 }
3213 3202
3214 3203 // Resets the region fields of active CMTasks whose values point
3215 3204 // into the collection set.
3216 3205 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3217 3206 assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3218 3207 assert(parallel_marking_threads() <= _max_task_num, "sanity");
3219 3208
3220 3209 for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3221 3210 CMTask* task = _tasks[i];
3222 3211 HeapWord* task_finger = task->finger();
3223 3212 if (task_finger != NULL) {
3224 3213 assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3225 3214 HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3226 3215 if (finger_region->in_collection_set()) {
3227 3216 // The task's current region is in the collection set.
3228 3217 // This region will be evacuated in the current GC and
3229 3218 // the region fields in the task will be stale.
3230 3219 task->giveup_current_region();
3231 3220 }
3232 3221 }
3233 3222 }
3234 3223 }
3235 3224
3236 3225 // abandon current marking iteration due to a Full GC
3237 3226 void ConcurrentMark::abort() {
3238 3227 // Clear all marks to force marking thread to do nothing
3239 3228 _nextMarkBitMap->clearAll();
3240 3229 // Empty mark stack
3241 3230 clear_marking_state();
3242 3231 for (int i = 0; i < (int)_max_task_num; ++i) {
3243 3232 _tasks[i]->clear_region_fields();
3244 3233 }
3245 3234 _has_aborted = true;
3246 3235
3247 3236 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3248 3237 satb_mq_set.abandon_partial_marking();
3249 3238 // This can be called either during or outside marking, we'll read
3250 3239 // the expected_active value from the SATB queue set.
3251 3240 satb_mq_set.set_active_all_threads(
3252 3241 false, /* new active value */
3253 3242 satb_mq_set.is_active() /* expected_active */);
3254 3243 }
3255 3244
3256 3245 static void print_ms_time_info(const char* prefix, const char* name,
3257 3246 NumberSeq& ns) {
3258 3247 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3259 3248 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3260 3249 if (ns.num() > 0) {
3261 3250 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3262 3251 prefix, ns.sd(), ns.maximum());
3263 3252 }
3264 3253 }
3265 3254
3266 3255 void ConcurrentMark::print_summary_info() {
3267 3256 gclog_or_tty->print_cr(" Concurrent marking:");
3268 3257 print_ms_time_info(" ", "init marks", _init_times);
3269 3258 print_ms_time_info(" ", "remarks", _remark_times);
3270 3259 {
3271 3260 print_ms_time_info(" ", "final marks", _remark_mark_times);
3272 3261 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3273 3262
3274 3263 }
3275 3264 print_ms_time_info(" ", "cleanups", _cleanup_times);
3276 3265 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3277 3266 _total_counting_time,
3278 3267 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3279 3268 (double)_cleanup_times.num()
3280 3269 : 0.0));
3281 3270 if (G1ScrubRemSets) {
3282 3271 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3283 3272 _total_rs_scrub_time,
3284 3273 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3285 3274 (double)_cleanup_times.num()
3286 3275 : 0.0));
3287 3276 }
3288 3277 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3289 3278 (_init_times.sum() + _remark_times.sum() +
3290 3279 _cleanup_times.sum())/1000.0);
3291 3280 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3292 3281 "(%8.2f s marking, %8.2f s counting).",
3293 3282 cmThread()->vtime_accum(),
3294 3283 cmThread()->vtime_mark_accum(),
3295 3284 cmThread()->vtime_count_accum());
3296 3285 }
3297 3286
3298 3287 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3299 3288 _parallel_workers->print_worker_threads_on(st);
3300 3289 }
3301 3290
3302 3291 // Closures
3303 3292 // XXX: there seems to be a lot of code duplication here;
3304 3293 // should refactor and consolidate the shared code.
3305 3294
3306 3295 // This closure is used to mark refs into the CMS generation in
3307 3296 // the CMS bit map. Called at the first checkpoint.
3308 3297
3309 3298 // We take a break if someone is trying to stop the world.
3310 3299 bool ConcurrentMark::do_yield_check(int worker_i) {
3311 3300 if (should_yield()) {
3312 3301 if (worker_i == 0) {
3313 3302 _g1h->g1_policy()->record_concurrent_pause();
3314 3303 }
3315 3304 cmThread()->yield();
3316 3305 if (worker_i == 0) {
3317 3306 _g1h->g1_policy()->record_concurrent_pause_end();
3318 3307 }
3319 3308 return true;
3320 3309 } else {
3321 3310 return false;
3322 3311 }
3323 3312 }
3324 3313
3325 3314 bool ConcurrentMark::should_yield() {
3326 3315 return cmThread()->should_yield();
3327 3316 }
3328 3317
3329 3318 bool ConcurrentMark::containing_card_is_marked(void* p) {
3330 3319 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3331 3320 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3332 3321 }
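// Editor's note (illustrative): pointer_delta(p, start, 1) is the byte
// offset of p from the bottom of the reserved heap; shifting right by
// CardTableModRefBS::card_shift (log2 of the 512-byte card size) gives
// the card index, e.g. an address 8 KB past the heap start maps to
// card 8192 >> 9 == 16.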
3333 3322
3334 3323 bool ConcurrentMark::containing_cards_are_marked(void* start,
3335 3324 void* last) {
3336 3325 return containing_card_is_marked(start) &&
3337 3326 containing_card_is_marked(last);
3338 3327 }
3339 3328
3340 3329 #ifndef PRODUCT
3341 3330 // for debugging purposes
3342 3331 void ConcurrentMark::print_finger() {
3343 3332 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3344 3333 _heap_start, _heap_end, _finger);
3345 3334 for (int i = 0; i < (int) _max_task_num; ++i) {
3346 3335 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
3347 3336 }
3348 3337 gclog_or_tty->print_cr("");
3349 3338 }
3350 3339 #endif
3351 3340
3352 3341 void CMTask::scan_object(oop obj) {
3353 3342 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3354 3343
3355 3344 if (_cm->verbose_high()) {
3356 3345 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3357 3346 _task_id, (void*) obj);
3358 3347 }
3359 3348
3360 3349 size_t obj_size = obj->size();
3361 3350 _words_scanned += obj_size;
3362 3351
3363 3352 obj->oop_iterate(_cm_oop_closure);
3364 3353 statsOnly( ++_objs_scanned );
3365 3354 check_limits();
3366 3355 }
3367 3356
3368 3357 // Closure for iteration over bitmaps
3369 3358 class CMBitMapClosure : public BitMapClosure {
3370 3359 private:
3371 3360 // the bitmap that is being iterated over
3372 3361 CMBitMap* _nextMarkBitMap;
3373 3362 ConcurrentMark* _cm;
3374 3363 CMTask* _task;
3375 3364 // true if we're scanning a heap region claimed by the task (so that
3376 3365 // we move the finger along), false if we're not, i.e. currently when
3377 3366 // scanning a heap region popped from the region stack (so that we
3378 3367 // do not move the task finger along; it'd be a mistake if we did so).
3379 3368 bool _scanning_heap_region;
3380 3369
3381 3370 public:
3382 3371 CMBitMapClosure(CMTask *task,
3383 3372 ConcurrentMark* cm,
3384 3373 CMBitMap* nextMarkBitMap)
3385 3374 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3386 3375
3387 3376 void set_scanning_heap_region(bool scanning_heap_region) {
3388 3377 _scanning_heap_region = scanning_heap_region;
3389 3378 }
3390 3379
3391 3380 bool do_bit(size_t offset) {
3392 3381 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3393 3382 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3394 3383 assert( addr < _cm->finger(), "invariant");
3395 3384
3396 3385 if (_scanning_heap_region) {
3397 3386 statsOnly( _task->increase_objs_found_on_bitmap() );
3398 3387 assert(addr >= _task->finger(), "invariant");
3399 3388 // We move the task's local finger along.
3400 3389 _task->move_finger_to(addr);
3401 3390 } else {
3402 3391 // We move the task's region finger along.
3403 3392 _task->move_region_finger_to(addr);
3404 3393 }
3405 3394
3406 3395 _task->scan_object(oop(addr));
3407 3396 // we only partially drain the local queue and global stack
3408 3397 _task->drain_local_queue(true);
3409 3398 _task->drain_global_stack(true);
3410 3399
3411 3400 // if the has_aborted flag has been raised, we need to bail out of
3412 3401 // the iteration
3413 3402 return !_task->has_aborted();
3414 3403 }
3415 3404 };
3416 3405
3417 3406 // Closure for iterating over objects, currently only used for
3418 3407 // processing SATB buffers.
3419 3408 class CMObjectClosure : public ObjectClosure {
3420 3409 private:
3421 3410 CMTask* _task;
3422 3411
3423 3412 public:
3424 3413 void do_object(oop obj) {
3425 3414 _task->deal_with_reference(obj);
3426 3415 }
3427 3416
3428 3417 CMObjectClosure(CMTask* task) : _task(task) { }
3429 3418 };
3430 3419
3431 3420 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3432 3421 ConcurrentMark* cm,
3433 3422 CMTask* task)
3434 3423 : _g1h(g1h), _cm(cm), _task(task) {
3435 3424 assert(_ref_processor == NULL, "should be initialized to NULL");
3436 3425
3437 3426 if (G1UseConcMarkReferenceProcessing) {
3438 3427 _ref_processor = g1h->ref_processor_cm();
3439 3428 assert(_ref_processor != NULL, "should not be NULL");
3440 3429 }
3441 3430 }
3442 3431
3443 3432 void CMTask::setup_for_region(HeapRegion* hr) {
3444 3433 // Separated the asserts so that we know which one fires.
3445 3434 assert(hr != NULL,
3446 3435 "claim_region() should have filtered out continues humongous regions");
3447 3436 assert(!hr->continuesHumongous(),
3448 3437 "claim_region() should have filtered out continues humongous regions");
3449 3438
3450 3439 if (_cm->verbose_low()) {
3451 3440 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3452 3441 _task_id, hr);
3453 3442 }
3454 3443
3455 3444 _curr_region = hr;
3456 3445 _finger = hr->bottom();
3457 3446 update_region_limit();
3458 3447 }
3459 3448
3460 3449 void CMTask::update_region_limit() {
3461 3450 HeapRegion* hr = _curr_region;
3462 3451 HeapWord* bottom = hr->bottom();
3463 3452 HeapWord* limit = hr->next_top_at_mark_start();
3464 3453
3465 3454 if (limit == bottom) {
3466 3455 if (_cm->verbose_low()) {
3467 3456 gclog_or_tty->print_cr("[%d] found an empty region "
3468 3457 "["PTR_FORMAT", "PTR_FORMAT")",
3469 3458 _task_id, bottom, limit);
3470 3459 }
3471 3460 // The region was collected underneath our feet.
3472 3461 // We set the finger to bottom to ensure that the bitmap
3473 3462 // iteration that will follow this will not do anything.
3474 3463 // (this is not a condition that holds when we set the region up,
3475 3464 // as the region is not supposed to be empty in the first place)
3476 3465 _finger = bottom;
3477 3466 } else if (limit >= _region_limit) {
3478 3467 assert(limit >= _finger, "peace of mind");
3479 3468 } else {
3480 3469 assert(limit < _region_limit, "only way to get here");
3481 3470 // This can happen under some pretty unusual circumstances. An
3482 3471 // evacuation pause empties the region underneath our feet (NTAMS
3483 3472 // at bottom). We then do some allocation in the region (NTAMS
3484 3473 // stays at bottom), followed by the region being used as a GC
3485 3474 // alloc region (NTAMS will move to top() and the objects
3486 3475 // originally below it will be grayed). All objects now marked in
3487 3476 // the region are explicitly grayed, if below the global finger,
3488 3477 // and we do not need in fact to scan anything else. So, we simply
3489 3478 // set _finger to be limit to ensure that the bitmap iteration
3490 3479 // doesn't do anything.
3491 3480 _finger = limit;
3492 3481 }
3493 3482
3494 3483 _region_limit = limit;
3495 3484 }
3496 3485
3497 3486 void CMTask::giveup_current_region() {
3498 3487 assert(_curr_region != NULL, "invariant");
3499 3488 if (_cm->verbose_low()) {
3500 3489 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3501 3490 _task_id, _curr_region);
3502 3491 }
3503 3492 clear_region_fields();
3504 3493 }
3505 3494
3506 3495 void CMTask::clear_region_fields() {
3507 3496 // Values for these three fields that indicate that we're not
3508 3497 // holding on to a region.
3509 3498 _curr_region = NULL;
3510 3499 _finger = NULL;
3511 3500 _region_limit = NULL;
3512 3501
3513 3502 _region_finger = NULL;
3514 3503 }
3515 3504
3516 3505 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3517 3506 if (cm_oop_closure == NULL) {
3518 3507 assert(_cm_oop_closure != NULL, "invariant");
3519 3508 } else {
3520 3509 assert(_cm_oop_closure == NULL, "invariant");
3521 3510 }
3522 3511 _cm_oop_closure = cm_oop_closure;
3523 3512 }
3524 3513
3525 3514 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3526 3515 guarantee(nextMarkBitMap != NULL, "invariant");
3527 3516
3528 3517 if (_cm->verbose_low()) {
3529 3518 gclog_or_tty->print_cr("[%d] resetting", _task_id);
3530 3519 }
3531 3520
3532 3521 _nextMarkBitMap = nextMarkBitMap;
3533 3522 clear_region_fields();
3534 3523 assert(_aborted_region.is_empty(), "should have been cleared");
3535 3524
3536 3525 _calls = 0;
3537 3526 _elapsed_time_ms = 0.0;
3538 3527 _termination_time_ms = 0.0;
3539 3528 _termination_start_time_ms = 0.0;
3540 3529
3541 3530 #if _MARKING_STATS_
3542 3531 _local_pushes = 0;
3543 3532 _local_pops = 0;
3544 3533 _local_max_size = 0;
3545 3534 _objs_scanned = 0;
3546 3535 _global_pushes = 0;
3547 3536 _global_pops = 0;
3548 3537 _global_max_size = 0;
3549 3538 _global_transfers_to = 0;
3550 3539 _global_transfers_from = 0;
3551 3540 _region_stack_pops = 0;
3552 3541 _regions_claimed = 0;
3553 3542 _objs_found_on_bitmap = 0;
3554 3543 _satb_buffers_processed = 0;
3555 3544 _steal_attempts = 0;
3556 3545 _steals = 0;
3557 3546 _aborted = 0;
3558 3547 _aborted_overflow = 0;
3559 3548 _aborted_cm_aborted = 0;
3560 3549 _aborted_yield = 0;
3561 3550 _aborted_timed_out = 0;
3562 3551 _aborted_satb = 0;
3563 3552 _aborted_termination = 0;
3564 3553 #endif // _MARKING_STATS_
3565 3554 }
3566 3555
3567 3556 bool CMTask::should_exit_termination() {
3568 3557 regular_clock_call();
3569 3558 // This is called when we are in the termination protocol. We should
3570 3559 // quit if, for some reason, this task wants to abort or the global
3571 3560 // stack is not empty (this means that we can get work from it).
3572 3561 return !_cm->mark_stack_empty() || has_aborted();
3573 3562 }
3574 3563
3575 3564 void CMTask::reached_limit() {
3576 3565 assert(_words_scanned >= _words_scanned_limit ||
3577 3566 _refs_reached >= _refs_reached_limit,
3578 3567 "shouldn't have been called otherwise");
3579 3568 regular_clock_call();
3580 3569 }
3581 3570
3582 3571 void CMTask::regular_clock_call() {
3583 3572 if (has_aborted()) return;
3584 3573
3585 3574 // First, we need to recalculate the words scanned and refs reached
3586 3575 // limits for the next clock call.
3587 3576 recalculate_limits();
3588 3577
3589 3578 // During the regular clock call we do the following
3590 3579
3591 3580 // (1) If an overflow has been flagged, then we abort.
3592 3581 if (_cm->has_overflown()) {
3593 3582 set_has_aborted();
3594 3583 return;
3595 3584 }
3596 3585
3597 3586 // If we are not concurrent (i.e. we're doing remark) we don't need
3598 3587 // to check anything else. The other steps are only needed during
3599 3588 // the concurrent marking phase.
3600 3589 if (!concurrent()) return;
3601 3590
3602 3591 // (2) If marking has been aborted for Full GC, then we also abort.
3603 3592 if (_cm->has_aborted()) {
3604 3593 set_has_aborted();
3605 3594 statsOnly( ++_aborted_cm_aborted );
3606 3595 return;
3607 3596 }
3608 3597
3609 3598 double curr_time_ms = os::elapsedVTime() * 1000.0;
3610 3599
3611 3600 // (3) If marking stats are enabled, then we update the step history.
3612 3601 #if _MARKING_STATS_
3613 3602 if (_words_scanned >= _words_scanned_limit) {
3614 3603 ++_clock_due_to_scanning;
3615 3604 }
3616 3605 if (_refs_reached >= _refs_reached_limit) {
3617 3606 ++_clock_due_to_marking;
3618 3607 }
3619 3608
3620 3609 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3621 3610 _interval_start_time_ms = curr_time_ms;
3622 3611 _all_clock_intervals_ms.add(last_interval_ms);
3623 3612
3624 3613 if (_cm->verbose_medium()) {
3625 3614 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3626 3615 "scanned = %d%s, refs reached = %d%s",
3627 3616 _task_id, last_interval_ms,
3628 3617 _words_scanned,
3629 3618 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3630 3619 _refs_reached,
3631 3620 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3632 3621 }
3633 3622 #endif // _MARKING_STATS_
3634 3623
3635 3624 // (4) We check whether we should yield. If we have to, then we abort.
3636 3625 if (_cm->should_yield()) {
3637 3626 // We should yield. To do this we abort the task. The caller is
3638 3627 // responsible for yielding.
3639 3628 set_has_aborted();
3640 3629 statsOnly( ++_aborted_yield );
3641 3630 return;
3642 3631 }
3643 3632
3644 3633 // (5) We check whether we've reached our time quota. If we have,
3645 3634 // then we abort.
3646 3635 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3647 3636 if (elapsed_time_ms > _time_target_ms) {
3648 3637 set_has_aborted();
3649 3638 _has_timed_out = true;
3650 3639 statsOnly( ++_aborted_timed_out );
3651 3640 return;
3652 3641 }
3653 3642
3654 3643 // (6) Finally, we check whether there are enough completed SATB
3655 3644 // buffers available for processing. If there are, we abort.
3656 3645 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3657 3646 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3658 3647 if (_cm->verbose_low()) {
3659 3648 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3660 3649 _task_id);
3661 3650 }
3662 3651 // we do need to process SATB buffers, we'll abort and restart
3663 3652 // the marking task to do so
3664 3653 set_has_aborted();
3665 3654 statsOnly( ++_aborted_satb );
3666 3655 return;
3667 3656 }
3668 3657 }
3669 3658
3670 3659 void CMTask::recalculate_limits() {
3671 3660 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3672 3661 _words_scanned_limit = _real_words_scanned_limit;
3673 3662
3674 3663 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3675 3664 _refs_reached_limit = _real_refs_reached_limit;
3676 3665 }
3677 3666
3678 3667 void CMTask::decrease_limits() {
3679 3668 // This is called when we believe that we're going to do an infrequent
3680 3669 // operation which will increase the per byte scanned cost (i.e. move
3681 3670 // entries to/from the global stack). It basically tries to decrease the
3682 3671 // scanning limit so that the clock is called earlier.
3683 3672
3684 3673 if (_cm->verbose_medium()) {
3685 3674 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3686 3675 }
3687 3676
3688 3677 _words_scanned_limit = _real_words_scanned_limit -
3689 3678 3 * words_scanned_period / 4;
3690 3679 _refs_reached_limit = _real_refs_reached_limit -
3691 3680 3 * refs_reached_period / 4;
3692 3681 }
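// Editor's note (worked example, period value hypothetical): pulling a
// limit back by 3/4 of a period leaves ~1/4 of the usual work; with a
// words_scanned_period of 12288 the limit drops by 9216, so the clock
// fires after at most 3072 more words are scanned.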
3693 3682
3694 3683 void CMTask::move_entries_to_global_stack() {
3695 3684 // local array where we'll store the entries that will be popped
3696 3685 // from the local queue
3697 3686 oop buffer[global_stack_transfer_size];
3698 3687
3699 3688 int n = 0;
3700 3689 oop obj;
3701 3690 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3702 3691 buffer[n] = obj;
3703 3692 ++n;
3704 3693 }
3705 3694
3706 3695 if (n > 0) {
3707 3696 // we popped at least one entry from the local queue
3708 3697
3709 3698 statsOnly( ++_global_transfers_to; _local_pops += n );
3710 3699
3711 3700 if (!_cm->mark_stack_push(buffer, n)) {
3712 3701 if (_cm->verbose_low()) {
3713 3702 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3714 3703 _task_id);
3715 3704 }
3716 3705 set_has_aborted();
3717 3706 } else {
3718 3707 // the transfer was successful
3719 3708
3720 3709 if (_cm->verbose_medium()) {
3721 3710 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3722 3711 _task_id, n);
3723 3712 }
3724 3713 statsOnly( int tmp_size = _cm->mark_stack_size();
3725 3714 if (tmp_size > _global_max_size) {
3726 3715 _global_max_size = tmp_size;
3727 3716 }
3728 3717 _global_pushes += n );
3729 3718 }
3730 3719 }
3731 3720
3732 3721 // this operation was quite expensive, so decrease the limits
3733 3722 decrease_limits();
3734 3723 }
3735 3724
3736 3725 void CMTask::get_entries_from_global_stack() {
3737 3726 // local array where we'll store the entries that will be popped
3738 3727 // from the global stack.
3739 3728 oop buffer[global_stack_transfer_size];
3740 3729 int n;
3741 3730 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3742 3731 assert(n <= global_stack_transfer_size,
3743 3732 "we should not pop more than the given limit");
3744 3733 if (n > 0) {
3745 3734 // yes, we did actually pop at least one entry
3746 3735
3747 3736 statsOnly( ++_global_transfers_from; _global_pops += n );
3748 3737 if (_cm->verbose_medium()) {
3749 3738 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3750 3739 _task_id, n);
3751 3740 }
3752 3741 for (int i = 0; i < n; ++i) {
3753 3742 bool success = _task_queue->push(buffer[i]);
3754 3743 // We only call this when the local queue is empty or under a
3755 3744 // given target limit. So, we do not expect this push to fail.
3756 3745 assert(success, "invariant");
3757 3746 }
3758 3747
3759 3748 statsOnly( int tmp_size = _task_queue->size();
3760 3749 if (tmp_size > _local_max_size) {
3761 3750 _local_max_size = tmp_size;
3762 3751 }
3763 3752 _local_pushes += n );
3764 3753 }
3765 3754
3766 3755 // this operation was quite expensive, so decrease the limits
3767 3756 decrease_limits();
3768 3757 }
3769 3758
3770 3759 void CMTask::drain_local_queue(bool partially) {
3771 3760 if (has_aborted()) return;
3772 3761
3773 3762 // Decide what the target size is, depending on whether we're going to
3774 3763 // drain it partially (so that other tasks can steal if they run out
3775 3764 // of things to do) or totally (at the very end).
3776 3765 size_t target_size;
3777 3766 if (partially) {
3778 3767 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3779 3768 } else {
3780 3769 target_size = 0;
3781 3770 }
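// Editor's note (illustrative): the partial target is the smaller of a
// third of the queue's capacity and GCDrainStackTargetSize, e.g. for a
// hypothetical 16384-entry queue it is min(5461, GCDrainStackTargetSize);
// the queue is only drained down to that size so that entries remain
// available for other tasks to steal.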
3782 3771
3783 3772 if (_task_queue->size() > target_size) {
3784 3773 if (_cm->verbose_high()) {
3785 3774 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3786 3775 _task_id, target_size);
3787 3776 }
3788 3777
3789 3778 oop obj;
3790 3779 bool ret = _task_queue->pop_local(obj);
3791 3780 while (ret) {
3792 3781 statsOnly( ++_local_pops );
3793 3782
3794 3783 if (_cm->verbose_high()) {
3795 3784 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3796 3785 (void*) obj);
3797 3786 }
3798 3787
3799 3788 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3800 3789 assert(!_g1h->is_on_master_free_list(
3801 3790 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3802 3791
3803 3792 scan_object(obj);
3804 3793
3805 3794 if (_task_queue->size() <= target_size || has_aborted()) {
3806 3795 ret = false;
3807 3796 } else {
3808 3797 ret = _task_queue->pop_local(obj);
3809 3798 }
3810 3799 }
3811 3800
3812 3801 if (_cm->verbose_high()) {
3813 3802 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3814 3803 _task_id, _task_queue->size());
3815 3804 }
3816 3805 }
3817 3806 }
3818 3807
3819 3808 void CMTask::drain_global_stack(bool partially) {
3820 3809 if (has_aborted()) return;
3821 3810
3822 3811 // We have a policy to drain the local queue before we attempt to
3823 3812 // drain the global stack.
3824 3813 assert(partially || _task_queue->size() == 0, "invariant");
3825 3814
3826 3815 // Decide what the target size is, depending on whether we're going to
3827 3816 // drain it partially (so that other tasks can steal if they run out
3828 3817 // of things to do) or totally (at the very end). Notice that,
3829 3818 // because we move entries from the global stack in chunks or
3830 3819 // because another task might be doing the same, we might in fact
3831 3820 // drop below the target. But this is not a problem.
3832 3821 size_t target_size;
3833 3822 if (partially) {
3834 3823 target_size = _cm->partial_mark_stack_size_target();
3835 3824 } else {
3836 3825 target_size = 0;
3837 3826 }
3838 3827
3839 3828 if (_cm->mark_stack_size() > target_size) {
3840 3829 if (_cm->verbose_low()) {
3841 3830 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3842 3831 _task_id, target_size);
3843 3832 }
3844 3833
3845 3834 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3846 3835 get_entries_from_global_stack();
3847 3836 drain_local_queue(partially);
3848 3837 }
3849 3838
3850 3839 if (_cm->verbose_low()) {
3851 3840 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3852 3841 _task_id, _cm->mark_stack_size());
3853 3842 }
3854 3843 }
3855 3844 }
3856 3845
3857 3846 // SATB Queue has several assumptions on whether to call the par or
3858 3847 // non-par versions of the methods. This is why some of the code is
3859 3848 // replicated. We should really get rid of the single-threaded version
3860 3849 // of the code to simplify things.
3861 3850 void CMTask::drain_satb_buffers() {
3862 3851 if (has_aborted()) return;
3863 3852
3864 3853 // We set this so that the regular clock knows that we're in the
3865 3854 // middle of draining buffers and doesn't set the abort flag when it
3866 3855 // notices that SATB buffers are available for draining. It'd be
3867 3856 // very counterproductive if it did that. :-)
3868 3857 _draining_satb_buffers = true;
3869 3858
3870 3859 CMObjectClosure oc(this);
3871 3860 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3872 3861 if (G1CollectedHeap::use_parallel_gc_threads()) {
3873 3862 satb_mq_set.set_par_closure(_task_id, &oc);
3874 3863 } else {
3875 3864 satb_mq_set.set_closure(&oc);
3876 3865 }
3877 3866
3878 3867 // This keeps claiming and applying the closure to completed buffers
3879 3868 // until we run out of buffers or we need to abort.
3880 3869 if (G1CollectedHeap::use_parallel_gc_threads()) {
3881 3870 while (!has_aborted() &&
3882 3871 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3883 3872 if (_cm->verbose_medium()) {
3884 3873 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3885 3874 }
3886 3875 statsOnly( ++_satb_buffers_processed );
3887 3876 regular_clock_call();
3888 3877 }
3889 3878 } else {
3890 3879 while (!has_aborted() &&
3891 3880 satb_mq_set.apply_closure_to_completed_buffer()) {
3892 3881 if (_cm->verbose_medium()) {
3893 3882 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3894 3883 }
3895 3884 statsOnly( ++_satb_buffers_processed );
3896 3885 regular_clock_call();
3897 3886 }
3898 3887 }
3899 3888
3900 3889 if (!concurrent() && !has_aborted()) {
3901 3890 // We should only do this during remark.
3902 3891 if (G1CollectedHeap::use_parallel_gc_threads()) {
3903 3892 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3904 3893 } else {
3905 3894 satb_mq_set.iterate_closure_all_threads();
3906 3895 }
3907 3896 }
3908 3897
3909 3898 _draining_satb_buffers = false;
3910 3899
3911 3900 assert(has_aborted() ||
3912 3901 concurrent() ||
3913 3902 satb_mq_set.completed_buffers_num() == 0, "invariant");
3914 3903
3915 3904 if (G1CollectedHeap::use_parallel_gc_threads()) {
3916 3905 satb_mq_set.set_par_closure(_task_id, NULL);
3917 3906 } else {
3918 3907 satb_mq_set.set_closure(NULL);
3919 3908 }
3920 3909
3921 3910 // again, this was a potentially expensive operation, decrease the
3922 3911 // limits to get the regular clock call early
3923 3912 decrease_limits();
3924 3913 }
3925 3914
3926 3915 void CMTask::drain_region_stack(BitMapClosure* bc) {
3927 3916 if (has_aborted()) return;
3928 3917
3929 3918 assert(_region_finger == NULL,
3930 3919 "it should be NULL when we're not scanning a region");
3931 3920
3932 3921 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3933 3922 if (_cm->verbose_low()) {
3934 3923 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3935 3924 _task_id, _cm->region_stack_size());
3936 3925 }
3937 3926
3938 3927 MemRegion mr;
3939 3928
3940 3929 if (!_aborted_region.is_empty()) {
3941 3930 mr = _aborted_region;
3942 3931 _aborted_region = MemRegion();
3943 3932
3944 3933 if (_cm->verbose_low()) {
3945 3934 gclog_or_tty->print_cr("[%d] scanning aborted region "
3946 3935 "[ " PTR_FORMAT ", " PTR_FORMAT " )",
3947 3936 _task_id, mr.start(), mr.end());
3948 3937 }
3949 3938 } else {
3950 3939 mr = _cm->region_stack_pop_lock_free();
3951 3940 // it returns MemRegion() if the pop fails
3952 3941 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3953 3942 }
3954 3943
3955 3944 while (mr.start() != NULL) {
3956 3945 if (_cm->verbose_medium()) {
3957 3946 gclog_or_tty->print_cr("[%d] we are scanning region "
3958 3947 "["PTR_FORMAT", "PTR_FORMAT")",
3959 3948 _task_id, mr.start(), mr.end());
3960 3949 }
3961 3950
3962 3951 assert(mr.end() <= _cm->finger(),
3963 3952 "otherwise the region shouldn't be on the stack");
3964 3953 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3965 3954 if (_nextMarkBitMap->iterate(bc, mr)) {
3966 3955 assert(!has_aborted(),
3967 3956 "cannot abort the task without aborting the bitmap iteration");
3968 3957
3969 3958 // We finished iterating over the region without aborting.
3970 3959 regular_clock_call();
3971 3960 if (has_aborted()) {
3972 3961 mr = MemRegion();
3973 3962 } else {
3974 3963 mr = _cm->region_stack_pop_lock_free();
3975 3964 // it returns MemRegion() if the pop fails
3976 3965 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3977 3966 }
3978 3967 } else {
3979 3968 assert(has_aborted(), "currently the only way to do so");
3980 3969
3981 3970 // The only way to abort the bitmap iteration is to return
3982 3971 // false from the do_bit() method. However, inside the
3983 3972 // do_bit() method we move the _region_finger to point to the
3984 3973 // object currently being looked at. So, if we bail out, we
3985 3974 // have definitely set _region_finger to something non-null.
3986 3975 assert(_region_finger != NULL, "invariant");
3987 3976
3988 3977 // Make sure that any previously aborted region has been
3989 3978 // cleared.
3990 3979 assert(_aborted_region.is_empty(), "aborted region not cleared");
3991 3980
3992 3981 // The iteration was actually aborted. So now _region_finger
3993 3982 // points to the address of the object we last scanned. If we
3994 3983 // leave it there, when we restart this task, we will rescan
3995 3984 // the object. It is easy to avoid this. We move the finger by
3996 3985 // enough to point to the next possible object header (the
3997 3986 // bitmap knows by how much we need to move it as it knows its
3998 3987 // granularity).
3999 3988 MemRegion newRegion =
4000 3989 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
4001 3990
4002 3991 if (!newRegion.is_empty()) {
4003 3992 if (_cm->verbose_low()) {
4004 3993 gclog_or_tty->print_cr("[%d] recording unscanned region "
4005 3994 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
4006 3995 _task_id,
4007 3996 newRegion.start(), newRegion.end());
4008 3997 }
4009 3998 // Now record the part of the region we didn't scan to
4010 3999 // make sure this task scans it later.
4011 4000 _aborted_region = newRegion;
4012 4001 }
4013 4002 // break from while
4014 4003 mr = MemRegion();
4015 4004 }
4016 4005 _region_finger = NULL;
4017 4006 }
4018 4007
4019 4008 if (_cm->verbose_low()) {
4020 4009 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
4021 4010 _task_id, _cm->region_stack_size());
4022 4011 }
4023 4012 }
4024 4013 }
4025 4014
4026 4015 void CMTask::print_stats() {
4027 4016 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
4028 4017 _task_id, _calls);
4029 4018 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4030 4019 _elapsed_time_ms, _termination_time_ms);
4031 4020 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4032 4021 _step_times_ms.num(), _step_times_ms.avg(),
4033 4022 _step_times_ms.sd());
4034 4023 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4035 4024 _step_times_ms.maximum(), _step_times_ms.sum());
4036 4025
4037 4026 #if _MARKING_STATS_
4038 4027 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4039 4028 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4040 4029 _all_clock_intervals_ms.sd());
4041 4030 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4042 4031 _all_clock_intervals_ms.maximum(),
4043 4032 _all_clock_intervals_ms.sum());
4044 4033 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
4045 4034 _clock_due_to_scanning, _clock_due_to_marking);
4046 4035 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
4047 4036 _objs_scanned, _objs_found_on_bitmap);
4048 4037 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
4049 4038 _local_pushes, _local_pops, _local_max_size);
4050 4039 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
4051 4040 _global_pushes, _global_pops, _global_max_size);
4052 4041 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
4053 4042 _global_transfers_to,_global_transfers_from);
4054 4043 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
4055 4044 _regions_claimed, _region_stack_pops);
4056 4045 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
4057 4046 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
4058 4047 _steal_attempts, _steals);
4059 4048 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
4060 4049 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
4061 4050 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4062 4051 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
4063 4052 _aborted_timed_out, _aborted_satb, _aborted_termination);
4064 4053 #endif // _MARKING_STATS_
4065 4054 }
4066 4055
4067 4056 /*****************************************************************************
4068 4057
4069 4058 The do_marking_step(time_target_ms) method is the building block
4070 4059 of the parallel marking framework. It can be called in parallel
4071 4060 with other invocations of do_marking_step() on different tasks
4072 4061 (but only one per task, obviously) and concurrently with the
4073 4062 mutator threads, or during remark, hence it eliminates the need
4074 4063 for two versions of the code. When called during remark, it will
4075 4064 pick up from where the task left off during the concurrent marking
4076 4065 phase. Interestingly, tasks are also claimable during evacuation
4077 4066 pauses, since do_marking_step() ensures that it aborts before
4078 4067 it needs to yield.
4079 4068
4080 4069 The data structures that it uses to do marking work are the
4081 4070 following:
4082 4071
4083 4072 (1) Marking Bitmap. If there are gray objects that appear only
4084 4073 on the bitmap (this happens either when dealing with an overflow
4085 4074 or when the initial marking phase has simply marked the roots
4086 4075 and didn't push them on the stack), then tasks claim heap
4087 4076 regions whose bitmap they then scan to find gray objects. A
4088 4077 global finger indicates where the end of the last claimed region
4089 4078 is. A local finger indicates how far into the region a task has
4090 4079 scanned. The two fingers are used to determine how to gray an
4091 4080 object (i.e. whether simply marking it is OK, as it will be
4092 4081 visited by a task in the future, or whether it also needs to be
4093 4082 pushed on a stack).
4094 4083
4095 4084 (2) Local Queue. The local queue of the task which is accessed
4096 4085 reasonably efficiently by the task. Other tasks can steal from
4097 4086 it when they run out of work. Throughout the marking phase, a
4098 4087 task attempts to keep its local queue short but not totally
4099 4088 empty, so that entries are available for stealing by other
4100 4089 tasks. Only when there is no more work will a task totally
4101 4090 drain its local queue.
4102 4091
4103 4092 (3) Global Mark Stack. This handles local queue overflow. During
4104 4093 marking only sets of entries are moved between it and the local
4105 4094 queues, as access to it requires a mutex and more fine-grained
4106 4095 interaction with it, which might cause contention. If it
4107 4096 overflows, then the marking phase should restart and iterate
4108 4097 over the bitmap to identify gray objects. Throughout the marking
4109 4098 phase, tasks attempt to keep the global mark stack at a small
4110 4099 length but not totally empty, so that entries are available for
4111 4100 popping by other tasks. Only when there is no more work will tasks
4112 4101 totally drain the global mark stack.
4113 4102
4114 4103 (4) Global Region Stack. Entries on it correspond to areas of
4115 4104 the bitmap that need to be scanned since they contain gray
4116 4105 objects. Pushes on the region stack only happen during
4117 4106 evacuation pauses and typically correspond to areas covered by
4118 4107 GC LABs. If it overflows, then the marking phase should restart
4119 4108 and iterate over the bitmap to identify gray objects. Tasks will
4120 4109 try to totally drain the region stack as soon as possible.
4121 4110
4122 4111 (5) SATB Buffer Queue. This is where completed SATB buffers are
4123 4112 made available. Buffers are regularly removed from this queue
4124 4113 and scanned for roots, so that the queue doesn't get too
4125 4114 long. During remark, all completed buffers are processed, as
4126 4115 well as the filled in parts of any uncompleted buffers.
4127 4116
4128 4117 The do_marking_step() method tries to abort when the time target
4129 4118 has been reached. There are a few other cases when the
4130 4119 do_marking_step() method also aborts:
4131 4120
4132 4121 (1) When the marking phase has been aborted (after a Full GC).
4133 4122
4134 4123 (2) When a global overflow (either on the global stack or the
4135 4124 region stack) has been triggered. Before the task aborts, it
4136 4125 will actually sync up with the other tasks to ensure that all
4137 4126 the marking data structures (local queues, stacks, fingers etc.)
4138 4127 are re-initialised so that when do_marking_step() completes,
4139 4128 the marking phase can immediately restart.
4140 4129
4141 4130 (3) When enough completed SATB buffers are available. The
4142 4131 do_marking_step() method only tries to drain SATB buffers right
4143 4132 at the beginning. So, if enough buffers are available, the
4144 4133 marking step aborts and the SATB buffers are processed at
4145 4134 the beginning of the next invocation.
4146 4135
4147 4136 (4) To yield. When we have to yield, we abort and yield
4148 4137 right at the end of do_marking_step(). This saves us from a lot
4149 4138 of hassle as, by yielding, we might allow a Full GC. If this
4150 4139 happens then objects will be compacted underneath our feet, the
4151 4140 heap might shrink, etc. We save checking for this by just
4152 4141 aborting and doing the yield right at the end.
4153 4142
4154 4143 From the above it follows that the do_marking_step() method should
4155 4144 be called in a loop (or, otherwise, regularly) until it completes.
4156 4145
4157 4146 If a marking step completes without its has_aborted() flag being
4158 4147 true, it means it has completed the current marking phase (and
4159 4148 also all other marking tasks have done so and have all synced up).
4160 4149
4161 4150 A method called regular_clock_call() is invoked "regularly" (in
4162 4151 sub-ms intervals) throughout marking. It is this clock method that
4163 4152 checks all the abort conditions which were mentioned above and
4164 4153 decides when the task should abort. A work-based scheme is used to
4165 4154 trigger this clock method: when the number of object words the
4166 4155 marking phase has scanned or the number of references the marking
4167 4156 phase has visited reaches a given limit. Additional invocations of
4168 4157 the clock method have been planted in a few other strategic places
4169 4158 too. The initial reason for the clock method was to avoid calling
4170 4159 vtime too regularly, as it is quite expensive. So, once it was in
4171 4160 place, it was natural to piggy-back all the other conditions on it
4172 4161 too and not constantly check them throughout the code.
4173 4162
4174 4163 *****************************************************************************/
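// Editor's sketch (illustrative only, not part of this patch): the
// driver loop implied by the comment above. The task pointer and the
// 10.0 ms time slice are assumptions for illustration.
//
//   CMTask* task = ...;  // a claimed and initialised marking task
//   do {
//     task->do_marking_step(10.0 /* time_target_ms */,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//     // if the step aborted, the caller yields and/or handles
//     // overflow here before the next iteration resumes the task
//   } while (task->has_aborted());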
4175 4164
4176 4165 void CMTask::do_marking_step(double time_target_ms,
4177 4166 bool do_stealing,
4178 4167 bool do_termination) {
4179 4168 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4180 4169 assert(concurrent() == _cm->concurrent(), "they should be the same");
4181 4170
4182 4171 assert(concurrent() || _cm->region_stack_empty(),
4183 4172 "the region stack should have been cleared before remark");
4184 4173 assert(concurrent() || !_cm->has_aborted_regions(),
4185 4174 "aborted regions should have been cleared before remark");
4186 4175 assert(_region_finger == NULL,
4187 4176 "this should be non-null only when a region is being scanned");
4188 4177
4189 4178 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4190 4179 assert(_task_queues != NULL, "invariant");
4191 4180 assert(_task_queue != NULL, "invariant");
4192 4181 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4193 4182
4194 4183 assert(!_claimed,
4195 4184 "only one thread should claim this task at any one time");
4196 4185
4197 4186 // OK, this doesn't safeguard against all possible scenarios, as it is
4198 4187 // possible for two threads to set the _claimed flag at the same
4199 4188 // time. But it is only for debugging purposes anyway and it will
4200 4189 // catch most problems.
4201 4190 _claimed = true;
4202 4191
4203 4192 _start_time_ms = os::elapsedVTime() * 1000.0;
4204 4193 statsOnly( _interval_start_time_ms = _start_time_ms );
4205 4194
4206 4195 double diff_prediction_ms =
4207 4196 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4208 4197 _time_target_ms = time_target_ms - diff_prediction_ms;
4209 4198
4210 4199 // set up the variables that are used in the work-based scheme to
4211 4200 // call the regular clock method
4212 4201 _words_scanned = 0;
4213 4202 _refs_reached = 0;
4214 4203 recalculate_limits();
4215 4204
4216 4205 // clear all flags
4217 4206 clear_has_aborted();
4218 4207 _has_timed_out = false;
4219 4208 _draining_satb_buffers = false;
4220 4209
4221 4210 ++_calls;
4222 4211
4223 4212 if (_cm->verbose_low()) {
4224 4213 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4225 4214 "target = %1.2lfms >>>>>>>>>>",
4226 4215 _task_id, _calls, _time_target_ms);
4227 4216 }
4228 4217
4229 4218 // Set up the bitmap and oop closures. Anything that uses them is
4230 4219 // eventually called from this method, so it is OK to allocate these
4231 4220 // statically.
4232 4221 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4233 4222 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4234 4223 set_cm_oop_closure(&cm_oop_closure);
4235 4224
4236 4225 if (_cm->has_overflown()) {
4237 4226 // This can happen if the region stack or the mark stack overflows
4238 4227 // during a GC pause and this task, after a yield point,
4239 4228 // restarts. We have to abort as we need to get into the overflow
4240 4229 // protocol which happens right at the end of this task.
4241 4230 set_has_aborted();
4242 4231 }
4243 4232
4244 4233 // First drain any available SATB buffers. After this, we will not
4245 4234 // look at SATB buffers before the next invocation of this method.
4246 4235 // If enough completed SATB buffers are queued up, the regular clock
4247 4236 // will abort this task so that it restarts.
4248 4237 drain_satb_buffers();
4249 4238 // ...then partially drain the local queue and the global stack
4250 4239 drain_local_queue(true);
4251 4240 drain_global_stack(true);
4252 4241
4253 4242 // Then totally drain the region stack. We will not look at
4254 4243 // it again before the next invocation of this method. Entries on
4255 4244 // the region stack are only added during evacuation pauses, for
4256 4245 // which we have to yield. When we do, we abort the task anyway so
4257 4246 // it will look at the region stack again when it restarts.
4258 4247 bitmap_closure.set_scanning_heap_region(false);
4259 4248 drain_region_stack(&bitmap_closure);
4260 4249 // ...then partially drain the local queue and the global stack
4261 4250 drain_local_queue(true);
4262 4251 drain_global_stack(true);
4263 4252
4264 4253 do {
4265 4254 if (!has_aborted() && _curr_region != NULL) {
4266 4255 // This means that we're already holding on to a region.
4267 4256 assert(_finger != NULL, "if region is not NULL, then the finger "
4268 4257 "should not be NULL either");
4269 4258
4270 4259 // We might have restarted this task after an evacuation pause
4271 4260 // which might have evacuated the region we're holding on to
4272 4261 // underneath our feet. Let's read its limit again to make sure
4273 4262 // that we do not iterate over a region of the heap that
4274 4263 // contains garbage (update_region_limit() will also move
4275 4264 // _finger to the start of the region if it is found empty).
4276 4265 update_region_limit();
4277 4266 // We will start from _finger not from the start of the region,
4278 4267 // as we might be restarting this task after aborting half-way
4279 4268 // through scanning this region. In this case, _finger points to
4280 4269 // the address where we last found a marked object. If this is a
4281 4270 // fresh region, _finger points to start().
4282 4271 MemRegion mr = MemRegion(_finger, _region_limit);
4283 4272
4284 4273 if (_cm->verbose_low()) {
4285 4274 gclog_or_tty->print_cr("[%d] we're scanning part "
4286 4275 "["PTR_FORMAT", "PTR_FORMAT") "
4287 4276 "of region "PTR_FORMAT,
4288 4277 _task_id, _finger, _region_limit, _curr_region);
4289 4278 }
4290 4279
4291 4280 // Let's iterate over the bitmap of the part of the
4292 4281 // region that is left.
4293 4282 bitmap_closure.set_scanning_heap_region(true);
4294 4283 if (mr.is_empty() ||
4295 4284 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4296 4285 // We successfully completed iterating over the region. Now,
4297 4286 // let's give up the region.
4298 4287 giveup_current_region();
4299 4288 regular_clock_call();
4300 4289 } else {
4301 4290 assert(has_aborted(), "currently the only way to do so");
4302 4291 // The only way to abort the bitmap iteration is to return
4303 4292 // false from the do_bit() method. However, inside the
4304 4293 // do_bit() method we move the _finger to point to the
4305 4294 // object currently being looked at. So, if we bail out, we
4306 4295 // have definitely set _finger to something non-null.
4307 4296 assert(_finger != NULL, "invariant");
4308 4297
4309 4298 // Region iteration was actually aborted. So now _finger
4310 4299 // points to the address of the object we last scanned. If we
4311 4300 // leave it there, when we restart this task, we will rescan
4312 4301 // the object. It is easy to avoid this. We move the finger by
4313 4302 // enough to point to the next possible object header (the
4314 4303 // bitmap knows by how much we need to move it as it knows its
4315 4304 // granularity).
4316 4305 assert(_finger < _region_limit, "invariant");
4317 4306 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4318 4307 // Check if bitmap iteration was aborted while scanning the last object
4319 4308 if (new_finger >= _region_limit) {
4320 4309 giveup_current_region();
4321 4310 } else {
4322 4311 move_finger_to(new_finger);
4323 4312 }
4324 4313 }
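          // A sketch of the stride used above (assumed shape; the real
          // helper is an inline routine of the bitmap class):
          //
          //   HeapWord* CMBitMapRO::nextWord(HeapWord* addr) {
          //     return addr + (1 << _shifter);  // words covered by one bit
          //   }
          //
          // Advancing by one bit's worth of words puts the finger at the
          // next possible object header, so we never rescan the same bit.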
4325 4314 }
4326 4315 // At this point we have either completed iterating over the
4327 4316 // region we were holding on to, or we have aborted.
4328 4317
4329 4318 // We then partially drain the local queue and the global stack.
4330 4319 // (Do we really need this?)
4331 4320 drain_local_queue(true);
4332 4321 drain_global_stack(true);
4333 4322
4334 4323 // Read the note on the claim_region() method on why it might
4335 4324 // return NULL with potentially more regions available for
4336 4325 // claiming and why we have to check out_of_regions() to determine
4337 4326 // whether we're done or not.
4338 4327 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4339 4328 // We are going to try to claim a new region. We should have
4340 4329 // given up on the previous one.
4341 4330 // Separated the asserts so that we know which one fires.
4342 4331 assert(_curr_region == NULL, "invariant");
4343 4332 assert(_finger == NULL, "invariant");
4344 4333 assert(_region_limit == NULL, "invariant");
4345 4334 if (_cm->verbose_low()) {
4346 4335 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4347 4336 }
4348 4337 HeapRegion* claimed_region = _cm->claim_region(_task_id);
4349 4338 if (claimed_region != NULL) {
4350 4339 // Yes, we managed to claim one
4351 4340 statsOnly( ++_regions_claimed );
4352 4341
4353 4342 if (_cm->verbose_low()) {
4354 4343 gclog_or_tty->print_cr("[%d] we successfully claimed "
4355 4344 "region "PTR_FORMAT,
4356 4345 _task_id, claimed_region);
4357 4346 }
4358 4347
4359 4348 setup_for_region(claimed_region);
4360 4349 assert(_curr_region == claimed_region, "invariant");
4361 4350 }
4362 4351 // It is important to call the regular clock here. It might take
4363 4352 // a while to claim a region if, for example, we hit a large
4364 4353 // block of empty regions. So we need to call the regular clock
4365 4354 // method once round the loop to make sure it's called
4366 4355 // frequently enough.
4367 4356 regular_clock_call();
4368 4357 }
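      // For reference, a hedged sketch of the claiming idea (the real
      // claim_region() is more involved; see the note it carries): tasks
      // race to advance a shared global finger with a CAS, and the winner
      // owns the region the finger used to point at.
      //
      //   HeapWord* finger = _finger;                 // shared global finger
      //   HeapWord* end    = finger + HeapRegion::GrainWords;
      //   if (Atomic::cmpxchg_ptr(end, &_finger, finger) == finger) {
      //     // CAS succeeded: the region starting at 'finger' is ours
      //   }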
4369 4358
4370 4359 if (!has_aborted() && _curr_region == NULL) {
4371 4360 assert(_cm->out_of_regions(),
4372 4361 "at this point we should be out of regions");
4373 4362 }
4374 4363 } while ( _curr_region != NULL && !has_aborted());
4375 4364
4376 4365 if (!has_aborted()) {
4377 4366 // We cannot check whether the global stack is empty, since other
4378 4367 // tasks might be pushing objects to it concurrently. We also cannot
4379 4368 // check if the region stack is empty because if a thread is aborting
4380 4369 // it can push a partially done region back.
4381 4370 assert(_cm->out_of_regions(),
4382 4371 "at this point we should be out of regions");
4383 4372
4384 4373 if (_cm->verbose_low()) {
4385 4374 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4386 4375 }
4387 4376
4388 4377 // Try to reduce the number of available SATB buffers so that
4389 4378 // remark has less work to do.
4390 4379 drain_satb_buffers();
4391 4380 }
4392 4381
4393 4382 // Since we've done everything else, we can now totally drain the
4394 4383 // local queue and global stack.
4395 4384 drain_local_queue(false);
4396 4385 drain_global_stack(false);
4397 4386
4398 4387   // Attempt at work stealing from other tasks' queues.
4399 4388 if (do_stealing && !has_aborted()) {
4400 4389 // We have not aborted. This means that we have finished all that
4401 4390 // we could. Let's try to do some stealing...
4402 4391
4403 4392 // We cannot check whether the global stack is empty, since other
4404 4393 // tasks might be pushing objects to it concurrently. We also cannot
4405 4394 // check if the region stack is empty because if a thread is aborting
4406 4395 // it can push a partially done region back.
4407 4396 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4408 4397 "only way to reach here");
4409 4398
4410 4399 if (_cm->verbose_low()) {
4411 4400 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4412 4401 }
4413 4402
4414 4403 while (!has_aborted()) {
4415 4404 oop obj;
4416 4405 statsOnly( ++_steal_attempts );
4417 4406
4418 4407 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4419 4408 if (_cm->verbose_medium()) {
4420 4409 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4421 4410 _task_id, (void*) obj);
4422 4411 }
4423 4412
4424 4413 statsOnly( ++_steals );
4425 4414
4426 4415 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4427 4416 "any stolen object should be marked");
4428 4417 scan_object(obj);
4429 4418
4430 4419 // And since we're towards the end, let's totally drain the
4431 4420 // local queue and global stack.
4432 4421 drain_local_queue(false);
4433 4422 drain_global_stack(false);
4434 4423 } else {
4435 4424 break;
4436 4425 }
4437 4426 }
4438 4427 }
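    // The attempt above delegates to the shared queue set; a sketch of
    // the wrapper (assumed to match its inline definition):
    //
    //   bool ConcurrentMark::try_stealing(int task_num, int* hash_seed,
    //                                     oop& obj) {
    //     return _task_queues->steal(task_num, hash_seed, obj);
    //   }
    //
    // The hash seed randomises victim selection so concurrent stealers
    // spread their probes across the other tasks' queues.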
4439 4428
4440 4429 // If we are about to wrap up and go into termination, check if we
4441 4430 // should raise the overflow flag.
4442 4431 if (do_termination && !has_aborted()) {
4443 4432 if (_cm->force_overflow()->should_force()) {
4444 4433 _cm->set_has_overflown();
4445 4434 regular_clock_call();
4446 4435 }
4447 4436 }
4448 4437
4449 4438 // We still haven't aborted. Now, let's try to get into the
4450 4439 // termination protocol.
4451 4440 if (do_termination && !has_aborted()) {
4452 4441 // We cannot check whether the global stack is empty, since other
4453 4442 // tasks might be concurrently pushing objects on it. We also cannot
4454 4443 // check if the region stack is empty because if a thread is aborting
4455 4444 // it can push a partially done region back.
4456 4445 // Separated the asserts so that we know which one fires.
4457 4446 assert(_cm->out_of_regions(), "only way to reach here");
4458 4447 assert(_task_queue->size() == 0, "only way to reach here");
4459 4448
4460 4449 if (_cm->verbose_low()) {
4461 4450 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4462 4451 }
4463 4452
4464 4453 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4465 4454 // The CMTask class also extends the TerminatorTerminator class,
4466 4455 // hence its should_exit_termination() method will also decide
4467 4456 // whether to exit the termination protocol or not.
4468 4457 bool finished = _cm->terminator()->offer_termination(this);
4469 4458 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4470 4459 _termination_time_ms +=
4471 4460 termination_end_time_ms - _termination_start_time_ms;
4472 4461
4473 4462 if (finished) {
4474 4463 // We're all done.
4475 4464
4476 4465 if (_task_id == 0) {
4477 4466 // let's allow task 0 to do this
4478 4467 if (concurrent()) {
4479 4468 assert(_cm->concurrent_marking_in_progress(), "invariant");
4480 4469 // we need to set this to false before the next
4481 4470 // safepoint. This way we ensure that the marking phase
4482 4471 // doesn't observe any more heap expansions.
4483 4472 _cm->clear_concurrent_marking_in_progress();
4484 4473 }
4485 4474 }
4486 4475
4487 4476 // We can now guarantee that the global stack is empty, since
4488 4477 // all other tasks have finished. We separated the guarantees so
4489 4478 // that, if a condition is false, we can immediately find out
4490 4479 // which one.
4491 4480 guarantee(_cm->out_of_regions(), "only way to reach here");
4492 4481 guarantee(_aborted_region.is_empty(), "only way to reach here");
4493 4482 guarantee(_cm->region_stack_empty(), "only way to reach here");
4494 4483 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4495 4484 guarantee(_task_queue->size() == 0, "only way to reach here");
4496 4485 guarantee(!_cm->has_overflown(), "only way to reach here");
4497 4486 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4498 4487 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4499 4488
4500 4489 if (_cm->verbose_low()) {
4501 4490 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4502 4491 }
4503 4492 } else {
4504 4493 // Apparently there's more work to do. Let's abort this task. It
4505 4494 // will restart it and we can hopefully find more things to do.
4506 4495
4507 4496 if (_cm->verbose_low()) {
4508 4497 gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4509 4498 _task_id);
4510 4499 }
4511 4500
4512 4501 set_has_aborted();
4513 4502 statsOnly( ++_aborted_termination );
4514 4503 }
4515 4504 }
4516 4505
4517 4506 // Mainly for debugging purposes to make sure that a pointer to the
4518 4507 // closure which was statically allocated in this frame doesn't
4519 4508 // escape it by accident.
4520 4509 set_cm_oop_closure(NULL);
4521 4510 double end_time_ms = os::elapsedVTime() * 1000.0;
4522 4511 double elapsed_time_ms = end_time_ms - _start_time_ms;
4523 4512 // Update the step history.
4524 4513 _step_times_ms.add(elapsed_time_ms);
4525 4514
4526 4515 if (has_aborted()) {
4527 4516 // The task was aborted for some reason.
4528 4517
4529 4518 statsOnly( ++_aborted );
4530 4519
4531 4520 if (_has_timed_out) {
4532 4521 double diff_ms = elapsed_time_ms - _time_target_ms;
4533 4522 // Keep statistics of how well we did with respect to hitting
4534 4523 // our target only if we actually timed out (if we aborted for
4535 4524 // other reasons, then the results might get skewed).
4536 4525 _marking_step_diffs_ms.add(diff_ms);
4537 4526 }
4538 4527
4539 4528 if (_cm->has_overflown()) {
4540 4529 // This is the interesting one. We aborted because a global
4541 4530 // overflow was raised. This means we have to restart the
4542 4531 // marking phase and start iterating over regions. However, in
4543 4532 // order to do this we have to make sure that all tasks stop
4544 4533 // what they are doing and re-initialise in a safe manner. We
4545 4534 // will achieve this with the use of two barrier sync points.
4546 4535
4547 4536 if (_cm->verbose_low()) {
4548 4537 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4549 4538 }
4550 4539
4551 4540 _cm->enter_first_sync_barrier(_task_id);
4552 4541 // When we exit this sync barrier we know that all tasks have
4553 4542 // stopped doing marking work. So, it's now safe to
4554 4543 // re-initialise our data structures. At the end of this method,
4555 4544 // task 0 will clear the global data structures.
4556 4545
4557 4546 statsOnly( ++_aborted_overflow );
4558 4547
4559 4548 // We clear the local state of this task...
4560 4549 clear_region_fields();
4561 4550
4562 4551 // ...and enter the second barrier.
4563 4552 _cm->enter_second_sync_barrier(_task_id);
4564 4553       // At this point everything has been re-initialised and we're
4565 4554 // ready to restart.
4566 4555 }
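      // The restart protocol above, summarised as a sketch:
      //
      //   enter_first_sync_barrier(id);   // all tasks have stopped marking
      //   clear_region_fields();          // reset this task's local state
      //   enter_second_sync_barrier(id);  // global state re-initialised;
      //                                   // safe to restart marking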
4567 4556
4568 4557 if (_cm->verbose_low()) {
4569 4558 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4570 4559 "elapsed = %1.2lfms <<<<<<<<<<",
4571 4560 _task_id, _time_target_ms, elapsed_time_ms);
4572 4561 if (_cm->has_aborted()) {
4573 4562 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4574 4563 _task_id);
4575 4564 }
4576 4565 }
4577 4566 } else {
4578 4567 if (_cm->verbose_low()) {
4579 4568 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4580 4569 "elapsed = %1.2lfms <<<<<<<<<<",
4581 4570 _task_id, _time_target_ms, elapsed_time_ms);
4582 4571 }
4583 4572 }
4584 4573
4585 4574 _claimed = false;
4586 4575 }
4587 4576
4588 4577 CMTask::CMTask(int task_id,
4589 4578 ConcurrentMark* cm,
4590 4579 CMTaskQueue* task_queue,
4591 4580 CMTaskQueueSet* task_queues)
4592 4581 : _g1h(G1CollectedHeap::heap()),
4593 4582 _task_id(task_id), _cm(cm),
4594 4583 _claimed(false),
4595 4584 _nextMarkBitMap(NULL), _hash_seed(17),
4596 4585 _task_queue(task_queue),
4597 4586 _task_queues(task_queues),
4598 4587 _cm_oop_closure(NULL),
4599 4588 _aborted_region(MemRegion()) {
4600 4589 guarantee(task_queue != NULL, "invariant");
4601 4590 guarantee(task_queues != NULL, "invariant");
4602 4591
4603 4592 statsOnly( _clock_due_to_scanning = 0;
4604 4593 _clock_due_to_marking = 0 );
4605 4594
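  // (Assumed intent:) seed the diff sequence with a conservative 0.5ms
  // sample so that the very first get_new_prediction() call in
  // do_marking_step() has at least one data point to work with.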
4606 4595 _marking_step_diffs_ms.add(0.5);
4607 4596 }
4608 4597
4609 4598 // These are formatting macros that are used below to ensure
4610 4599 // consistent formatting. The *_H_* versions are used to format the
4611 4600 // header for a particular value and they should be kept consistent
4612 4601 // with the corresponding macro. Also note that most of the macros add
4613 4602 // the necessary white space (as a prefix) which makes them a bit
4614 4603 // easier to compose.
4615 4604
4616 4605 // All the output lines are prefixed with this string to be able to
4617 4606 // identify them easily in a large log file.
4618 4607 #define G1PPRL_LINE_PREFIX "###"
4619 4608
4620 4609 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4621 4610 #ifdef _LP64
4622 4611 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4623 4612 #else // _LP64
4624 4613 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4625 4614 #endif // _LP64
4626 4615
4627 4616 // For per-region info
4628 4617 #define G1PPRL_TYPE_FORMAT " %-4s"
4629 4618 #define G1PPRL_TYPE_H_FORMAT " %4s"
4630 4619 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4631 4620 #define G1PPRL_BYTE_H_FORMAT " %9s"
4632 4621 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4633 4622 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4634 4623
4635 4624 // For summary info
4636 4625 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4637 4626 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4638 4627 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4639 4628 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
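// For example (illustrative only), adjacent string literals concatenate
// at compile time, so
//
//   G1PPRL_LINE_PREFIX G1PPRL_TYPE_H_FORMAT G1PPRL_BYTE_H_FORMAT
//
// composes into the single format string "### %4s %9s".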
4640 4629
4641 4630 G1PrintRegionLivenessInfoClosure::
4642 4631 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4643 4632 : _out(out),
4644 4633 _total_used_bytes(0), _total_capacity_bytes(0),
4645 4634 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4646 4635 _hum_used_bytes(0), _hum_capacity_bytes(0),
4647 4636 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4648 4637 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4649 4638 MemRegion g1_committed = g1h->g1_committed();
4650 4639 MemRegion g1_reserved = g1h->g1_reserved();
4651 4640 double now = os::elapsedTime();
4652 4641
4653 4642 // Print the header of the output.
4654 4643 _out->cr();
4655 4644 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4656 4645 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4657 4646 G1PPRL_SUM_ADDR_FORMAT("committed")
4658 4647 G1PPRL_SUM_ADDR_FORMAT("reserved")
4659 4648 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4660 4649 g1_committed.start(), g1_committed.end(),
4661 4650 g1_reserved.start(), g1_reserved.end(),
4662 4651 HeapRegion::GrainBytes);
4663 4652 _out->print_cr(G1PPRL_LINE_PREFIX);
4664 4653 _out->print_cr(G1PPRL_LINE_PREFIX
4665 4654 G1PPRL_TYPE_H_FORMAT
4666 4655 G1PPRL_ADDR_BASE_H_FORMAT
4667 4656 G1PPRL_BYTE_H_FORMAT
4668 4657 G1PPRL_BYTE_H_FORMAT
4669 4658 G1PPRL_BYTE_H_FORMAT
4670 4659 G1PPRL_DOUBLE_H_FORMAT,
4671 4660 "type", "address-range",
4672 4661 "used", "prev-live", "next-live", "gc-eff");
4673 4662 _out->print_cr(G1PPRL_LINE_PREFIX
4674 4663 G1PPRL_TYPE_H_FORMAT
4675 4664 G1PPRL_ADDR_BASE_H_FORMAT
4676 4665 G1PPRL_BYTE_H_FORMAT
4677 4666 G1PPRL_BYTE_H_FORMAT
4678 4667 G1PPRL_BYTE_H_FORMAT
4679 4668 G1PPRL_DOUBLE_H_FORMAT,
4680 4669 "", "",
4681 4670 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4682 4671 }
4683 4672
4684 4673 // It takes as a parameter a reference to one of the _hum_* fields,
4685 4674 // deduces the corresponding value for a region in a humongous region
4686 4675 // series (either the region size, or what's left if the _hum_* field
4687 4676 // is < the region size), and updates the _hum_* field accordingly.
4688 4677 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4689 4678 size_t bytes = 0;
4690 4679 // The > 0 check is to deal with the prev and next live bytes which
4691 4680 // could be 0.
4692 4681 if (*hum_bytes > 0) {
4693 4682 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4694 4683 *hum_bytes -= bytes;
4695 4684 }
4696 4685 return bytes;
4697 4686 }
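// Worked example (region size assumed to be 1MB for illustration): for
// a humongous series whose "starts humongous" region reports 2.5MB of
// used bytes, three successive calls on _hum_used_bytes return 1MB, 1MB
// and 0.5MB, leaving the field at zero for the next series.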
4698 4687
4699 4688 // It deduces the values for a region in a humongous region series
4700 4689 // from the _hum_* fields and updates those accordingly. It assumes
4701 4690 // that the _hum_* fields have already been set up from the "starts
4702 4691 // humongous" region and that we visit the regions in address order.
4703 4692 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4704 4693 size_t* capacity_bytes,
4705 4694 size_t* prev_live_bytes,
4706 4695 size_t* next_live_bytes) {
4707 4696 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4708 4697 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4709 4698 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4710 4699 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4711 4700 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4712 4701 }
4713 4702
4714 4703 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4715 4704 const char* type = "";
4716 4705 HeapWord* bottom = r->bottom();
4717 4706 HeapWord* end = r->end();
4718 4707 size_t capacity_bytes = r->capacity();
4719 4708 size_t used_bytes = r->used();
4720 4709 size_t prev_live_bytes = r->live_bytes();
4721 4710 size_t next_live_bytes = r->next_live_bytes();
4722 4711 double gc_eff = r->gc_efficiency();
4723 4712 if (r->used() == 0) {
4724 4713 type = "FREE";
4725 4714 } else if (r->is_survivor()) {
4726 4715 type = "SURV";
4727 4716 } else if (r->is_young()) {
4728 4717 type = "EDEN";
4729 4718 } else if (r->startsHumongous()) {
4730 4719 type = "HUMS";
4731 4720
4732 4721 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4733 4722 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4734 4723 "they should have been zeroed after the last time we used them");
4735 4724 // Set up the _hum_* fields.
4736 4725 _hum_capacity_bytes = capacity_bytes;
4737 4726 _hum_used_bytes = used_bytes;
4738 4727 _hum_prev_live_bytes = prev_live_bytes;
4739 4728 _hum_next_live_bytes = next_live_bytes;
4740 4729 get_hum_bytes(&used_bytes, &capacity_bytes,
4741 4730 &prev_live_bytes, &next_live_bytes);
4742 4731 end = bottom + HeapRegion::GrainWords;
4743 4732 } else if (r->continuesHumongous()) {
4744 4733 type = "HUMC";
4745 4734 get_hum_bytes(&used_bytes, &capacity_bytes,
4746 4735 &prev_live_bytes, &next_live_bytes);
4747 4736 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4748 4737 } else {
4749 4738 type = "OLD";
4750 4739 }
4751 4740
4752 4741 _total_used_bytes += used_bytes;
4753 4742 _total_capacity_bytes += capacity_bytes;
4754 4743 _total_prev_live_bytes += prev_live_bytes;
4755 4744 _total_next_live_bytes += next_live_bytes;
4756 4745
4757 4746 // Print a line for this particular region.
4758 4747 _out->print_cr(G1PPRL_LINE_PREFIX
4759 4748 G1PPRL_TYPE_FORMAT
4760 4749 G1PPRL_ADDR_BASE_FORMAT
4761 4750 G1PPRL_BYTE_FORMAT
4762 4751 G1PPRL_BYTE_FORMAT
4763 4752 G1PPRL_BYTE_FORMAT
4764 4753 G1PPRL_DOUBLE_FORMAT,
4765 4754 type, bottom, end,
4766 4755 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4767 4756
4768 4757 return false;
4769 4758 }
4770 4759
4771 4760 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4772 4761 // Print the footer of the output.
4773 4762 _out->print_cr(G1PPRL_LINE_PREFIX);
4774 4763 _out->print_cr(G1PPRL_LINE_PREFIX
4775 4764 " SUMMARY"
4776 4765 G1PPRL_SUM_MB_FORMAT("capacity")
4777 4766 G1PPRL_SUM_MB_PERC_FORMAT("used")
4778 4767 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4779 4768 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4780 4769 bytes_to_mb(_total_capacity_bytes),
4781 4770 bytes_to_mb(_total_used_bytes),
4782 4771 perc(_total_used_bytes, _total_capacity_bytes),
4783 4772 bytes_to_mb(_total_prev_live_bytes),
4784 4773 perc(_total_prev_live_bytes, _total_capacity_bytes),
4785 4774 bytes_to_mb(_total_next_live_bytes),
4786 4775 perc(_total_next_live_bytes, _total_capacity_bytes));
4787 4776 _out->cr();
4788 4777 }
[... 4662 lines elided ...]