rev 2585 : [mq]: g1-reference-processing
--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 31 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
32 32 #include "gc_implementation/g1/g1RemSet.hpp"
33 33 #include "gc_implementation/g1/heapRegionRemSet.hpp"
34 34 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
35 35 #include "gc_implementation/shared/vmGCOperations.hpp"
36 36 #include "memory/genOopClosures.inline.hpp"
37 37 #include "memory/referencePolicy.hpp"
38 38 #include "memory/resourceArea.hpp"
39 39 #include "oops/oop.inline.hpp"
40 40 #include "runtime/handles.inline.hpp"
41 41 #include "runtime/java.hpp"
42 42
43 43 //
44 44 // CMS Bit Map Wrapper
45 45
46 46 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
47 47 _bm((uintptr_t*)NULL,0),
48 48 _shifter(shifter) {
49 49 _bmStartWord = (HeapWord*)(rs.base());
50 50 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
51 51 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
52 52 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
53 53
54 54 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
55 55 // For now we'll just commit all of the bit map up front.
56 56 // Later on we'll try to be more parsimonious with swap.
57 57 guarantee(_virtual_space.initialize(brs, brs.size()),
58 58 "couldn't reseve backing store for CMS bit map");
59 59 assert(_virtual_space.committed_size() == brs.size(),
60 60 "didn't reserve backing store for all of CMS bit map?");
61 61 _bm.set_map((uintptr_t*)_virtual_space.low());
62 62 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
63 63 _bmWordSize, "inconsistency in bit map sizing");
64 64 _bm.set_size(_bmWordSize >> _shifter);
65 65 }
66 66
67 67 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
68 68 HeapWord* limit) const {
69 69 // First we must round addr *up* to a possible object boundary.
70 70 addr = (HeapWord*)align_size_up((intptr_t)addr,
71 71 HeapWordSize << _shifter);
72 72 size_t addrOffset = heapWordToOffset(addr);
73 73 if (limit == NULL) {
74 74 limit = _bmStartWord + _bmWordSize;
75 75 }
76 76 size_t limitOffset = heapWordToOffset(limit);
77 77 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
78 78 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
79 79 assert(nextAddr >= addr, "get_next_one postcondition");
80 80 assert(nextAddr == limit || isMarked(nextAddr),
81 81 "get_next_one postcondition");
82 82 return nextAddr;
83 83 }
84 84
85 85 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
86 86 HeapWord* limit) const {
87 87 size_t addrOffset = heapWordToOffset(addr);
88 88 if (limit == NULL) {
89 89 limit = _bmStartWord + _bmWordSize;
90 90 }
91 91 size_t limitOffset = heapWordToOffset(limit);
92 92 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
93 93 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
94 94 assert(nextAddr >= addr, "get_next_one postcondition");
95 95 assert(nextAddr == limit || !isMarked(nextAddr),
96 96 "get_next_one postcondition");
97 97 return nextAddr;
98 98 }
99 99
100 100 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
101 101 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
102 102 return (int) (diff >> _shifter);
103 103 }
104 104
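For orientation, heapWordToOffset and offsetToHeapWord used by these helpers reduce to a shift against the bitmap base, with each bit covering (1 << _shifter) heap words. A minimal standalone sketch of that mapping (the names here are illustrative, not the HotSpot API):

#include <cassert>
#include <cstddef>

typedef unsigned long HeapWordStandIn;  // stand-in for HotSpot's HeapWord

// Bit index for a heap address: the word distance from the bitmap's
// start, shifted down so one bit covers (1 << shifter) words.
size_t word_to_bit(const HeapWordStandIn* start,
                   const HeapWordStandIn* addr, int shifter) {
  assert(addr >= start && "address below the covered range");
  return (size_t)(addr - start) >> shifter;  // pointer diff is in words
}

// Inverse mapping, as in offsetToHeapWord.
const HeapWordStandIn* bit_to_word(const HeapWordStandIn* start,
                                   size_t bit, int shifter) {
  return start + (bit << shifter);
}
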
105 105 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
106 106 HeapWord* left = MAX2(_bmStartWord, mr.start());
107 107 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
108 108 if (right > left) {
109 109 // Right-open interval [leftOffset, rightOffset).
110 110 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
111 111 } else {
112 112 return true;
113 113 }
114 114 }
115 115
116 116 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
117 117 size_t from_start_index,
118 118 HeapWord* to_start_word,
119 119 size_t word_num) {
120 120 _bm.mostly_disjoint_range_union(from_bitmap,
121 121 from_start_index,
122 122 heapWordToOffset(to_start_word),
123 123 word_num);
124 124 }
125 125
126 126 #ifndef PRODUCT
127 127 bool CMBitMapRO::covers(ReservedSpace rs) const {
128 128 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
129 129 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
130 130 "size inconsistency");
131 131 return _bmStartWord == (HeapWord*)(rs.base()) &&
132 132 _bmWordSize == rs.size()>>LogHeapWordSize;
133 133 }
134 134 #endif
135 135
136 136 void CMBitMap::clearAll() {
137 137 _bm.clear();
138 138 return;
139 139 }
140 140
141 141 void CMBitMap::markRange(MemRegion mr) {
142 142 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
143 143 assert(!mr.is_empty(), "unexpected empty region");
144 144 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
145 145 ((HeapWord *) mr.end())),
146 146 "markRange memory region end is not card aligned");
147 147 // convert address range into offset range
148 148 _bm.at_put_range(heapWordToOffset(mr.start()),
149 149 heapWordToOffset(mr.end()), true);
150 150 }
151 151
152 152 void CMBitMap::clearRange(MemRegion mr) {
153 153 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
154 154 assert(!mr.is_empty(), "unexpected empty region");
155 155 // convert address range into offset range
156 156 _bm.at_put_range(heapWordToOffset(mr.start()),
157 157 heapWordToOffset(mr.end()), false);
158 158 }
159 159
160 160 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
161 161 HeapWord* end_addr) {
162 162 HeapWord* start = getNextMarkedWordAddress(addr);
163 163 start = MIN2(start, end_addr);
164 164 HeapWord* end = getNextUnmarkedWordAddress(start);
165 165 end = MIN2(end, end_addr);
166 166 assert(start <= end, "Consistency check");
167 167 MemRegion mr(start, end);
168 168 if (!mr.is_empty()) {
169 169 clearRange(mr);
170 170 }
171 171 return mr;
172 172 }
173 173
174 174 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
175 175 _base(NULL), _cm(cm)
176 176 #ifdef ASSERT
177 177 , _drain_in_progress(false)
178 178 , _drain_in_progress_yields(false)
179 179 #endif
180 180 {}
181 181
182 182 void CMMarkStack::allocate(size_t size) {
183 183 _base = NEW_C_HEAP_ARRAY(oop, size);
184 184 if (_base == NULL) {
185 185 vm_exit_during_initialization("Failed to allocate "
186 186 "CM region mark stack");
187 187 }
188 188 _index = 0;
189 189 _capacity = (jint) size;
190 190 _oops_do_bound = -1;
191 191 NOT_PRODUCT(_max_depth = 0);
192 192 }
193 193
194 194 CMMarkStack::~CMMarkStack() {
195 195 if (_base != NULL) {
196 196 FREE_C_HEAP_ARRAY(oop, _base);
197 197 }
198 198 }
199 199
200 200 void CMMarkStack::par_push(oop ptr) {
201 201 while (true) {
202 202 if (isFull()) {
203 203 _overflow = true;
204 204 return;
205 205 }
206 206 // Otherwise...
207 207 jint index = _index;
208 208 jint next_index = index+1;
209 209 jint res = Atomic::cmpxchg(next_index, &_index, index);
210 210 if (res == index) {
211 211 _base[index] = ptr;
212 212 // Note that we don't maintain this atomically. We could, but it
213 213 // doesn't seem necessary.
214 214 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
215 215 return;
216 216 }
217 217 // Otherwise, we need to try again.
218 218 }
219 219 }
220 220
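The claim-then-write protocol in par_push generalizes beyond HotSpot's Atomic::cmpxchg; here is a minimal standalone sketch of the same lock-free push using std::atomic (SketchStack and its fields are illustrative, not HotSpot types):

#include <atomic>

struct SketchStack {
  void**           base;      // backing array
  int              capacity;
  std::atomic<int> index;     // next free slot
  bool             overflow;

  void par_push(void* p) {
    while (true) {
      int cur = index.load();
      if (cur >= capacity) { overflow = true; return; }
      // Claim slot 'cur' by advancing the index; only the CAS winner
      // stores into it, losers retry with the updated index.
      if (index.compare_exchange_weak(cur, cur + 1)) {
        base[cur] = p;
        return;
      }
    }
  }
};

As in the original, the store into the claimed slot is not itself synchronized with readers; the surrounding collector has to keep consumers from reading a slot before the CAS winner has filled it.
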
221 221 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
222 222 while (true) {
223 223 if (isFull()) {
224 224 _overflow = true;
225 225 return;
226 226 }
227 227 // Otherwise...
228 228 jint index = _index;
229 229 jint next_index = index + n;
230 230 if (next_index > _capacity) {
231 231 _overflow = true;
232 232 return;
233 233 }
234 234 jint res = Atomic::cmpxchg(next_index, &_index, index);
235 235 if (res == index) {
236 236 for (int i = 0; i < n; i++) {
237 237 int ind = index + i;
238 238 assert(ind < _capacity, "By overflow test above.");
239 239 _base[ind] = ptr_arr[i];
240 240 }
241 241 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
242 242 return;
243 243 }
244 244 // Otherwise, we need to try again.
245 245 }
246 246 }
247 247
248 248
249 249 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
250 250 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
251 251 jint start = _index;
252 252 jint next_index = start + n;
253 253 if (next_index > _capacity) {
254 254 _overflow = true;
255 255 return;
256 256 }
257 257 // Otherwise.
258 258 _index = next_index;
259 259 for (int i = 0; i < n; i++) {
260 260 int ind = start + i;
261 261 assert(ind < _capacity, "By overflow test above.");
262 262 _base[ind] = ptr_arr[i];
263 263 }
264 264 }
265 265
266 266
267 267 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
268 268 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
269 269 jint index = _index;
270 270 if (index == 0) {
271 271 *n = 0;
272 272 return false;
273 273 } else {
274 274 int k = MIN2(max, index);
275 275 jint new_ind = index - k;
276 276 for (int j = 0; j < k; j++) {
277 277 ptr_arr[j] = _base[new_ind + j];
278 278 }
279 279 _index = new_ind;
280 280 *n = k;
281 281 return true;
282 282 }
283 283 }
284 284
285 285
286 286 CMRegionStack::CMRegionStack() : _base(NULL) {}
287 287
288 288 void CMRegionStack::allocate(size_t size) {
289 289 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
290 290 if (_base == NULL) {
291 291 vm_exit_during_initialization("Failed to allocate CM region mark stack");
292 292 }
293 293 _index = 0;
294 294 _capacity = (jint) size;
295 295 }
296 296
297 297 CMRegionStack::~CMRegionStack() {
298 298 if (_base != NULL) {
299 299 FREE_C_HEAP_ARRAY(oop, _base);
300 300 }
301 301 }
302 302
303 303 void CMRegionStack::push_lock_free(MemRegion mr) {
304 304 assert(mr.word_size() > 0, "Precondition");
305 305 while (true) {
306 306 jint index = _index;
307 307
308 308 if (index >= _capacity) {
309 309 _overflow = true;
310 310 return;
311 311 }
312 312 // Otherwise...
313 313 jint next_index = index+1;
314 314 jint res = Atomic::cmpxchg(next_index, &_index, index);
315 315 if (res == index) {
316 316 _base[index] = mr;
317 317 return;
318 318 }
319 319 // Otherwise, we need to try again.
320 320 }
321 321 }
322 322
323 323 // Lock-free pop of the region stack. Called during the concurrent
324 324 // marking / remark phases. Should only be called in tandem with
325 325 // other lock-free pops.
326 326 MemRegion CMRegionStack::pop_lock_free() {
327 327 while (true) {
328 328 jint index = _index;
329 329
330 330 if (index == 0) {
331 331 return MemRegion();
332 332 }
333 333 // Otherwise...
334 334 jint next_index = index-1;
335 335 jint res = Atomic::cmpxchg(next_index, &_index, index);
336 336 if (res == index) {
337 337 MemRegion mr = _base[next_index];
338 338 if (mr.start() != NULL) {
339 339 assert(mr.end() != NULL, "invariant");
340 340 assert(mr.word_size() > 0, "invariant");
341 341 return mr;
342 342 } else {
343 343 // that entry was invalidated... let's skip it
344 344 assert(mr.end() == NULL, "invariant");
345 345 }
346 346 }
347 347 // Otherwise, we need to try again.
348 348 }
349 349 }
350 350
351 351 #if 0
352 352 // The routines that manipulate the region stack with a lock are
353 353 // not currently used. They should be retained, however, as a
354 354 // diagnostic aid.
355 355
356 356 void CMRegionStack::push_with_lock(MemRegion mr) {
357 357 assert(mr.word_size() > 0, "Precondition");
358 358 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
359 359
360 360 if (isFull()) {
361 361 _overflow = true;
362 362 return;
363 363 }
364 364
365 365 _base[_index] = mr;
366 366 _index += 1;
367 367 }
368 368
369 369 MemRegion CMRegionStack::pop_with_lock() {
370 370 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
371 371
372 372 while (true) {
373 373 if (_index == 0) {
374 374 return MemRegion();
375 375 }
376 376 _index -= 1;
377 377
378 378 MemRegion mr = _base[_index];
379 379 if (mr.start() != NULL) {
380 380 assert(mr.end() != NULL, "invariant");
381 381 assert(mr.word_size() > 0, "invariant");
382 382 return mr;
383 383 } else {
384 384 // that entry was invalidated... let's skip it
385 385 assert(mr.end() == NULL, "invariant");
386 386 }
387 387 }
388 388 }
389 389 #endif
390 390
391 391 bool CMRegionStack::invalidate_entries_into_cset() {
392 392 bool result = false;
393 393 G1CollectedHeap* g1h = G1CollectedHeap::heap();
394 394 for (int i = 0; i < _oops_do_bound; ++i) {
395 395 MemRegion mr = _base[i];
396 396 if (mr.start() != NULL) {
397 397 assert(mr.end() != NULL, "invariant");
398 398 assert(mr.word_size() > 0, "invariant");
399 399 HeapRegion* hr = g1h->heap_region_containing(mr.start());
400 400 assert(hr != NULL, "invariant");
401 401 if (hr->in_collection_set()) {
402 402 // The region points into the collection set
403 403 _base[i] = MemRegion();
404 404 result = true;
405 405 }
406 406 } else {
407 407 // that entry was invalidated... let's skip it
408 408 assert(mr.end() == NULL, "invariant");
409 409 }
410 410 }
411 411 return result;
412 412 }
413 413
414 414 template<class OopClosureClass>
415 415 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
416 416 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
417 417 || SafepointSynchronize::is_at_safepoint(),
418 418 "Drain recursion must be yield-safe.");
419 419 bool res = true;
420 420 debug_only(_drain_in_progress = true);
421 421 debug_only(_drain_in_progress_yields = yield_after);
422 422 while (!isEmpty()) {
423 423 oop newOop = pop();
424 424 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
425 425 assert(newOop->is_oop(), "Expected an oop");
426 426 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
427 427 "only grey objects on this stack");
428 428 // iterate over the oops in this oop, marking and pushing
429 429 // the ones in the CMS generation.
430 430 newOop->oop_iterate(cl);
431 431 if (yield_after && _cm->do_yield_check()) {
432 432 res = false;
433 433 break;
434 434 }
435 435 }
436 436 debug_only(_drain_in_progress = false);
437 437 return res;
438 438 }
439 439
440 440 void CMMarkStack::oops_do(OopClosure* f) {
441 441 if (_index == 0) return;
442 442 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
443 443 "Bound must be set.");
444 444 for (int i = 0; i < _oops_do_bound; i++) {
445 445 f->do_oop(&_base[i]);
446 446 }
447 447 _oops_do_bound = -1;
448 448 }
449 449
450 450 bool ConcurrentMark::not_yet_marked(oop obj) const {
451 451 return (_g1h->is_obj_ill(obj)
452 452 || (_g1h->is_in_permanent(obj)
453 453 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
454 454 }
455 455
456 456 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
457 457 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
458 458 #endif // _MSC_VER
459 459
460 460 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
461 461 int max_regions) :
462 462 _markBitMap1(rs, MinObjAlignment - 1),
463 463 _markBitMap2(rs, MinObjAlignment - 1),
464 464
465 465 _parallel_marking_threads(0),
466 466 _sleep_factor(0.0),
467 467 _marking_task_overhead(1.0),
468 468 _cleanup_sleep_factor(0.0),
469 469 _cleanup_task_overhead(1.0),
470 470 _cleanup_list("Cleanup List"),
471 471 _region_bm(max_regions, false /* in_resource_area*/),
472 472 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
473 473 CardTableModRefBS::card_shift,
474 474 false /* in_resource_area*/),
475 475 _prevMarkBitMap(&_markBitMap1),
476 476 _nextMarkBitMap(&_markBitMap2),
477 477 _at_least_one_mark_complete(false),
478 478
479 479 _markStack(this),
480 480 _regionStack(),
481 481 // _finger set in set_non_marking_state
482 482
483 483 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
484 484 // _active_tasks set in set_non_marking_state
485 485 // _tasks set inside the constructor
486 486 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
487 487 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
488 488
489 489 _has_overflown(false),
490 490 _concurrent(false),
491 491 _has_aborted(false),
492 492 _restart_for_overflow(false),
493 493 _concurrent_marking_in_progress(false),
494 494 _should_gray_objects(false),
495 495
496 496 // _verbose_level set below
497 497
498 498 _init_times(),
499 499 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
500 500 _cleanup_times(),
501 501 _total_counting_time(0.0),
502 502 _total_rs_scrub_time(0.0),
503 503
504 504 _parallel_workers(NULL) {
505 505 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
506 506 if (verbose_level < no_verbose) {
507 507 verbose_level = no_verbose;
508 508 }
509 509 if (verbose_level > high_verbose) {
510 510 verbose_level = high_verbose;
511 511 }
512 512 _verbose_level = verbose_level;
513 513
514 514 if (verbose_low()) {
515 515 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
516 516 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
517 517 }
518 518
519 519 _markStack.allocate(MarkStackSize);
520 520 _regionStack.allocate(G1MarkRegionStackSize);
521 521
522 522 // Create & start a ConcurrentMark thread.
523 523 _cmThread = new ConcurrentMarkThread(this);
524 524 assert(cmThread() != NULL, "CM Thread should have been created");
525 525 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
526 526
527 527 _g1h = G1CollectedHeap::heap();
528 528 assert(CGC_lock != NULL, "Where's the CGC_lock?");
529 529 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
530 530 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
531 531
532 532 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
533 533 satb_qs.set_buffer_size(G1SATBBufferSize);
534 534
535 535 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
536 536 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
537 537
538 538 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
539 539 _active_tasks = _max_task_num;
540 540 for (int i = 0; i < (int) _max_task_num; ++i) {
541 541 CMTaskQueue* task_queue = new CMTaskQueue();
542 542 task_queue->initialize();
543 543 _task_queues->register_queue(i, task_queue);
544 544
545 545 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
546 546 _accum_task_vtime[i] = 0.0;
547 547 }
548 548
549 549 if (ConcGCThreads > ParallelGCThreads) {
550 550 vm_exit_during_initialization("Can't have more ConcGCThreads "
551 551 "than ParallelGCThreads.");
552 552 }
553 553 if (ParallelGCThreads == 0) {
554 554 // if we are not running with any parallel GC threads we will not
555 555 // spawn any marking threads either
556 556 _parallel_marking_threads = 0;
557 557 _sleep_factor = 0.0;
558 558 _marking_task_overhead = 1.0;
559 559 } else {
560 560 if (ConcGCThreads > 0) {
561 561 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
562 562 // if both are set
563 563
564 564 _parallel_marking_threads = ConcGCThreads;
565 565 _sleep_factor = 0.0;
566 566 _marking_task_overhead = 1.0;
567 567 } else if (G1MarkingOverheadPercent > 0) {
568 568 // we will calculate the number of parallel marking threads
569 569 // based on a target overhead with respect to the soft real-time
570 570 // goal
571 571
572 572 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
573 573 double overall_cm_overhead =
574 574 (double) MaxGCPauseMillis * marking_overhead /
575 575 (double) GCPauseIntervalMillis;
576 576 double cpu_ratio = 1.0 / (double) os::processor_count();
577 577 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
578 578 double marking_task_overhead =
579 579 overall_cm_overhead / marking_thread_num *
580 580 (double) os::processor_count();
581 581 double sleep_factor =
582 582 (1.0 - marking_task_overhead) / marking_task_overhead;
583 583
584 584 _parallel_marking_threads = (size_t) marking_thread_num;
585 585 _sleep_factor = sleep_factor;
586 586 _marking_task_overhead = marking_task_overhead;
587 587 } else {
588 588 _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
589 589 _sleep_factor = 0.0;
590 590 _marking_task_overhead = 1.0;
591 591 }
592 592
593 593 if (parallel_marking_threads() > 1) {
594 594 _cleanup_task_overhead = 1.0;
595 595 } else {
596 596 _cleanup_task_overhead = marking_task_overhead();
597 597 }
598 598 _cleanup_sleep_factor =
599 599 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
600 600
601 601 #if 0
602 602 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
603 603 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
604 604 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
605 605 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
606 606 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
607 607 #endif
608 608
609 609 guarantee(parallel_marking_threads() > 0, "peace of mind");
610 610 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
611 611 (int) _parallel_marking_threads, false, true);
612 612 if (_parallel_workers == NULL) {
613 613 vm_exit_during_initialization("Failed necessary allocation.");
614 614 } else {
615 615 _parallel_workers->initialize_workers();
616 616 }
617 617 }
618 618
619 619 // so that the call below can read a sensible value
620 620 _heap_start = (HeapWord*) rs.base();
621 621 set_non_marking_state();
622 622 }
623 623
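To make the G1MarkingOverheadPercent branch of the constructor concrete, here is a worked example with assumed flag values (illustrative inputs, not guaranteed defaults): it yields one marking thread that runs roughly 16% of the time, sleeping 5.25x its run time between marking steps.

#include <cmath>
#include <cstdio>

// Worked example of the marking-thread sizing above, assuming
// MaxGCPauseMillis=200, GCPauseIntervalMillis=1000,
// G1MarkingOverheadPercent=10, and 8 processors.
int main() {
  double max_pause_ms      = 200.0;
  double pause_interval_ms = 1000.0;
  double overhead_pct      = 10.0;
  int    processors        = 8;

  double marking_overhead = overhead_pct / 100.0;                      // 0.10
  double overall_overhead = max_pause_ms * marking_overhead
                          / pause_interval_ms;                         // 0.02
  double cpu_ratio        = 1.0 / processors;                         // 0.125
  double threads          = std::ceil(overall_overhead / cpu_ratio);  // 1
  double task_overhead    = overall_overhead / threads * processors;  // 0.16
  double sleep_factor     = (1.0 - task_overhead) / task_overhead;    // 5.25

  std::printf("threads=%.0f task_overhead=%.4f sleep_factor=%.4f\n",
              threads, task_overhead, sleep_factor);
  return 0;
}

The sleep factor is applied in CMConcurrentMarkingTask::work below: after each marking step, a task sleeps for sleep_factor times the virtual time it just consumed.
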
624 624 void ConcurrentMark::update_g1_committed(bool force) {
625 625 // If concurrent marking is not in progress, then we do not need to
626 626 // update _heap_end. This has a subtle and important
627 627 // side-effect. Imagine that two evacuation pauses happen between
628 628 // marking completion and remark. The first one can grow the
629 629 // heap (hence now the finger is below the heap end). Then, the
630 630 // second one could unnecessarily push regions on the region
631 631 // stack. This causes the invariant that the region stack is empty
632 632 // at the beginning of remark to be false. By ensuring that we do
633 633 // not observe heap expansions after marking is complete, then we do
634 634 // not have this problem.
635 635 if (!concurrent_marking_in_progress() && !force) return;
636 636
637 637 MemRegion committed = _g1h->g1_committed();
638 638 assert(committed.start() == _heap_start, "start shouldn't change");
639 639 HeapWord* new_end = committed.end();
640 640 if (new_end > _heap_end) {
641 641 // The heap has been expanded.
642 642
643 643 _heap_end = new_end;
644 644 }
645 645 // Notice that the heap can also shrink. However, this only happens
646 646 // during a Full GC (at least currently) and the entire marking
647 647 // phase will bail out and the task will not be restarted. So, let's
648 648 // do nothing.
649 649 }
650 650
651 651 void ConcurrentMark::reset() {
652 652 // Starting values for these two. This should be called in a STW
653 653 // phase. CM will be notified of any future g1_committed expansions;
654 654 // these will happen at the end of evacuation pauses, when tasks are
655 655 // inactive.
656 656 MemRegion committed = _g1h->g1_committed();
657 657 _heap_start = committed.start();
658 658 _heap_end = committed.end();
659 659
660 660 // Separated the asserts so that we know which one fires.
661 661 assert(_heap_start != NULL, "heap bounds should look ok");
662 662 assert(_heap_end != NULL, "heap bounds should look ok");
663 663 assert(_heap_start < _heap_end, "heap bounds should look ok");
664 664
665 665 // reset all the marking data structures and any necessary flags
666 666 clear_marking_state();
667 667
668 668 if (verbose_low()) {
669 669 gclog_or_tty->print_cr("[global] resetting");
670 670 }
671 671
672 672 // We do reset all of them, since different phases will use
673 673 // different numbers of active threads. So, it's easiest to have all
674 674 // of them ready.
675 675 for (int i = 0; i < (int) _max_task_num; ++i) {
676 676 _tasks[i]->reset(_nextMarkBitMap);
677 677 }
678 678
679 679 // we need this to make sure that the flag is on during the evac
680 680 // pause with initial mark piggy-backed
681 681 set_concurrent_marking_in_progress();
682 682 }
683 683
684 684 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
685 685 assert(active_tasks <= _max_task_num, "we should not have more");
686 686
687 687 _active_tasks = active_tasks;
688 688 // Need to update the three data structures below according to the
689 689 // number of active threads for this phase.
690 690 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
691 691 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
692 692 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
693 693
694 694 _concurrent = concurrent;
695 695 // We propagate this to all tasks, not just the active ones.
696 696 for (int i = 0; i < (int) _max_task_num; ++i)
697 697 _tasks[i]->set_concurrent(concurrent);
698 698
699 699 if (concurrent) {
700 700 set_concurrent_marking_in_progress();
701 701 } else {
702 702 // We currently assume that the concurrent flag has been set to
703 703 // false before we start remark. At this point we should also be
704 704 // in a STW phase.
705 705 assert(!concurrent_marking_in_progress(), "invariant");
706 706 assert(_finger == _heap_end, "only way to get here");
707 707 update_g1_committed(true);
708 708 }
709 709 }
710 710
711 711 void ConcurrentMark::set_non_marking_state() {
712 712 // We set the global marking state to some default values when we're
713 713 // not doing marking.
714 714 clear_marking_state();
715 715 _active_tasks = 0;
716 716 clear_concurrent_marking_in_progress();
717 717 }
718 718
719 719 ConcurrentMark::~ConcurrentMark() {
720 720 for (int i = 0; i < (int) _max_task_num; ++i) {
721 721 delete _task_queues->queue(i);
722 722 delete _tasks[i];
723 723 }
724 724 delete _task_queues;
725 725 FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
726 726 }
727 727
728 728 // This closure is used to mark refs into the g1 generation
729 729 // from external roots in the CMS bit map.
730 730 // Called at the first checkpoint.
731 731 //
732 732
733 733 void ConcurrentMark::clearNextBitmap() {
734 734 G1CollectedHeap* g1h = G1CollectedHeap::heap();
735 735 G1CollectorPolicy* g1p = g1h->g1_policy();
736 736
737 737 // Make sure that the concurrent mark thread looks to still be in
738 738 // the current cycle.
739 739 guarantee(cmThread()->during_cycle(), "invariant");
740 740
741 741 // We are finishing up the current cycle by clearing the next
742 742 // marking bitmap and getting it ready for the next cycle. During
743 743 // this time no other cycle can start. So, let's make sure that this
744 744 // is the case.
745 745 guarantee(!g1h->mark_in_progress(), "invariant");
746 746
747 747 // clear the mark bitmap (no grey objects to start with).
748 748 // We need to do this in chunks and offer to yield in between
749 749 // each chunk.
750 750 HeapWord* start = _nextMarkBitMap->startWord();
751 751 HeapWord* end = _nextMarkBitMap->endWord();
752 752 HeapWord* cur = start;
753 753 size_t chunkSize = M;
754 754 while (cur < end) {
755 755 HeapWord* next = cur + chunkSize;
756 756 if (next > end) {
757 757 next = end;
758 758 }
759 759 MemRegion mr(cur,next);
760 760 _nextMarkBitMap->clearRange(mr);
761 761 cur = next;
762 762 do_yield_check();
763 763
764 764 // Repeat the asserts from above. We'll do them as asserts here to
765 765 // minimize their overhead on the product. However, we'll have
766 766 // them as guarantees at the beginning / end of the bitmap
767 767 // clearing to get some checking in the product.
768 768 assert(cmThread()->during_cycle(), "invariant");
769 769 assert(!g1h->mark_in_progress(), "invariant");
770 770 }
771 771
772 772 // Repeat the asserts from above.
773 773 guarantee(cmThread()->during_cycle(), "invariant");
774 774 guarantee(!g1h->mark_in_progress(), "invariant");
775 775 }
776 776
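The clearing loop above is an instance of a general chunk-and-yield pattern: bound each unit of work, then offer to yield before the next chunk. A minimal standalone sketch, with clear_range and should_yield as hypothetical stand-ins:

#include <algorithm>
#include <cstddef>

void chunked_clear(char* start, char* end, size_t chunk_size,
                   void (*clear_range)(char*, char*),
                   bool (*should_yield)()) {
  char* cur = start;
  while (cur < end) {
    char* next = std::min(cur + chunk_size, end);
    clear_range(cur, next);  // bounded unit of work
    cur = next;
    if (should_yield()) {
      // The real code calls do_yield_check() here, which may block at a
      // safepoint before the loop resumes with the next chunk.
    }
  }
}
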
777 777 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
778 778 public:
779 779 bool doHeapRegion(HeapRegion* r) {
780 780 if (!r->continuesHumongous()) {
781 781 r->note_start_of_marking(true);
782 782 }
783 783 return false;
784 784 }
785 785 };
786 786
787 787 void ConcurrentMark::checkpointRootsInitialPre() {
788 788 G1CollectedHeap* g1h = G1CollectedHeap::heap();
789 789 G1CollectorPolicy* g1p = g1h->g1_policy();
790 790
791 791 _has_aborted = false;
792 792
793 793 #ifndef PRODUCT
794 794 if (G1PrintReachableAtInitialMark) {
795 795 print_reachable("at-cycle-start",
796 796 VerifyOption_G1UsePrevMarking, true /* all */);
797 797 }
798 798 #endif
799 799
800 800 // Initialise marking structures. This has to be done in a STW phase.
801 801 reset();
802 802 }
803 803
804 804 class CMMarkRootsClosure: public OopsInGenClosure {
805 805 private:
806 806 ConcurrentMark* _cm;
807 807 G1CollectedHeap* _g1h;
808 808 bool _do_barrier;
809 809
810 810 public:
811 811 CMMarkRootsClosure(ConcurrentMark* cm,
812 812 G1CollectedHeap* g1h,
813 813 bool do_barrier) : _cm(cm), _g1h(g1h),
814 814 _do_barrier(do_barrier) { }
815 815
816 816 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
817 817 virtual void do_oop( oop* p) { do_oop_work(p); }
818 818
819 819 template <class T> void do_oop_work(T* p) {
820 820 T heap_oop = oopDesc::load_heap_oop(p);
821 821 if (!oopDesc::is_null(heap_oop)) {
822 822 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
823 823 assert(obj->is_oop() || obj->mark() == NULL,
824 824 "expected an oop, possibly with mark word displaced");
825 825 HeapWord* addr = (HeapWord*)obj;
826 826 if (_g1h->is_in_g1_reserved(addr)) {
827 827 _cm->grayRoot(obj);
828 828 }
829 829 }
830 830 if (_do_barrier) {
831 831 assert(!_g1h->is_in_g1_reserved(p),
832 832 "Should be called on external roots");
833 833 do_barrier(p);
834 834 }
835 835 }
836 836 };
837 837
838 838 void ConcurrentMark::checkpointRootsInitialPost() {
839 839 G1CollectedHeap* g1h = G1CollectedHeap::heap();
840 840
841 841 // If we force an overflow during remark, the remark operation will
842 842 // actually abort and we'll restart concurrent marking. If we always
843 843 // force an overflow during remark we'll never actually complete the
844 844 // marking phase. So, we initialize this here, at the start of the
845 845 // cycle, so that the remaining overflow number will decrease at
846 846 // every remark and we'll eventually not need to cause one.
847 847 force_overflow_stw()->init();
848 848
849 849 // For each region note start of marking.
850 850 NoteStartOfMarkHRClosure startcl;
851 851 g1h->heap_region_iterate(&startcl);
852 852
853 - // Start weak-reference discovery.
854 - ReferenceProcessor* rp = g1h->ref_processor();
853 + // Start Concurrent Marking weak-reference discovery.
854 + ReferenceProcessor* rp = g1h->ref_processor_cm();
855 + assert(!rp->discovery_enabled(), "Precondition");
855 856 rp->verify_no_references_recorded();
856 857 rp->enable_discovery(); // enable ("weak") refs discovery
857 858 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
858 859
859 860 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
860 861 // This is the start of the marking cycle; we expect all
861 862 // threads to have SATB queues with active set to false.
862 863 satb_mq_set.set_active_all_threads(true, /* new active value */
863 864 false /* expected_active */);
864 865
865 866 // update_g1_committed() will be called at the end of an evac pause
866 867 // when marking is on. So, it's also called at the end of the
867 868 // initial-mark pause to update the heap end, if the heap expands
868 869 // during it. No need to call it here.
869 870 }
870 871
871 872 // Checkpoint the roots into this generation from outside
872 873 // this generation. [Note this initial checkpoint need only
873 874 // be approximate -- we'll do a catch up phase subsequently.]
874 875 void ConcurrentMark::checkpointRootsInitial() {
875 876 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
876 877 G1CollectedHeap* g1h = G1CollectedHeap::heap();
877 878
878 879 double start = os::elapsedTime();
879 880
880 881 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
881 882 g1p->record_concurrent_mark_init_start();
882 883 checkpointRootsInitialPre();
883 884
884 885 // YSR: when concurrent precleaning is in place, we'll
885 886 // need to clear the cached card table here
886 887
887 888 ResourceMark rm;
888 889 HandleMark hm;
889 890
890 891 g1h->ensure_parsability(false);
891 892 g1h->perm_gen()->save_marks();
892 893
893 894 CMMarkRootsClosure notOlder(this, g1h, false);
894 895 CMMarkRootsClosure older(this, g1h, true);
895 896
896 897 g1h->set_marking_started();
897 898 g1h->rem_set()->prepare_for_younger_refs_iterate(false);
898 899
899 900 g1h->process_strong_roots(true, // activate StrongRootsScope
900 901 false, // fake perm gen collection
901 902 SharedHeap::SO_AllClasses,
902 903 ¬Older, // Regular roots
903 904 NULL, // do not visit active blobs
904 905 &older // Perm Gen Roots
905 906 );
906 907 checkpointRootsInitialPost();
907 908
908 909 // Statistics.
909 910 double end = os::elapsedTime();
910 911 _init_times.add((end - start) * 1000.0);
911 912
912 913 g1p->record_concurrent_mark_init_end();
913 914 }
914 915
915 916 /*
916 917 * Notice that in the next two methods, we actually leave the STS
917 918 * during the barrier sync and join it immediately afterwards. If we
918 919 * do not do this, the following deadlock can occur: one thread could
919 920 * be in the barrier sync code, waiting for the other thread to also
920 921 * sync up, whereas another one could be trying to yield, while also
921 922 * waiting for the other threads to sync up too.
922 923 *
923 924 * Note, however, that this code is also used during remark and in
924 925 * this case we should not attempt to leave / enter the STS, otherwise
925 926 * we'll either hit an assert (debug / fastdebug) or deadlock
926 927 * (product). So we should only leave / enter the STS if we are
927 928 * operating concurrently.
928 929 *
929 930 * Because the thread that does the sync barrier has left the STS, it
930 931 * is possible for it to be suspended while a Full GC or an evacuation
931 932 * pause occurs. This is actually safe, since entering the sync
932 933 * barrier is one of the last things do_marking_step() does, and it
933 934 * doesn't manipulate any data structures afterwards.
934 935 */
935 936
936 937 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
937 938 if (verbose_low()) {
938 939 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
939 940 }
940 941
941 942 if (concurrent()) {
942 943 ConcurrentGCThread::stsLeave();
943 944 }
944 945 _first_overflow_barrier_sync.enter();
945 946 if (concurrent()) {
946 947 ConcurrentGCThread::stsJoin();
947 948 }
948 949 // at this point everyone should have synced up and not be doing any
949 950 // more work
950 951
951 952 if (verbose_low()) {
952 953 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
953 954 }
954 955
955 956 // let task 0 do this
956 957 if (task_num == 0) {
957 958 // task 0 is responsible for clearing the global data structures
958 959 // We should be here because of an overflow. During STW we should
959 960 // not clear the overflow flag since we rely on it being true when
960 961 // we exit this method to abort the pause and restart concurrent
961 962 // marking.
962 963 clear_marking_state(concurrent() /* clear_overflow */);
963 964 force_overflow()->update();
964 965
965 966 if (PrintGC) {
966 967 gclog_or_tty->date_stamp(PrintGCDateStamps);
967 968 gclog_or_tty->stamp(PrintGCTimeStamps);
968 969 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
969 970 }
970 971 }
971 972
972 973 // after this, each task should reset its own data structures and
973 974 // then go into the second barrier
974 975 }
975 976
976 977 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
977 978 if (verbose_low()) {
978 979 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
979 980 }
980 981
981 982 if (concurrent()) {
982 983 ConcurrentGCThread::stsLeave();
983 984 }
984 985 _second_overflow_barrier_sync.enter();
985 986 if (concurrent()) {
986 987 ConcurrentGCThread::stsJoin();
987 988 }
988 989 // at this point everything should be re-initialised and ready to go
989 990
990 991 if (verbose_low()) {
991 992 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
992 993 }
993 994 }
994 995
995 996 #ifndef PRODUCT
996 997 void ForceOverflowSettings::init() {
997 998 _num_remaining = G1ConcMarkForceOverflow;
998 999 _force = false;
999 1000 update();
1000 1001 }
1001 1002
1002 1003 void ForceOverflowSettings::update() {
1003 1004 if (_num_remaining > 0) {
1004 1005 _num_remaining -= 1;
1005 1006 _force = true;
1006 1007 } else {
1007 1008 _force = false;
1008 1009 }
1009 1010 }
1010 1011
1011 1012 bool ForceOverflowSettings::should_force() {
1012 1013 if (_force) {
1013 1014 _force = false;
1014 1015 return true;
1015 1016 } else {
1016 1017 return false;
1017 1018 }
1018 1019 }
1019 1020 #endif // !PRODUCT
1020 1021
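Read as a countdown, the three ForceOverflowSettings methods above interact as in the following illustrative driver (assuming G1ConcMarkForceOverflow=2): the first two cycles are forced to overflow, later ones are not.

#include <cstdio>

// Illustrative re-implementation of the countdown above, not HotSpot code.
struct ForceOverflowSketch {
  int  num_remaining;
  bool force;

  void init(int n) { num_remaining = n; force = false; update(); }
  void update() {
    if (num_remaining > 0) { num_remaining -= 1; force = true; }
    else                   { force = false; }
  }
  bool should_force() {
    if (force) { force = false; return true; }  // arm at most once per update
    return false;
  }
};

int main() {
  ForceOverflowSketch fo;
  fo.init(2);  // assumed G1ConcMarkForceOverflow=2
  for (int cycle = 0; cycle < 3; cycle++) {
    std::printf("cycle %d: should_force=%d\n", cycle, fo.should_force());
    fo.update();
  }
  return 0;  // prints 1, 1, 0: the forced overflows run out after two cycles
}
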
1021 1022 void ConcurrentMark::grayRoot(oop p) {
1022 1023 HeapWord* addr = (HeapWord*) p;
1023 1024 // We can't really check against _heap_start and _heap_end, since it
1024 1025 // is possible during an evacuation pause with piggy-backed
1025 1026 // initial-mark that the committed space is expanded during the
1026 1027 // pause without CM observing this change. So the assertion below
1027 1028 // is a bit conservative, but better than nothing.
1028 1029 assert(_g1h->g1_committed().contains(addr),
1029 1030 "address should be within the heap bounds");
1030 1031
1031 1032 if (!_nextMarkBitMap->isMarked(addr)) {
1032 1033 _nextMarkBitMap->parMark(addr);
1033 1034 }
1034 1035 }
1035 1036
1036 1037 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
1037 1038 // The objects on the region have already been marked "in bulk" by
1038 1039 // the caller. We only need to decide whether to push the region on
1039 1040 // the region stack or not.
1040 1041
1041 1042 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1042 1043 // We're done with marking and waiting for remark. We do not need to
1043 1044 // push anything else on the region stack.
1044 1045 return;
1045 1046 }
1046 1047
1047 1048 HeapWord* finger = _finger;
1048 1049
1049 1050 if (verbose_low()) {
1050 1051 gclog_or_tty->print_cr("[global] attempting to push "
1051 1052 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
1052 1053 PTR_FORMAT, mr.start(), mr.end(), finger);
1053 1054 }
1054 1055
1055 1056 if (mr.start() < finger) {
1056 1057 // The finger is always heap region aligned and it is not possible
1057 1058 // for mr to span heap regions.
1058 1059 assert(mr.end() <= finger, "invariant");
1059 1060
1060 1061 // Separated the asserts so that we know which one fires.
1061 1062 assert(mr.start() <= mr.end(),
1062 1063 "region boundaries should fall within the committed space");
1063 1064 assert(_heap_start <= mr.start(),
1064 1065 "region boundaries should fall within the committed space");
1065 1066 assert(mr.end() <= _heap_end,
1066 1067 "region boundaries should fall within the committed space");
1067 1068 if (verbose_low()) {
1068 1069 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1069 1070 "below the finger, pushing it",
1070 1071 mr.start(), mr.end());
1071 1072 }
1072 1073
1073 1074 if (!region_stack_push_lock_free(mr)) {
1074 1075 if (verbose_low()) {
1075 1076 gclog_or_tty->print_cr("[global] region stack has overflown.");
1076 1077 }
1077 1078 }
1078 1079 }
1079 1080 }
1080 1081
1081 1082 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1082 1083 // The object is not marked by the caller. We need to at least mark
1083 1084 // it and maybe push it on the stack.
1084 1085
1085 1086 HeapWord* addr = (HeapWord*)p;
1086 1087 if (!_nextMarkBitMap->isMarked(addr)) {
1087 1088 // We definitely need to mark it, irrespective whether we bail out
1088 1089 // because we're done with marking.
1089 1090 if (_nextMarkBitMap->parMark(addr)) {
1090 1091 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1091 1092 // If we're done with concurrent marking and we're waiting for
1092 1093 // remark, then we're not pushing anything on the stack.
1093 1094 return;
1094 1095 }
1095 1096
1096 1097 // No OrderAccess::store_load() is needed. It is implicit in the
1097 1098 // CAS done in parMark(addr) above
1098 1099 HeapWord* finger = _finger;
1099 1100
1100 1101 if (addr < finger) {
1101 1102 if (!mark_stack_push(oop(addr))) {
1102 1103 if (verbose_low()) {
1103 1104 gclog_or_tty->print_cr("[global] global stack overflow "
1104 1105 "during parMark");
1105 1106 }
1106 1107 }
1107 1108 }
1108 1109 }
1109 1110 }
1110 1111 }
1111 1112
1112 1113 class CMConcurrentMarkingTask: public AbstractGangTask {
1113 1114 private:
1114 1115 ConcurrentMark* _cm;
1115 1116 ConcurrentMarkThread* _cmt;
1116 1117
1117 1118 public:
1118 1119 void work(int worker_i) {
1119 1120 assert(Thread::current()->is_ConcurrentGC_thread(),
1120 1121 "this should only be done by a conc GC thread");
1121 1122 ResourceMark rm;
1122 1123
1123 1124 double start_vtime = os::elapsedVTime();
1124 1125
1125 1126 ConcurrentGCThread::stsJoin();
1126 1127
1127 1128 assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1128 1129 CMTask* the_task = _cm->task(worker_i);
1129 1130 the_task->record_start_time();
1130 1131 if (!_cm->has_aborted()) {
1131 1132 do {
1132 1133 double start_vtime_sec = os::elapsedVTime();
1133 1134 double start_time_sec = os::elapsedTime();
1134 1135 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1135 1136
1136 1137 the_task->do_marking_step(mark_step_duration_ms,
1137 1138 true /* do_stealing */,
1138 1139 true /* do_termination */);
1139 1140
1140 1141 double end_time_sec = os::elapsedTime();
1141 1142 double end_vtime_sec = os::elapsedVTime();
1142 1143 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1143 1144 double elapsed_time_sec = end_time_sec - start_time_sec;
1144 1145 _cm->clear_has_overflown();
1145 1146
1146 1147 bool ret = _cm->do_yield_check(worker_i);
1147 1148
1148 1149 jlong sleep_time_ms;
1149 1150 if (!_cm->has_aborted() && the_task->has_aborted()) {
1150 1151 sleep_time_ms =
1151 1152 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1152 1153 ConcurrentGCThread::stsLeave();
1153 1154 os::sleep(Thread::current(), sleep_time_ms, false);
1154 1155 ConcurrentGCThread::stsJoin();
1155 1156 }
1156 1157 double end_time2_sec = os::elapsedTime();
1157 1158 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1158 1159
1159 1160 #if 0
1160 1161 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1161 1162 "overhead %1.4lf",
1162 1163 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1163 1164 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1164 1165 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1165 1166 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1166 1167 #endif
1167 1168 } while (!_cm->has_aborted() && the_task->has_aborted());
1168 1169 }
1169 1170 the_task->record_end_time();
1170 1171 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1171 1172
1172 1173 ConcurrentGCThread::stsLeave();
1173 1174
1174 1175 double end_vtime = os::elapsedVTime();
1175 1176 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1176 1177 }
1177 1178
1178 1179 CMConcurrentMarkingTask(ConcurrentMark* cm,
1179 1180 ConcurrentMarkThread* cmt) :
1180 1181 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1181 1182
1182 1183 ~CMConcurrentMarkingTask() { }
1183 1184 };
1184 1185
1185 1186 void ConcurrentMark::markFromRoots() {
1186 1187 // we might be tempted to assert that:
1187 1188 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1188 1189 // "inconsistent argument?");
1189 1190 // However that wouldn't be right, because it's possible that
1190 1191 // a safepoint is indeed in progress as a younger generation
1191 1192 // stop-the-world GC happens even as we mark in this generation.
1192 1193
1193 1194 _restart_for_overflow = false;
1194 1195
1195 1196 size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1196 1197 force_overflow_conc()->init();
1197 1198 set_phase(active_workers, true /* concurrent */);
1198 1199
1199 1200 CMConcurrentMarkingTask markingTask(this, cmThread());
1200 1201 if (parallel_marking_threads() > 0) {
1201 1202 _parallel_workers->run_task(&markingTask);
1202 1203 } else {
1203 1204 markingTask.work(0);
1204 1205 }
1205 1206 print_stats();
1206 1207 }
1207 1208
1208 1209 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1209 1210 // world is stopped at this checkpoint
1210 1211 assert(SafepointSynchronize::is_at_safepoint(),
1211 1212 "world should be stopped");
1213 +
1212 1214 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1213 1215
1214 1216 // If a full collection has happened, we shouldn't do this.
1215 1217 if (has_aborted()) {
1216 1218 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1217 1219 return;
1218 1220 }
1219 1221
1220 1222 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1221 1223
1222 1224 if (VerifyDuringGC) {
1223 1225 HandleMark hm; // handle scope
1224 1226 gclog_or_tty->print(" VerifyDuringGC:(before)");
1225 1227 Universe::heap()->prepare_for_verify();
1226 1228 Universe::verify(/* allow dirty */ true,
1227 1229 /* silent */ false,
1228 1230 /* option */ VerifyOption_G1UsePrevMarking);
1229 1231 }
1230 1232
1231 1233 G1CollectorPolicy* g1p = g1h->g1_policy();
1232 1234 g1p->record_concurrent_mark_remark_start();
1233 1235
1234 1236 double start = os::elapsedTime();
1235 1237
1236 1238 checkpointRootsFinalWork();
1237 1239
1238 1240 double mark_work_end = os::elapsedTime();
1239 1241
1240 1242 weakRefsWork(clear_all_soft_refs);
1241 1243
1242 1244 if (has_overflown()) {
1243 1245 // Oops. We overflowed. Restart concurrent marking.
1244 1246 _restart_for_overflow = true;
1245 1247 // Clear the flag. We do not need it any more.
1246 1248 clear_has_overflown();
1247 1249 if (G1TraceMarkStackOverflow) {
1248 1250 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1249 1251 }
1250 1252 } else {
1251 1253 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1252 1254 // We're done with marking.
1253 1255 // This is the end of the marking cycle; we expect all
1254 1256 // threads to have SATB queues with active set to true.
1255 1257 satb_mq_set.set_active_all_threads(false, /* new active value */
1256 1258 true /* expected_active */);
1257 1259
1258 1260 if (VerifyDuringGC) {
1259 1261 HandleMark hm; // handle scope
1260 1262 gclog_or_tty->print(" VerifyDuringGC:(after)");
1261 1263 Universe::heap()->prepare_for_verify();
1262 1264 Universe::verify(/* allow dirty */ true,
1263 1265 /* silent */ false,
1264 1266 /* option */ VerifyOption_G1UseNextMarking);
1265 1267 }
1266 1268 assert(!restart_for_overflow(), "sanity");
1267 1269 }
1268 1270
1269 1271 // Reset the marking state if marking completed
1270 1272 if (!restart_for_overflow()) {
1271 1273 set_non_marking_state();
1272 1274 }
1273 1275
1274 1276 #if VERIFY_OBJS_PROCESSED
1275 1277 _scan_obj_cl.objs_processed = 0;
1276 1278 ThreadLocalObjQueue::objs_enqueued = 0;
1277 1279 #endif
1278 1280
1279 1281 // Statistics
1280 1282 double now = os::elapsedTime();
1281 1283 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1282 1284 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1283 1285 _remark_times.add((now - start) * 1000.0);
1284 1286
1285 1287 g1p->record_concurrent_mark_remark_end();
1286 1288 }
1287 1289
1288 1290 #define CARD_BM_TEST_MODE 0
1289 1291
1290 1292 class CalcLiveObjectsClosure: public HeapRegionClosure {
1291 1293
1292 1294 CMBitMapRO* _bm;
1293 1295 ConcurrentMark* _cm;
1294 1296 bool _changed;
1295 1297 bool _yield;
1296 1298 size_t _words_done;
1297 1299 size_t _tot_live;
1298 1300 size_t _tot_used;
1299 1301 size_t _regions_done;
1300 1302 double _start_vtime_sec;
1301 1303
1302 1304 BitMap* _region_bm;
1303 1305 BitMap* _card_bm;
1304 1306 intptr_t _bottom_card_num;
1305 1307 bool _final;
1306 1308
1307 1309 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1308 1310 for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1309 1311 #if CARD_BM_TEST_MODE
1310 1312 guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1311 1313 #else
1312 1314 _card_bm->par_at_put(i - _bottom_card_num, 1);
1313 1315 #endif
1314 1316 }
1315 1317 }
1316 1318
1317 1319 public:
1318 1320 CalcLiveObjectsClosure(bool final,
1319 1321 CMBitMapRO *bm, ConcurrentMark *cm,
1320 1322 BitMap* region_bm, BitMap* card_bm) :
1321 1323 _bm(bm), _cm(cm), _changed(false), _yield(true),
1322 1324 _words_done(0), _tot_live(0), _tot_used(0),
1323 1325 _region_bm(region_bm), _card_bm(card_bm),_final(final),
1324 1326 _regions_done(0), _start_vtime_sec(0.0)
1325 1327 {
1326 1328 _bottom_card_num =
1327 1329 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1328 1330 CardTableModRefBS::card_shift);
1329 1331 }
1330 1332
1331 1333 // It takes a region that's not empty (i.e., it has at least one
1332 1334 // live object in it) and sets its corresponding bit on the region
1333 1335 // bitmap to 1. If the region is "starts humongous" it will also set
1334 1336 // to 1 the bits on the region bitmap that correspond to its
1335 1337 // associated "continues humongous" regions.
1336 1338 void set_bit_for_region(HeapRegion* hr) {
1337 1339 assert(!hr->continuesHumongous(), "should have filtered those out");
1338 1340
1339 1341 size_t index = hr->hrs_index();
1340 1342 if (!hr->startsHumongous()) {
1341 1343 // Normal (non-humongous) case: just set the bit.
1342 1344 _region_bm->par_at_put((BitMap::idx_t) index, true);
1343 1345 } else {
1344 1346 // Starts humongous case: calculate how many regions are part of
1345 1347 // this humongous region and then set the bit range. It might
1346 1348 // have been a bit more efficient to look at the object that
1347 1349 // spans these humongous regions to calculate their number from
1348 1350 // the object's size. However, it's a good idea to calculate
1349 1351 // this based on the metadata itself, and not the region
1350 1352 // contents, so that this code is not aware of what goes into
1351 1353 // the humongous regions (in case this changes in the future).
1352 1354 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1353 1355 size_t end_index = index + 1;
1354 1356 while (end_index < g1h->n_regions()) {
1355 1357 HeapRegion* chr = g1h->region_at(end_index);
1356 1358 if (!chr->continuesHumongous()) break;
1357 1359 end_index += 1;
1358 1360 }
1359 1361 _region_bm->par_at_put_range((BitMap::idx_t) index,
1360 1362 (BitMap::idx_t) end_index, true);
1361 1363 }
1362 1364 }
1363 1365
1364 1366 bool doHeapRegion(HeapRegion* hr) {
1365 1367 if (!_final && _regions_done == 0) {
1366 1368 _start_vtime_sec = os::elapsedVTime();
1367 1369 }
1368 1370
1369 1371 if (hr->continuesHumongous()) {
1370 1372 // We will ignore these here and process them when their
1371 1373 // associated "starts humongous" region is processed (see
1372 1374 // set_bit_for_heap_region()). Note that we cannot rely on their
1373 1375 // associated "starts humongous" region to have their bit set to
1374 1376 // 1 since, due to the region chunking in the parallel region
1375 1377 // iteration, a "continues humongous" region might be visited
1376 1378 // before its associated "starts humongous".
1377 1379 return false;
1378 1380 }
1379 1381
1380 1382 HeapWord* nextTop = hr->next_top_at_mark_start();
1381 1383 HeapWord* start = hr->top_at_conc_mark_count();
1382 1384 assert(hr->bottom() <= start && start <= hr->end() &&
1383 1385 hr->bottom() <= nextTop && nextTop <= hr->end() &&
1384 1386 start <= nextTop,
1385 1387 "Preconditions.");
1386 1388 // Otherwise, record the number of words we'll examine.
1387 1389 size_t words_done = (nextTop - start);
1388 1390 // Find the first marked object at or after "start".
1389 1391 start = _bm->getNextMarkedWordAddress(start, nextTop);
1390 1392 size_t marked_bytes = 0;
1391 1393
1392 1394 // Below, the term "card num" means the result of shifting an address
1393 1395 // by the card shift -- address 0 corresponds to card number 0. One
1394 1396 // must subtract the card num of the bottom of the heap to obtain a
1395 1397 // card table index.
1396 1398 // The first card num of the sequence of live cards currently being
1397 1399 // constructed. -1 ==> no sequence.
1398 1400 intptr_t start_card_num = -1;
1399 1401 // The last card num of the sequence of live cards currently being
1400 1402 // constructed. -1 ==> no sequence.
1401 1403 intptr_t last_card_num = -1;
1402 1404
1403 1405 while (start < nextTop) {
1404 1406 if (_yield && _cm->do_yield_check()) {
1405 1407 // We yielded. It might be for a full collection, in which case
1406 1408 // all bets are off; terminate the traversal.
1407 1409 if (_cm->has_aborted()) {
1408 1410 _changed = false;
1409 1411 return true;
1410 1412 } else {
1411 1413 // Otherwise, it might be a collection pause, and the region
1412 1414 // we're looking at might be in the collection set. We'll
1413 1415 // abandon this region.
1414 1416 return false;
1415 1417 }
1416 1418 }
1417 1419 oop obj = oop(start);
1418 1420 int obj_sz = obj->size();
1419 1421 // The card num of the start of the current object.
1420 1422 intptr_t obj_card_num =
1421 1423 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1422 1424
1423 1425 HeapWord* obj_last = start + obj_sz - 1;
1424 1426 intptr_t obj_last_card_num =
1425 1427 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1426 1428
1427 1429 if (obj_card_num != last_card_num) {
1428 1430 if (start_card_num == -1) {
1429 1431 assert(last_card_num == -1, "Both or neither.");
1430 1432 start_card_num = obj_card_num;
1431 1433 } else {
1432 1434 assert(last_card_num != -1, "Both or neither.");
1433 1435 assert(obj_card_num >= last_card_num, "Inv");
1434 1436 if ((obj_card_num - last_card_num) > 1) {
1435 1437 // Mark the last run, and start a new one.
1436 1438 mark_card_num_range(start_card_num, last_card_num);
1437 1439 start_card_num = obj_card_num;
1438 1440 }
1439 1441 }
1440 1442 #if CARD_BM_TEST_MODE
1441 1443 /*
1442 1444 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1443 1445 obj_card_num - _bottom_card_num,
1444 1446 obj_last_card_num - _bottom_card_num);
1445 1447 */
1446 1448 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1447 1449 _card_bm->par_at_put(j - _bottom_card_num, 1);
1448 1450 }
1449 1451 #endif
1450 1452 }
1451 1453 // In any case, we set the last card num.
1452 1454 last_card_num = obj_last_card_num;
1453 1455
1454 1456 marked_bytes += (size_t)obj_sz * HeapWordSize;
1455 1457 // Find the next marked object after this one.
1456 1458 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1457 1459 _changed = true;
1458 1460 }
1459 1461 // Handle the last range, if any.
1460 1462 if (start_card_num != -1) {
1461 1463 mark_card_num_range(start_card_num, last_card_num);
1462 1464 }
1463 1465 if (_final) {
1464 1466 // Mark the allocated-since-marking portion...
1465 1467 HeapWord* tp = hr->top();
1466 1468 if (nextTop < tp) {
1467 1469 start_card_num =
1468 1470 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1469 1471 last_card_num =
1470 1472 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1471 1473 mark_card_num_range(start_card_num, last_card_num);
1472 1474 // This definitely means the region has live objects.
1473 1475 set_bit_for_region(hr);
1474 1476 }
1475 1477 }
1476 1478
1477 1479 hr->add_to_marked_bytes(marked_bytes);
1478 1480 // Update the live region bitmap.
1479 1481 if (marked_bytes > 0) {
1480 1482 set_bit_for_region(hr);
1481 1483 }
1482 1484 hr->set_top_at_conc_mark_count(nextTop);
1483 1485 _tot_live += hr->next_live_bytes();
1484 1486 _tot_used += hr->used();
1485 1487 _words_done = words_done;
1486 1488
1487 1489 if (!_final) {
1488 1490 ++_regions_done;
1489 1491 if (_regions_done % 10 == 0) {
1490 1492 double end_vtime_sec = os::elapsedVTime();
1491 1493 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1492 1494 if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1493 1495 jlong sleep_time_ms =
1494 1496 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1495 1497 os::sleep(Thread::current(), sleep_time_ms, false);
1496 1498 _start_vtime_sec = end_vtime_sec;
1497 1499 }
1498 1500 }
1499 1501 }
1500 1502
1501 1503 return false;
1502 1504 }
1503 1505
1504 1506 bool changed() { return _changed; }
1505 1507 void reset() { _changed = false; _words_done = 0; }
1506 1508 void no_yield() { _yield = false; }
1507 1509 size_t words_done() { return _words_done; }
1508 1510 size_t tot_live() { return _tot_live; }
1509 1511 size_t tot_used() { return _tot_used; }
1510 1512 };
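[Editor's note] The yield/sleep logic at the end of doHeapRegion() above implements a simple duty-cycle throttle: after consuming more than 10 ms of vtime, the thread sleeps for elapsed * cleanup_sleep_factor. Below is a minimal sketch of the same pattern, using wall-clock time rather than HotSpot's os::elapsedVTime(), with all names invented for the example.

    #include <chrono>
    #include <thread>

    // Duty-cycle throttle sketch: after more than 10 ms of work, sleep for
    // elapsed * sleep_factor, so the thread uses roughly 1/(1+sleep_factor)
    // of a CPU. The real code measures vtime (CPU time), not wall time.
    void throttled_scan(int num_regions, double sleep_factor) {
      using clock = std::chrono::steady_clock;
      auto window_start = clock::now();
      for (int region = 0; region < num_regions; ++region) {
        // ... process one region here ...
        if (region % 10 == 0) {                 // check every 10 regions
          double elapsed_ms =
              std::chrono::duration<double, std::milli>(clock::now() -
                                                        window_start).count();
          if (elapsed_ms > 10.0) {
            std::this_thread::sleep_for(
                std::chrono::duration<double, std::milli>(
                    elapsed_ms * sleep_factor));
            window_start = clock::now();        // start a new window
          }
        }
      }
    }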
1511 1513
1512 1514
1513 1515 void ConcurrentMark::calcDesiredRegions() {
1514 1516 _region_bm.clear();
1515 1517 _card_bm.clear();
1516 1518 CalcLiveObjectsClosure calccl(false /*final*/,
1517 1519 nextMarkBitMap(), this,
1518 1520 &_region_bm, &_card_bm);
1519 1521 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1520 1522 g1h->heap_region_iterate(&calccl);
1521 1523
1522 1524 do {
1523 1525 calccl.reset();
1524 1526 g1h->heap_region_iterate(&calccl);
1525 1527 } while (calccl.changed());
1526 1528 }
1527 1529
1528 1530 class G1ParFinalCountTask: public AbstractGangTask {
1529 1531 protected:
1530 1532 G1CollectedHeap* _g1h;
1531 1533 CMBitMap* _bm;
1532 1534 size_t _n_workers;
1533 1535 size_t *_live_bytes;
1534 1536 size_t *_used_bytes;
1535 1537 BitMap* _region_bm;
1536 1538 BitMap* _card_bm;
1537 1539 public:
1538 1540 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1539 1541 BitMap* region_bm, BitMap* card_bm)
1540 1542 : AbstractGangTask("G1 final counting"), _g1h(g1h),
1541 1543 _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
1542 1544 if (ParallelGCThreads > 0) {
1543 1545 _n_workers = _g1h->workers()->total_workers();
1544 1546 } else {
1545 1547 _n_workers = 1;
1546 1548 }
1547 1549 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1548 1550 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1549 1551 }
1550 1552
1551 1553 ~G1ParFinalCountTask() {
1552 1554 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1553 1555 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1554 1556 }
1555 1557
1556 1558 void work(int i) {
1557 1559 CalcLiveObjectsClosure calccl(true /*final*/,
1558 1560 _bm, _g1h->concurrent_mark(),
1559 1561 _region_bm, _card_bm);
1560 1562 calccl.no_yield();
1561 1563 if (G1CollectedHeap::use_parallel_gc_threads()) {
1562 1564 _g1h->heap_region_par_iterate_chunked(&calccl, i,
1563 1565 HeapRegion::FinalCountClaimValue);
1564 1566 } else {
1565 1567 _g1h->heap_region_iterate(&calccl);
1566 1568 }
1567 1569 assert(calccl.complete(), "Shouldn't have yielded!");
1568 1570
1569 1571 assert((size_t) i < _n_workers, "invariant");
1570 1572 _live_bytes[i] = calccl.tot_live();
1571 1573 _used_bytes[i] = calccl.tot_used();
1572 1574 }
1573 1575 size_t live_bytes() {
1574 1576 size_t live_bytes = 0;
1575 1577 for (size_t i = 0; i < _n_workers; ++i)
1576 1578 live_bytes += _live_bytes[i];
1577 1579 return live_bytes;
1578 1580 }
1579 1581 size_t used_bytes() {
1580 1582 size_t used_bytes = 0;
1581 1583 for (size_t i = 0; i < _n_workers; ++i)
1582 1584 used_bytes += _used_bytes[i];
1583 1585 return used_bytes;
1584 1586 }
1585 1587 };
1586 1588
1587 1589 class G1ParNoteEndTask;
1588 1590
1589 1591 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1590 1592 G1CollectedHeap* _g1;
1591 1593 int _worker_num;
1592 1594 size_t _max_live_bytes;
1593 1595 size_t _regions_claimed;
1594 1596 size_t _freed_bytes;
1595 1597 FreeRegionList* _local_cleanup_list;
1596 1598 HumongousRegionSet* _humongous_proxy_set;
1597 1599 HRRSCleanupTask* _hrrs_cleanup_task;
1598 1600 double _claimed_region_time;
1599 1601 double _max_region_time;
1600 1602
1601 1603 public:
1602 1604 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1603 1605 int worker_num,
1604 1606 FreeRegionList* local_cleanup_list,
1605 1607 HumongousRegionSet* humongous_proxy_set,
1606 1608 HRRSCleanupTask* hrrs_cleanup_task);
1607 1609 size_t freed_bytes() { return _freed_bytes; }
1608 1610
1609 1611 bool doHeapRegion(HeapRegion *r);
1610 1612
1611 1613 size_t max_live_bytes() { return _max_live_bytes; }
1612 1614 size_t regions_claimed() { return _regions_claimed; }
1613 1615 double claimed_region_time_sec() { return _claimed_region_time; }
1614 1616 double max_region_time_sec() { return _max_region_time; }
1615 1617 };
1616 1618
1617 1619 class G1ParNoteEndTask: public AbstractGangTask {
1618 1620 friend class G1NoteEndOfConcMarkClosure;
1619 1621
1620 1622 protected:
1621 1623 G1CollectedHeap* _g1h;
1622 1624 size_t _max_live_bytes;
1623 1625 size_t _freed_bytes;
1624 1626 FreeRegionList* _cleanup_list;
1625 1627
1626 1628 public:
1627 1629 G1ParNoteEndTask(G1CollectedHeap* g1h,
1628 1630 FreeRegionList* cleanup_list) :
1629 1631 AbstractGangTask("G1 note end"), _g1h(g1h),
1630 1632 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1631 1633
1632 1634 void work(int i) {
1633 1635 double start = os::elapsedTime();
1634 1636 FreeRegionList local_cleanup_list("Local Cleanup List");
1635 1637 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1636 1638 HRRSCleanupTask hrrs_cleanup_task;
1637 1639 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1638 1640 &humongous_proxy_set,
1639 1641 &hrrs_cleanup_task);
1640 1642 if (G1CollectedHeap::use_parallel_gc_threads()) {
1641 1643 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1642 1644 HeapRegion::NoteEndClaimValue);
1643 1645 } else {
1644 1646 _g1h->heap_region_iterate(&g1_note_end);
1645 1647 }
1646 1648 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1647 1649
1648 1650 // Now update the lists
1649 1651 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1650 1652 NULL /* free_list */,
1651 1653 &humongous_proxy_set,
1652 1654 true /* par */);
1653 1655 {
1654 1656 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1655 1657 _max_live_bytes += g1_note_end.max_live_bytes();
1656 1658 _freed_bytes += g1_note_end.freed_bytes();
1657 1659
1658 1660 // If we iterate over the global cleanup list at the end of
1659 1661 // cleanup to do this printing we will not guarantee to only
1660 1662 // generate output for the newly-reclaimed regions (the list
1661 1663 // might not be empty at the beginning of cleanup; we might
1662 1664 // still be working on its previous contents). So we do the
1663 1665 // printing here, before we append the new regions to the global
1664 1666 // cleanup list.
1665 1667
1666 1668 G1HRPrinter* hr_printer = _g1h->hr_printer();
1667 1669 if (hr_printer->is_active()) {
1668 1670 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1669 1671 while (iter.more_available()) {
1670 1672 HeapRegion* hr = iter.get_next();
1671 1673 hr_printer->cleanup(hr);
1672 1674 }
1673 1675 }
1674 1676
1675 1677 _cleanup_list->add_as_tail(&local_cleanup_list);
1676 1678 assert(local_cleanup_list.is_empty(), "post-condition");
1677 1679
1678 1680 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1679 1681 }
1680 1682 double end = os::elapsedTime();
1681 1683 if (G1PrintParCleanupStats) {
1682 1684 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1683 1685 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1684 1686 i, start, end, (end-start)*1000.0,
1685 1687 g1_note_end.regions_claimed(),
1686 1688 g1_note_end.claimed_region_time_sec()*1000.0,
1687 1689 g1_note_end.max_region_time_sec()*1000.0);
1688 1690 }
1689 1691 }
1690 1692 size_t max_live_bytes() { return _max_live_bytes; }
1691 1693 size_t freed_bytes() { return _freed_bytes; }
1692 1694 };
1693 1695
1694 1696 class G1ParScrubRemSetTask: public AbstractGangTask {
1695 1697 protected:
1696 1698 G1RemSet* _g1rs;
1697 1699 BitMap* _region_bm;
1698 1700 BitMap* _card_bm;
1699 1701 public:
1700 1702 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1701 1703 BitMap* region_bm, BitMap* card_bm) :
1702 1704 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1703 1705 _region_bm(region_bm), _card_bm(card_bm)
1704 1706 {}
1705 1707
1706 1708 void work(int i) {
1707 1709 if (G1CollectedHeap::use_parallel_gc_threads()) {
1708 1710 _g1rs->scrub_par(_region_bm, _card_bm, i,
1709 1711 HeapRegion::ScrubRemSetClaimValue);
1710 1712 } else {
1711 1713 _g1rs->scrub(_region_bm, _card_bm);
1712 1714 }
1713 1715 }
1714 1716
1715 1717 };
1716 1718
1717 1719 G1NoteEndOfConcMarkClosure::
1718 1720 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1719 1721 int worker_num,
1720 1722 FreeRegionList* local_cleanup_list,
1721 1723 HumongousRegionSet* humongous_proxy_set,
1722 1724 HRRSCleanupTask* hrrs_cleanup_task)
1723 1725 : _g1(g1), _worker_num(worker_num),
1724 1726 _max_live_bytes(0), _regions_claimed(0),
1725 1727 _freed_bytes(0),
1726 1728 _claimed_region_time(0.0), _max_region_time(0.0),
1727 1729 _local_cleanup_list(local_cleanup_list),
1728 1730 _humongous_proxy_set(humongous_proxy_set),
1729 1731 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1730 1732
1731 1733 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
1732 1734 // We use a claim value of zero here because all regions
1733 1735 // were claimed with value 1 in the FinalCount task.
1734 1736 hr->reset_gc_time_stamp();
1735 1737 if (!hr->continuesHumongous()) {
1736 1738 double start = os::elapsedTime();
1737 1739 _regions_claimed++;
1738 1740 hr->note_end_of_marking();
1739 1741 _max_live_bytes += hr->max_live_bytes();
1740 1742 _g1->free_region_if_empty(hr,
1741 1743 &_freed_bytes,
1742 1744 _local_cleanup_list,
1743 1745 _humongous_proxy_set,
1744 1746 _hrrs_cleanup_task,
1745 1747 true /* par */);
1746 1748 double region_time = (os::elapsedTime() - start);
1747 1749 _claimed_region_time += region_time;
1748 1750 if (region_time > _max_region_time) {
1749 1751 _max_region_time = region_time;
1750 1752 }
1751 1753 }
1752 1754 return false;
1753 1755 }
1754 1756
1755 1757 void ConcurrentMark::cleanup() {
1756 1758 // world is stopped at this checkpoint
1757 1759 assert(SafepointSynchronize::is_at_safepoint(),
1758 1760 "world should be stopped");
1759 1761 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1760 1762
1761 1763 // If a full collection has happened, we shouldn't do this.
1762 1764 if (has_aborted()) {
1763 1765 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1764 1766 return;
1765 1767 }
1766 1768
1767 1769 g1h->verify_region_sets_optional();
1768 1770
1769 1771 if (VerifyDuringGC) {
1770 1772 HandleMark hm; // handle scope
1771 1773 gclog_or_tty->print(" VerifyDuringGC:(before)");
1772 1774 Universe::heap()->prepare_for_verify();
1773 1775 Universe::verify(/* allow dirty */ true,
1774 1776 /* silent */ false,
1775 1777 /* option */ VerifyOption_G1UsePrevMarking);
1776 1778 }
1777 1779
1778 1780 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1779 1781 g1p->record_concurrent_mark_cleanup_start();
1780 1782
1781 1783 double start = os::elapsedTime();
1782 1784
1783 1785 HeapRegionRemSet::reset_for_cleanup_tasks();
1784 1786
1785 1787 // Do counting once more with the world stopped for good measure.
1786 1788 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1787 1789 &_region_bm, &_card_bm);
1788 1790 if (G1CollectedHeap::use_parallel_gc_threads()) {
1789 1791 assert(g1h->check_heap_region_claim_values(
1790 1792 HeapRegion::InitialClaimValue),
1791 1793 "sanity check");
1792 1794
1793 1795 int n_workers = g1h->workers()->total_workers();
1794 1796 g1h->set_par_threads(n_workers);
1795 1797 g1h->workers()->run_task(&g1_par_count_task);
1796 1798 g1h->set_par_threads(0);
1797 1799
1798 1800 assert(g1h->check_heap_region_claim_values(
1799 1801 HeapRegion::FinalCountClaimValue),
1800 1802 "sanity check");
1801 1803 } else {
1802 1804 g1_par_count_task.work(0);
1803 1805 }
1804 1806
1805 1807 size_t known_garbage_bytes =
1806 1808 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1807 1809 #if 0
1808 1810 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1809 1811 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1810 1812 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1811 1813 (double) known_garbage_bytes / (double) (1024 * 1024));
1812 1814 #endif // 0
1813 1815 g1p->set_known_garbage_bytes(known_garbage_bytes);
1814 1816
1815 1817 size_t start_used_bytes = g1h->used();
1816 1818 _at_least_one_mark_complete = true;
1817 1819 g1h->set_marking_complete();
1818 1820
1819 1821 double count_end = os::elapsedTime();
1820 1822 double this_final_counting_time = (count_end - start);
1821 1823 if (G1PrintParCleanupStats) {
1822 1824 gclog_or_tty->print_cr("Cleanup:");
1823 1825 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1824 1826 this_final_counting_time*1000.0);
1825 1827 }
1826 1828 _total_counting_time += this_final_counting_time;
1827 1829
1828 1830 if (G1PrintRegionLivenessInfo) {
1829 1831 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1830 1832 _g1h->heap_region_iterate(&cl);
1831 1833 }
1832 1834
1833 1835 // Install newly created mark bitMap as "prev".
1834 1836 swapMarkBitMaps();
1835 1837
1836 1838 g1h->reset_gc_time_stamp();
1837 1839
1838 1840 // Note end of marking in all heap regions.
1839 1841 double note_end_start = os::elapsedTime();
1840 1842 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1841 1843 if (G1CollectedHeap::use_parallel_gc_threads()) {
1842 1844 int n_workers = g1h->workers()->total_workers();
1843 1845 g1h->set_par_threads(n_workers);
1844 1846 g1h->workers()->run_task(&g1_par_note_end_task);
1845 1847 g1h->set_par_threads(0);
1846 1848
1847 1849 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1848 1850 "sanity check");
1849 1851 } else {
1850 1852 g1_par_note_end_task.work(0);
1851 1853 }
1852 1854
1853 1855 if (!cleanup_list_is_empty()) {
1854 1856 // The cleanup list is not empty, so we'll have to process it
1855 1857 // concurrently. Notify anyone else that might be wanting free
1856 1858 // regions that there will be more free regions coming soon.
1857 1859 g1h->set_free_regions_coming();
1858 1860 }
1859 1861 double note_end_end = os::elapsedTime();
1860 1862 if (G1PrintParCleanupStats) {
1861 1863 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1862 1864 (note_end_end - note_end_start)*1000.0);
1863 1865 }
1864 1866
1865 1867
1866 1868 // Note: the rem set scrubbing must precede the record_concurrent_mark_cleanup_end()
1867 1869 // call below, since it affects the metric by which we sort the heap regions.
1868 1870 if (G1ScrubRemSets) {
1869 1871 double rs_scrub_start = os::elapsedTime();
1870 1872 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1871 1873 if (G1CollectedHeap::use_parallel_gc_threads()) {
1872 1874 int n_workers = g1h->workers()->total_workers();
1873 1875 g1h->set_par_threads(n_workers);
1874 1876 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1875 1877 g1h->set_par_threads(0);
1876 1878
1877 1879 assert(g1h->check_heap_region_claim_values(
1878 1880 HeapRegion::ScrubRemSetClaimValue),
1879 1881 "sanity check");
1880 1882 } else {
1881 1883 g1_par_scrub_rs_task.work(0);
1882 1884 }
1883 1885
1884 1886 double rs_scrub_end = os::elapsedTime();
1885 1887 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1886 1888 _total_rs_scrub_time += this_rs_scrub_time;
1887 1889 }
1888 1890
1889 1891 // this will also free any regions totally full of garbage objects,
1890 1892 // and sort the regions.
1891 1893 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1892 1894 g1_par_note_end_task.freed_bytes(),
1893 1895 g1_par_note_end_task.max_live_bytes());
1894 1896
1895 1897 // Statistics.
1896 1898 double end = os::elapsedTime();
1897 1899 _cleanup_times.add((end - start) * 1000.0);
1898 1900
1899 1901 // G1CollectedHeap::heap()->print();
1900 1902 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1901 1903 // G1CollectedHeap::heap()->get_gc_time_stamp());
1902 1904
1903 1905 if (PrintGC || PrintGCDetails) {
1904 1906 g1h->print_size_transition(gclog_or_tty,
1905 1907 start_used_bytes,
1906 1908 g1h->used(),
1907 1909 g1h->capacity());
1908 1910 }
1909 1911
1910 1912 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1911 1913 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1912 1914
1915 + // Cleanup will have freed any regions completely full of garbage.
1916 + // Update the soft reference policy with the new heap occupancy.
1917 + Universe::update_heap_info_at_gc();
1918 +
1913 1919 // We need to make this a "collection" so any collection pause that
1914 1920 // races with it goes around and waits for completeCleanup to finish.
1915 1921 g1h->increment_total_collections();
1916 1922
1917 1923 if (VerifyDuringGC) {
1918 1924 HandleMark hm; // handle scope
1919 1925 gclog_or_tty->print(" VerifyDuringGC:(after)");
1920 1926 Universe::heap()->prepare_for_verify();
1921 1927 Universe::verify(/* allow dirty */ true,
1922 1928 /* silent */ false,
1923 1929 /* option */ VerifyOption_G1UsePrevMarking);
1924 1930 }
1925 1931
1926 1932 g1h->verify_region_sets_optional();
1927 1933 }
1928 1934
1929 1935 void ConcurrentMark::completeCleanup() {
1930 1936 if (has_aborted()) return;
1931 1937
1932 1938 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1933 1939
1934 1940 _cleanup_list.verify_optional();
1935 1941 FreeRegionList tmp_free_list("Tmp Free List");
1936 1942
1937 1943 if (G1ConcRegionFreeingVerbose) {
1938 1944 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1939 1945 "cleanup list has "SIZE_FORMAT" entries",
1940 1946 _cleanup_list.length());
1941 1947 }
1942 1948
1943 1949 // No one else should be accessing the _cleanup_list at this point,
1944 1950 // so it's not necessary to take any locks
1945 1951 while (!_cleanup_list.is_empty()) {
1946 1952 HeapRegion* hr = _cleanup_list.remove_head();
1947 1953 assert(hr != NULL, "the list was not empty");
1948 1954 hr->par_clear();
1949 1955 tmp_free_list.add_as_tail(hr);
1950 1956
1951 1957 // Instead of adding one region at a time to the secondary_free_list,
1952 1958 // we accumulate them in the local list and move them a few at a
1953 1959 // time. This also cuts down on the number of notify_all() calls
1954 1960 // we do during this process. We'll also append the local list when
1955 1961 // _cleanup_list is empty (which means we just removed the last
1956 1962 // region from the _cleanup_list).
1957 1963 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1958 1964 _cleanup_list.is_empty()) {
1959 1965 if (G1ConcRegionFreeingVerbose) {
1960 1966 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1961 1967 "appending "SIZE_FORMAT" entries to the "
1962 1968 "secondary_free_list, clean list still has "
1963 1969 SIZE_FORMAT" entries",
1964 1970 tmp_free_list.length(),
1965 1971 _cleanup_list.length());
1966 1972 }
1967 1973
1968 1974 {
1969 1975 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1970 1976 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1971 1977 SecondaryFreeList_lock->notify_all();
1972 1978 }
1973 1979
1974 1980 if (G1StressConcRegionFreeing) {
1975 1981 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1976 1982 os::sleep(Thread::current(), (jlong) 1, false);
1977 1983 }
1978 1984 }
1979 1985 }
1980 1986 }
1981 1987 assert(tmp_free_list.is_empty(), "post-condition");
1982 1988 }
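[Editor's note] The loop above is an instance of a batched hand-off: regions accumulate on a private list and are appended to the shared secondary_free_list (with a single notify_all()) only every G1SecondaryFreeListAppendLength regions, or when the source list is exhausted. A minimal sketch of that pattern follows, with invented names and a std::mutex plus condition variable standing in for SecondaryFreeList_lock.

    #include <condition_variable>
    #include <deque>
    #include <mutex>
    #include <vector>

    std::mutex list_lock;                 // stands in for SecondaryFreeList_lock
    std::condition_variable list_filled;
    std::deque<int> shared_free_list;     // stands in for the secondary free list

    // Move items to the shared list in batches so the lock is taken and
    // notify_all() is called once per batch, not once per item.
    void publish_in_batches(const std::vector<int>& items, size_t batch_len) {
      std::vector<int> local;
      for (size_t i = 0; i < items.size(); ++i) {
        local.push_back(items[i]);
        if (local.size() == batch_len || i + 1 == items.size()) {
          std::lock_guard<std::mutex> guard(list_lock);
          shared_free_list.insert(shared_free_list.end(),
                                  local.begin(), local.end());
          local.clear();
          list_filled.notify_all();       // one wake-up per batch
        }
      }
    }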
1983 1989
1984 1990 // Support closures for reference processing in G1
1985 1991
1986 1992 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1987 1993 HeapWord* addr = (HeapWord*)obj;
1988 1994 return addr != NULL &&
1989 1995 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1990 1996 }
1991 1997
1992 1998 class G1CMKeepAliveClosure: public OopClosure {
1993 1999 G1CollectedHeap* _g1;
1994 2000 ConcurrentMark* _cm;
1995 2001 CMBitMap* _bitMap;
1996 2002 public:
1997 2003 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1998 2004 CMBitMap* bitMap) :
1999 2005 _g1(g1), _cm(cm),
2000 2006 _bitMap(bitMap) {}
2001 2007
2002 2008 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2003 2009 virtual void do_oop( oop* p) { do_oop_work(p); }
2004 2010
2005 2011 template <class T> void do_oop_work(T* p) {
2006 2012 oop obj = oopDesc::load_decode_heap_oop(p);
2007 2013 HeapWord* addr = (HeapWord*)obj;
2008 2014
2009 2015 if (_cm->verbose_high()) {
2010 2016 gclog_or_tty->print_cr("\t[0] we're looking at location "
2011 2017 "*"PTR_FORMAT" = "PTR_FORMAT,
2012 2018 p, (void*) obj);
2013 2019 }
2014 2020
2015 2021 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2016 2022 _bitMap->mark(addr);
2017 2023 _cm->mark_stack_push(obj);
2018 2024 }
2019 2025 }
2020 2026 };
2021 2027
2022 2028 class G1CMDrainMarkingStackClosure: public VoidClosure {
2023 2029 CMMarkStack* _markStack;
2024 2030 CMBitMap* _bitMap;
2025 2031 G1CMKeepAliveClosure* _oopClosure;
2026 2032 public:
2027 2033 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
2028 2034 G1CMKeepAliveClosure* oopClosure) :
2029 2035 _bitMap(bitMap),
2030 2036 _markStack(markStack),
2031 2037 _oopClosure(oopClosure)
2032 2038 {}
2033 2039
2034 2040 void do_void() {
2035 2041 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
2036 2042 }
2037 2043 };
2038 2044
2039 2045 // 'Keep Alive' closure used by parallel reference processing.
2040 2046 // An instance of this closure is used in the parallel reference processing
2041 2047 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2042 2048 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
2043 2049 // placed onto the discovered ref lists only once, so we can mark and push
2044 2050 // without needing to check whether the object has already been marked. Using the
2045 2051 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2046 2052 // operating on the global mark stack. This means that an individual
2047 2053 // worker would be doing lock-free pushes while it processes its own
2048 2054 // discovered ref list followed by drain call. If the discovered ref lists
2049 2055 // are unbalanced then this could cause interference with the other
2050 2056 // workers. Using a CMTask (and its embedded local data structures)
2051 2057 // avoids that potential interference.
2052 2058 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2053 2059 ConcurrentMark* _cm;
2054 2060 CMTask* _task;
2055 2061 CMBitMap* _bitMap;
2056 2062 int _ref_counter_limit;
2057 2063 int _ref_counter;
2058 2064 public:
2059 2065 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
2060 2066 CMTask* task,
2061 2067 CMBitMap* bitMap) :
2062 2068 _cm(cm), _task(task), _bitMap(bitMap),
2063 2069 _ref_counter_limit(G1RefProcDrainInterval)
2064 2070 {
2065 2071 assert(_ref_counter_limit > 0, "sanity");
2066 2072 _ref_counter = _ref_counter_limit;
2067 2073 }
2068 2074
2069 2075 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2070 2076 virtual void do_oop( oop* p) { do_oop_work(p); }
2071 2077
2072 2078 template <class T> void do_oop_work(T* p) {
2073 2079 if (!_cm->has_overflown()) {
2074 2080 oop obj = oopDesc::load_decode_heap_oop(p);
2075 2081 if (_cm->verbose_high()) {
2076 2082 gclog_or_tty->print_cr("\t[%d] we're looking at location "
2077 2083 "*"PTR_FORMAT" = "PTR_FORMAT,
2078 2084 _task->task_id(), p, (void*) obj);
2079 2085 }
2080 2086
2081 2087 _task->deal_with_reference(obj);
2082 2088 _ref_counter--;
2083 2089
2084 2090 if (_ref_counter == 0) {
2085 2091 // We have dealt with _ref_counter_limit references, pushing them and objects
2086 2092 // reachable from them onto the local stack (and possibly the global stack).
2087 2093 // Call do_marking_step() to process these entries. We call the routine in a
2088 2094 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2089 2095 // with the entries that we've pushed as a result of the deal_with_reference
2090 2096 // calls above) or we overflow.
2091 2097 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2092 2098 // while there may still be some work to do. (See the comment at the
2093 2099 // beginning of CMTask::do_marking_step() for those conditions - one of which
2094 2100 // is reaching the specified time target.) It is only when
2095 2101 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2096 2102 // that the marking has completed.
2097 2103 do {
2098 2104 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2099 2105 _task->do_marking_step(mark_step_duration_ms,
2100 2106 false /* do_stealing */,
2101 2107 false /* do_termination */);
2102 2108 } while (_task->has_aborted() && !_cm->has_overflown());
2103 2109 _ref_counter = _ref_counter_limit;
2104 2110 }
2105 2111 } else {
2106 2112 if (_cm->verbose_high()) {
2107 2113 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2108 2114 }
2109 2115 }
2110 2116 }
2111 2117 };
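[Editor's note] The _ref_counter countdown above keeps the task-local queues bounded: every G1RefProcDrainInterval references, the closure stops pushing and drains what it has accumulated. Stripped of the CMTask machinery, the pattern looks roughly like the following sketch (all names are illustrative, not HotSpot APIs).

    #include <cstddef>
    #include <vector>

    class BatchedMarker {
      std::vector<int> _queue;       // stands in for the task-local mark queue
      const size_t _drain_interval;  // plays the role of G1RefProcDrainInterval
      size_t _since_drain;

      void drain() {
        while (!_queue.empty()) {
          int obj = _queue.back();
          _queue.pop_back();
          // ... trace obj, possibly pushing its children ...
          (void)obj;
        }
      }

    public:
      explicit BatchedMarker(size_t interval)
        : _drain_interval(interval), _since_drain(0) {}

      void keep_alive(int obj) {
        _queue.push_back(obj);               // mark and push
        if (++_since_drain == _drain_interval) {
          drain();                           // bound the queue growth
          _since_drain = 0;
        }
      }
    };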
2112 2118
2113 2119 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2114 2120 ConcurrentMark* _cm;
2115 2121 CMTask* _task;
2116 2122 public:
2117 2123 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2118 2124 _cm(cm), _task(task)
2119 2125 {}
2120 2126
2121 2127 void do_void() {
2122 2128 do {
2123 2129 if (_cm->verbose_high()) {
2124 2130 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2125 2131 _task->task_id());
2126 2132 }
2127 2133
2128 2134 // We call CMTask::do_marking_step() to completely drain the local and
2129 2135 // global marking stacks. The routine is called in a loop, which we'll
2130 2136 // exit if there's nothing more to do (i.e. we've completely drained the
2131 2137 // entries that were pushed as a result of applying the
2132 2138 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2133 2139 // lists above) or we overflow the global marking stack.
2134 2140 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2135 2141 // while there may still be some work to do. (See the comment at the
2136 2142 // beginning of CMTask::do_marking_step() for those conditions - one of which
2137 2143 // is reaching the specified time target.) It is only when
2138 2144 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2139 2145 // that the marking has completed.
2140 2146
2141 2147 _task->do_marking_step(1000000000.0 /* something very large */,
2142 2148 true /* do_stealing */,
2143 2149 true /* do_termination */);
2144 2150 } while (_task->has_aborted() && !_cm->has_overflown());
2145 2151 }
2146 2152 };
2147 2153
2148 -// Implementation of AbstractRefProcTaskExecutor for G1
2149 -class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2154 +// Implementation of AbstractRefProcTaskExecutor for parallel
2155 +// reference processing at the end of G1 concurrent marking
2156 +
2157 +class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2150 2158 private:
2151 2159 G1CollectedHeap* _g1h;
2152 2160 ConcurrentMark* _cm;
2153 2161 CMBitMap* _bitmap;
2154 2162 WorkGang* _workers;
2155 2163 int _active_workers;
2156 2164
2157 2165 public:
2158 - G1RefProcTaskExecutor(G1CollectedHeap* g1h,
2166 + G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2159 2167 ConcurrentMark* cm,
2160 2168 CMBitMap* bitmap,
2161 2169 WorkGang* workers,
2162 2170 int n_workers) :
2163 2171 _g1h(g1h), _cm(cm), _bitmap(bitmap),
2164 2172 _workers(workers), _active_workers(n_workers)
2165 2173 { }
2166 2174
2167 2175 // Executes the given task using concurrent marking worker threads.
2168 2176 virtual void execute(ProcessTask& task);
2169 2177 virtual void execute(EnqueueTask& task);
2170 2178 };
2171 2179
2172 -class G1RefProcTaskProxy: public AbstractGangTask {
2180 +class G1CMRefProcTaskProxy: public AbstractGangTask {
2173 2181 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2174 2182 ProcessTask& _proc_task;
2175 2183 G1CollectedHeap* _g1h;
2176 2184 ConcurrentMark* _cm;
2177 2185 CMBitMap* _bitmap;
2178 2186
2179 2187 public:
2180 - G1RefProcTaskProxy(ProcessTask& proc_task,
2188 + G1CMRefProcTaskProxy(ProcessTask& proc_task,
2181 2189 G1CollectedHeap* g1h,
2182 2190 ConcurrentMark* cm,
2183 2191 CMBitMap* bitmap) :
2184 2192 AbstractGangTask("Process reference objects in parallel"),
2185 2193 _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
2186 2194 {}
2187 2195
2188 2196 virtual void work(int i) {
2189 2197 CMTask* marking_task = _cm->task(i);
2190 2198 G1CMIsAliveClosure g1_is_alive(_g1h);
2191 2199 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
2192 2200 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2193 2201
2194 2202 _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2195 2203 }
2196 2204 };
2197 2205
2198 -void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) {
2206 +void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2199 2207 assert(_workers != NULL, "Need parallel worker threads.");
2200 2208
2201 - G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2209 + G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2202 2210
2203 2211 // We need to reset the phase for each task execution so that
2204 2212 // the termination protocol of CMTask::do_marking_step works.
2205 2213 _cm->set_phase(_active_workers, false /* concurrent */);
2206 2214 _g1h->set_par_threads(_active_workers);
2207 2215 _workers->run_task(&proc_task_proxy);
2208 2216 _g1h->set_par_threads(0);
2209 2217 }
2210 2218
2211 -class G1RefEnqueueTaskProxy: public AbstractGangTask {
2219 +class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2212 2220 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2213 2221 EnqueueTask& _enq_task;
2214 2222
2215 2223 public:
2216 - G1RefEnqueueTaskProxy(EnqueueTask& enq_task) :
2224 + G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2217 2225 AbstractGangTask("Enqueue reference objects in parallel"),
2218 2226 _enq_task(enq_task)
2219 2227 { }
2220 2228
2221 2229 virtual void work(int i) {
2222 2230 _enq_task.work(i);
2223 2231 }
2224 2232 };
2225 2233
2226 -void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2234 +void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2227 2235 assert(_workers != NULL, "Need parallel worker threads.");
2228 2236
2229 - G1RefEnqueueTaskProxy enq_task_proxy(enq_task);
2237 + G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2230 2238
2231 2239 _g1h->set_par_threads(_active_workers);
2232 2240 _workers->run_task(&enq_task_proxy);
2233 2241 _g1h->set_par_threads(0);
2234 2242 }
2235 2243
2236 2244 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2237 2245 ResourceMark rm;
2238 2246 HandleMark hm;
2239 2247 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2240 - ReferenceProcessor* rp = g1h->ref_processor();
2248 + ReferenceProcessor* rp = g1h->ref_processor_cm();
2241 2249
2242 2250 // See the comment in G1CollectedHeap::ref_processing_init()
2243 2251 // about how reference processing currently works in G1.
2244 2252
2245 2253 // Process weak references.
2246 2254 rp->setup_policy(clear_all_soft_refs);
2247 2255 assert(_markStack.isEmpty(), "mark stack should be empty");
2248 2256
2249 2257 G1CMIsAliveClosure g1_is_alive(g1h);
2250 2258 G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2251 2259 G1CMDrainMarkingStackClosure
2252 2260 g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2253 2261 // We use the work gang from the G1CollectedHeap and we utilize all
2254 2262 // the worker threads.
2255 2263 int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
2256 2264 active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2257 2265
2258 - G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2266 + G1CMRefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2259 2267 g1h->workers(), active_workers);
2260 2268
2261 2269
2262 2270 if (rp->processing_is_mt()) {
2263 2271 // Set the degree of MT processing here. If discovery was done MT, it
2264 2272 // may have used a different number of threads, so a different number
2265 2273 // of discovered lists may contain Ref objects.
2266 2274 // That is OK as long as the Reference lists are balanced (see
2267 2275 // balance_all_queues() and balance_queues()).
2268 2276 rp->set_active_mt_degree(active_workers);
2269 2277
2270 2278 rp->process_discovered_references(&g1_is_alive,
2271 2279 &g1_keep_alive,
2272 2280 &g1_drain_mark_stack,
2273 2281 &par_task_executor);
2274 2282
2275 2283 // The work routines of the parallel keep_alive and drain_marking_stack
2276 2284 // will set the has_overflown flag if we overflow the global marking
2277 2285 // stack.
2278 2286 } else {
2279 2287 rp->process_discovered_references(&g1_is_alive,
2280 2288 &g1_keep_alive,
2281 2289 &g1_drain_mark_stack,
2282 2290 NULL);
2283 2291
2284 2292 }
2285 2293
2286 2294 assert(_markStack.overflow() || _markStack.isEmpty(),
2287 2295 "mark stack should be empty (unless it overflowed)");
2288 2296 if (_markStack.overflow()) {
2289 2297 // Should have been done already when we tried to push an
2290 2298 // entry on to the global mark stack. But let's do it again.
2291 2299 set_has_overflown();
2292 2300 }
2293 2301
2294 2302 if (rp->processing_is_mt()) {
2295 2303 assert(rp->num_q() == active_workers, "why not");
2296 2304 rp->enqueue_discovered_references(&par_task_executor);
2297 2305 } else {
2298 2306 rp->enqueue_discovered_references();
2299 2307 }
2300 2308
2301 2309 rp->verify_no_references_recorded();
2302 - assert(!rp->discovery_enabled(), "should have been disabled");
2310 + assert(!rp->discovery_enabled(), "Post condition");
2303 2311
2304 2312 // Now clean up stale oops in StringTable
2305 2313 StringTable::unlink(&g1_is_alive);
2306 2314 // Clean up unreferenced symbols in symbol table.
2307 2315 SymbolTable::unlink();
2308 2316 }
2309 2317
2310 2318 void ConcurrentMark::swapMarkBitMaps() {
2311 2319 CMBitMapRO* temp = _prevMarkBitMap;
2312 2320 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2313 2321 _nextMarkBitMap = (CMBitMap*) temp;
2314 2322 }
2315 2323
2316 2324 class CMRemarkTask: public AbstractGangTask {
2317 2325 private:
2318 2326 ConcurrentMark *_cm;
2319 2327
2320 2328 public:
2321 2329 void work(int worker_i) {
2322 2330 // Since all available tasks are actually started, we should
2323 2331 // only proceed if we're supposed to be active.
2324 2332 if ((size_t)worker_i < _cm->active_tasks()) {
2325 2333 CMTask* task = _cm->task(worker_i);
2326 2334 task->record_start_time();
2327 2335 do {
2328 2336 task->do_marking_step(1000000000.0 /* something very large */,
2329 2337 true /* do_stealing */,
2330 2338 true /* do_termination */);
2331 2339 } while (task->has_aborted() && !_cm->has_overflown());
2332 2340 // If we overflow, then we do not want to restart. We instead
2333 2341 // want to abort remark and do concurrent marking again.
2334 2342 task->record_end_time();
2335 2343 }
2336 2344 }
2337 2345
2338 2346 CMRemarkTask(ConcurrentMark* cm) :
2339 2347 AbstractGangTask("Par Remark"), _cm(cm) { }
2340 2348 };
2341 2349
2342 2350 void ConcurrentMark::checkpointRootsFinalWork() {
2343 2351 ResourceMark rm;
2344 2352 HandleMark hm;
2345 2353 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2346 2354
2347 2355 g1h->ensure_parsability(false);
2348 2356
2349 2357 if (G1CollectedHeap::use_parallel_gc_threads()) {
2350 2358 G1CollectedHeap::StrongRootsScope srs(g1h);
2351 2359 // this is remark, so we'll use up all available threads
2352 2360 int active_workers = ParallelGCThreads;
2353 2361 set_phase(active_workers, false /* concurrent */);
2354 2362
2355 2363 CMRemarkTask remarkTask(this);
2356 2364 // We will start all available threads, even if we decide that the
2357 2365 // active_workers will be fewer. The extra ones will just bail out
2358 2366 // immediately.
2359 2367 int n_workers = g1h->workers()->total_workers();
2360 2368 g1h->set_par_threads(n_workers);
2361 2369 g1h->workers()->run_task(&remarkTask);
2362 2370 g1h->set_par_threads(0);
2363 2371 } else {
2364 2372 G1CollectedHeap::StrongRootsScope srs(g1h);
2365 2373 // this is remark, so we'll use up all available threads
2366 2374 int active_workers = 1;
2367 2375 set_phase(active_workers, false /* concurrent */);
2368 2376
2369 2377 CMRemarkTask remarkTask(this);
2370 2378 // We will start all available threads, even if we decide that the
2371 2379 // active_workers will be fewer. The extra ones will just bail out
2372 2380 // immediately.
2373 2381 remarkTask.work(0);
2374 2382 }
2375 2383 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2376 2384 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2377 2385
2378 2386 print_stats();
2379 2387
2380 2388 #if VERIFY_OBJS_PROCESSED
2381 2389 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2382 2390 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2383 2391 _scan_obj_cl.objs_processed,
2384 2392 ThreadLocalObjQueue::objs_enqueued);
2385 2393 guarantee(_scan_obj_cl.objs_processed ==
2386 2394 ThreadLocalObjQueue::objs_enqueued,
2387 2395 "Different number of objs processed and enqueued.");
2388 2396 }
2389 2397 #endif
2390 2398 }
2391 2399
2392 2400 #ifndef PRODUCT
2393 2401
2394 2402 class PrintReachableOopClosure: public OopClosure {
2395 2403 private:
2396 2404 G1CollectedHeap* _g1h;
2397 2405 outputStream* _out;
2398 2406 VerifyOption _vo;
2399 2407 bool _all;
2400 2408
2401 2409 public:
2402 2410 PrintReachableOopClosure(outputStream* out,
2403 2411 VerifyOption vo,
2404 2412 bool all) :
2405 2413 _g1h(G1CollectedHeap::heap()),
2406 2414 _out(out), _vo(vo), _all(all) { }
2407 2415
2408 2416 void do_oop(narrowOop* p) { do_oop_work(p); }
2409 2417 void do_oop( oop* p) { do_oop_work(p); }
2410 2418
2411 2419 template <class T> void do_oop_work(T* p) {
2412 2420 oop obj = oopDesc::load_decode_heap_oop(p);
2413 2421 const char* str = NULL;
2414 2422 const char* str2 = "";
2415 2423
2416 2424 if (obj == NULL) {
2417 2425 str = "";
2418 2426 } else if (!_g1h->is_in_g1_reserved(obj)) {
2419 2427 str = " O";
2420 2428 } else {
2421 2429 HeapRegion* hr = _g1h->heap_region_containing(obj);
2422 2430 guarantee(hr != NULL, "invariant");
2423 2431 bool over_tams = false;
2424 2432 bool marked = false;
2425 2433
2426 2434 switch (_vo) {
2427 2435 case VerifyOption_G1UsePrevMarking:
2428 2436 over_tams = hr->obj_allocated_since_prev_marking(obj);
2429 2437 marked = _g1h->isMarkedPrev(obj);
2430 2438 break;
2431 2439 case VerifyOption_G1UseNextMarking:
2432 2440 over_tams = hr->obj_allocated_since_next_marking(obj);
2433 2441 marked = _g1h->isMarkedNext(obj);
2434 2442 break;
2435 2443 case VerifyOption_G1UseMarkWord:
2436 2444 marked = obj->is_gc_marked();
2437 2445 break;
2438 2446 default:
2439 2447 ShouldNotReachHere();
2440 2448 }
2441 2449
2442 2450 if (over_tams) {
2443 2451 str = " >";
2444 2452 if (marked) {
2445 2453 str2 = " AND MARKED";
2446 2454 }
2447 2455 } else if (marked) {
2448 2456 str = " M";
2449 2457 } else {
2450 2458 str = " NOT";
2451 2459 }
2452 2460 }
2453 2461
2454 2462 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2455 2463 p, (void*) obj, str, str2);
2456 2464 }
2457 2465 };
2458 2466
2459 2467 class PrintReachableObjectClosure : public ObjectClosure {
2460 2468 private:
2461 2469 G1CollectedHeap* _g1h;
2462 2470 outputStream* _out;
2463 2471 VerifyOption _vo;
2464 2472 bool _all;
2465 2473 HeapRegion* _hr;
2466 2474
2467 2475 public:
2468 2476 PrintReachableObjectClosure(outputStream* out,
2469 2477 VerifyOption vo,
2470 2478 bool all,
2471 2479 HeapRegion* hr) :
2472 2480 _g1h(G1CollectedHeap::heap()),
2473 2481 _out(out), _vo(vo), _all(all), _hr(hr) { }
2474 2482
2475 2483 void do_object(oop o) {
2476 2484 bool over_tams = false;
2477 2485 bool marked = false;
2478 2486
2479 2487 switch (_vo) {
2480 2488 case VerifyOption_G1UsePrevMarking:
2481 2489 over_tams = _hr->obj_allocated_since_prev_marking(o);
2482 2490 marked = _g1h->isMarkedPrev(o);
2483 2491 break;
2484 2492 case VerifyOption_G1UseNextMarking:
2485 2493 over_tams = _hr->obj_allocated_since_next_marking(o);
2486 2494 marked = _g1h->isMarkedNext(o);
2487 2495 break;
2488 2496 case VerifyOption_G1UseMarkWord:
2489 2497 marked = o->is_gc_marked();
2490 2498 break;
2491 2499 default:
2492 2500 ShouldNotReachHere();
2493 2501 }
2494 2502 bool print_it = _all || over_tams || marked;
2495 2503
2496 2504 if (print_it) {
2497 2505 _out->print_cr(" "PTR_FORMAT"%s",
2498 2506 o, (over_tams) ? " >" : (marked) ? " M" : "");
2499 2507 PrintReachableOopClosure oopCl(_out, _vo, _all);
2500 2508 o->oop_iterate(&oopCl);
2501 2509 }
2502 2510 }
2503 2511 };
2504 2512
2505 2513 class PrintReachableRegionClosure : public HeapRegionClosure {
2506 2514 private:
2507 2515 outputStream* _out;
2508 2516 VerifyOption _vo;
2509 2517 bool _all;
2510 2518
2511 2519 public:
2512 2520 bool doHeapRegion(HeapRegion* hr) {
2513 2521 HeapWord* b = hr->bottom();
2514 2522 HeapWord* e = hr->end();
2515 2523 HeapWord* t = hr->top();
2516 2524 HeapWord* p = NULL;
2517 2525
2518 2526 switch (_vo) {
2519 2527 case VerifyOption_G1UsePrevMarking:
2520 2528 p = hr->prev_top_at_mark_start();
2521 2529 break;
2522 2530 case VerifyOption_G1UseNextMarking:
2523 2531 p = hr->next_top_at_mark_start();
2524 2532 break;
2525 2533 case VerifyOption_G1UseMarkWord:
2526 2534 // When we are verifying marking using the mark word
2527 2535 // TAMS has no relevance.
2528 2536 assert(p == NULL, "post-condition");
2529 2537 break;
2530 2538 default:
2531 2539 ShouldNotReachHere();
2532 2540 }
2533 2541 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2534 2542 "TAMS: "PTR_FORMAT, b, e, t, p);
2535 2543 _out->cr();
2536 2544
2537 2545 HeapWord* from = b;
2538 2546 HeapWord* to = t;
2539 2547
2540 2548 if (to > from) {
2541 2549 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2542 2550 _out->cr();
2543 2551 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2544 2552 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2545 2553 _out->cr();
2546 2554 }
2547 2555
2548 2556 return false;
2549 2557 }
2550 2558
2551 2559 PrintReachableRegionClosure(outputStream* out,
2552 2560 VerifyOption vo,
2553 2561 bool all) :
2554 2562 _out(out), _vo(vo), _all(all) { }
2555 2563 };
2556 2564
2557 2565 static const char* verify_option_to_tams(VerifyOption vo) {
2558 2566 switch (vo) {
2559 2567 case VerifyOption_G1UsePrevMarking:
2560 2568 return "PTAMS";
2561 2569 case VerifyOption_G1UseNextMarking:
2562 2570 return "NTAMS";
2563 2571 default:
2564 2572 return "NONE";
2565 2573 }
2566 2574 }
2567 2575
2568 2576 void ConcurrentMark::print_reachable(const char* str,
2569 2577 VerifyOption vo,
2570 2578 bool all) {
2571 2579 gclog_or_tty->cr();
2572 2580 gclog_or_tty->print_cr("== Doing heap dump... ");
2573 2581
2574 2582 if (G1PrintReachableBaseFile == NULL) {
2575 2583 gclog_or_tty->print_cr(" #### error: no base file defined");
2576 2584 return;
2577 2585 }
2578 2586
2579 2587 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2580 2588 (JVM_MAXPATHLEN - 1)) {
2581 2589 gclog_or_tty->print_cr(" #### error: file name too long");
2582 2590 return;
2583 2591 }
2584 2592
2585 2593 char file_name[JVM_MAXPATHLEN];
2586 2594 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2587 2595 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2588 2596
2589 2597 fileStream fout(file_name);
2590 2598 if (!fout.is_open()) {
2591 2599 gclog_or_tty->print_cr(" #### error: could not open file");
2592 2600 return;
2593 2601 }
2594 2602
2595 2603 outputStream* out = &fout;
2596 2604 out->print_cr("-- USING %s", verify_option_to_tams(vo));
2597 2605 out->cr();
2598 2606
2599 2607 out->print_cr("--- ITERATING OVER REGIONS");
2600 2608 out->cr();
2601 2609 PrintReachableRegionClosure rcl(out, vo, all);
2602 2610 _g1h->heap_region_iterate(&rcl);
2603 2611 out->cr();
2604 2612
2605 2613 gclog_or_tty->print_cr(" done");
2606 2614 gclog_or_tty->flush();
2607 2615 }
2608 2616
2609 2617 #endif // PRODUCT
2610 2618
2611 2619 // This note is for drainAllSATBBuffers and the code in between.
2612 2620 // In the future we could reuse a task to do this work during an
2613 2621 // evacuation pause (since now tasks are not active and can be claimed
2614 2622 // during an evacuation pause). This was a late change to the code and
2615 2623 // is currently not being taken advantage of.
2616 2624
2617 2625 class CMGlobalObjectClosure : public ObjectClosure {
2618 2626 private:
2619 2627 ConcurrentMark* _cm;
2620 2628
2621 2629 public:
2622 2630 void do_object(oop obj) {
2623 2631 _cm->deal_with_reference(obj);
2624 2632 }
2625 2633
2626 2634 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2627 2635 };
2628 2636
2629 2637 void ConcurrentMark::deal_with_reference(oop obj) {
2630 2638 if (verbose_high()) {
2631 2639 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2632 2640 (void*) obj);
2633 2641 }
2634 2642
2635 2643 HeapWord* objAddr = (HeapWord*) obj;
2636 2644 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2637 2645 if (_g1h->is_in_g1_reserved(objAddr)) {
2638 2646 assert(obj != NULL, "null check is implicit");
2639 2647 if (!_nextMarkBitMap->isMarked(objAddr)) {
2640 2648 // Only get the containing region if the object is not marked on the
2641 2649 // bitmap (otherwise, it's a waste of time since we won't do
2642 2650 // anything with it).
2643 2651 HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2644 2652 if (!hr->obj_allocated_since_next_marking(obj)) {
2645 2653 if (verbose_high()) {
2646 2654 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2647 2655 "marked", (void*) obj);
2648 2656 }
2649 2657
2650 2658 // we need to mark it first
2651 2659 if (_nextMarkBitMap->parMark(objAddr)) {
2652 2660 // No OrderAccess::store_load() is needed. It is implicit in the
2653 2661 // CAS done in parMark(objAddr) above
2654 2662 HeapWord* finger = _finger;
2655 2663 if (objAddr < finger) {
2656 2664 if (verbose_high()) {
2657 2665 gclog_or_tty->print_cr("[global] below the global finger "
2658 2666 "("PTR_FORMAT"), pushing it", finger);
2659 2667 }
2660 2668 if (!mark_stack_push(obj)) {
2661 2669 if (verbose_low()) {
2662 2670 gclog_or_tty->print_cr("[global] global stack overflow during "
2663 2671 "deal_with_reference");
2664 2672 }
2665 2673 }
2666 2674 }
2667 2675 }
2668 2676 }
2669 2677 }
2670 2678 }
2671 2679 }
2672 2680
2673 2681 void ConcurrentMark::drainAllSATBBuffers() {
2674 2682 CMGlobalObjectClosure oc(this);
2675 2683 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2676 2684 satb_mq_set.set_closure(&oc);
2677 2685
2678 2686 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2679 2687 if (verbose_medium()) {
2680 2688 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2681 2689 }
2682 2690 }
2683 2691
2684 2692 // no need to check whether we should do this, as this is only
2685 2693 // called during an evacuation pause
2686 2694 satb_mq_set.iterate_closure_all_threads();
2687 2695
2688 2696 satb_mq_set.set_closure(NULL);
2689 2697 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2690 2698 }
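[Editor's note] For context on where these buffers come from: SATB (snapshot-at-the-beginning) queues are filled by a pre-write barrier that logs the value a field held before it is overwritten, so concurrent marking still traces every object that was live when marking began. A conceptual, single-threaded sketch follows; it is not the actual HotSpot barrier code.

    #include <vector>

    struct Obj;
    std::vector<Obj*> satb_queue;    // stands in for the per-thread SATB queue
    bool marking_active = false;

    // Pre-write barrier: record the about-to-be-lost reference.
    void pre_write_barrier(Obj** field) {
      Obj* old_val = *field;
      if (marking_active && old_val != nullptr) {
        satb_queue.push_back(old_val);  // preserve the snapshot view
      }
    }

    void store_ref(Obj** field, Obj* new_val) {
      pre_write_barrier(field);   // runs before the store
      *field = new_val;
    }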
2691 2699
2692 2700 void ConcurrentMark::markPrev(oop p) {
2693 2701 // Note we are overriding the read-only view of the prev map here, via
2694 2702 // the cast.
2695 2703 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2696 2704 }
2697 2705
2698 2706 void ConcurrentMark::clear(oop p) {
2699 2707 assert(p != NULL && p->is_oop(), "expected an oop");
2700 2708 HeapWord* addr = (HeapWord*)p;
2701 2709 assert(addr >= _nextMarkBitMap->startWord() &&
2702 2710 addr < _nextMarkBitMap->endWord(), "in a region");
2703 2711
2704 2712 _nextMarkBitMap->clear(addr);
2705 2713 }
2706 2714
2707 2715 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2708 2716 // Note we are overriding the read-only view of the prev map here, via
2709 2717 // the cast.
2710 2718 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2711 2719 _nextMarkBitMap->clearRange(mr);
2712 2720 }
2713 2721
2714 2722 HeapRegion*
2715 2723 ConcurrentMark::claim_region(int task_num) {
2716 2724 // "checkpoint" the finger
2717 2725 HeapWord* finger = _finger;
2718 2726
2719 2727 // _heap_end will not change underneath our feet; it only changes at
2720 2728 // yield points.
2721 2729 while (finger < _heap_end) {
2722 2730 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2723 2731
2724 2732 // Note on how this code handles humongous regions. In the
2725 2733 // normal case the finger will reach the start of a "starts
2726 2734 // humongous" (SH) region. Its end will either be the end of the
2727 2735 // last "continues humongous" (CH) region in the sequence, or the
2728 2736 // standard end of the SH region (if the SH is the only region in
2729 2737 // the sequence). That way claim_region() will skip over the CH
2730 2738 // regions. However, there is a subtle race between a CM thread
2731 2739 // executing this method and a mutator thread doing a humongous
2732 2740 // object allocation. The two are not mutually exclusive as the CM
2733 2741 // thread does not need to hold the Heap_lock when it gets
2734 2742 // here. So there is a chance that claim_region() will come across
2735 2743 // a free region that's in the progress of becoming a SH or a CH
2736 2744 // region. In the former case, it will either
2737 2745 // a) Miss the update to the region's end, in which case it will
2738 2746 // visit every subsequent CH region, will find their bitmaps
2739 2747 // empty, and do nothing, or
2740 2748 // b) Will observe the update of the region's end (in which case
2741 2749 // it will skip the subsequent CH regions).
2742 2750 // If it comes across a region that suddenly becomes CH, the
2743 2751 // scenario will be similar to b). So, the race between
2744 2752 // claim_region() and a humongous object allocation might force us
2745 2753 // to do a bit of unnecessary work (due to some unnecessary bitmap
2746 2754 // iterations) but it should not introduce any correctness issues.
2747 2755 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2748 2756 HeapWord* bottom = curr_region->bottom();
2749 2757 HeapWord* end = curr_region->end();
2750 2758 HeapWord* limit = curr_region->next_top_at_mark_start();
2751 2759
2752 2760 if (verbose_low()) {
2753 2761 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2754 2762 "["PTR_FORMAT", "PTR_FORMAT"), "
2755 2763 "limit = "PTR_FORMAT,
2756 2764 task_num, curr_region, bottom, end, limit);
2757 2765 }
2758 2766
2759 2767 // Is the gap between reading the finger and doing the CAS too long?
2760 2768 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2761 2769 if (res == finger) {
2762 2770 // we succeeded
2763 2771
2764 2772 // notice that _finger == end cannot be guaranteed here since
2765 2773 // someone else might have moved the finger even further
2766 2774 assert(_finger >= end, "the finger should have moved forward");
2767 2775
2768 2776 if (verbose_low()) {
2769 2777 gclog_or_tty->print_cr("[%d] we were successful with region = "
2770 2778 PTR_FORMAT, task_num, curr_region);
2771 2779 }
2772 2780
2773 2781 if (limit > bottom) {
2774 2782 if (verbose_low()) {
2775 2783 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2776 2784 "returning it ", task_num, curr_region);
2777 2785 }
2778 2786 return curr_region;
2779 2787 } else {
2780 2788 assert(limit == bottom,
2781 2789 "the region limit should be at bottom");
2782 2790 if (verbose_low()) {
2783 2791 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2784 2792 "returning NULL", task_num, curr_region);
2785 2793 }
2786 2794 // we return NULL and the caller should try calling
2787 2795 // claim_region() again.
2788 2796 return NULL;
2789 2797 }
2790 2798 } else {
2791 2799 assert(_finger > finger, "the finger should have moved forward");
2792 2800 if (verbose_low()) {
2793 2801 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2794 2802 "global finger = "PTR_FORMAT", "
2795 2803 "our finger = "PTR_FORMAT,
2796 2804 task_num, _finger, finger);
2797 2805 }
2798 2806
2799 2807 // read it again
2800 2808 finger = _finger;
2801 2809 }
2802 2810 }
2803 2811
2804 2812 return NULL;
2805 2813 }
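[Editor's note] The claiming protocol above is a lock-free cursor advance: each worker reads the shared finger, computes the end of the region it wants, and CASes the finger forward; only the winner owns the region. Below is a minimal sketch with std::atomic, simplified to a unit stride where the real code advances the finger by a whole region's extent (names invented).

    #include <atomic>
    #include <cstddef>

    std::atomic<size_t> finger(0);     // next unclaimed region index
    const size_t num_regions = 1024;

    // Returns the claimed region index, or num_regions when none are left.
    size_t claim_region() {
      size_t cur = finger.load();
      while (cur < num_regions) {
        // Try to advance the finger past the region we want to claim.
        if (finger.compare_exchange_weak(cur, cur + 1)) {
          return cur;                  // we won the race for region 'cur'
        }
        // CAS failed: 'cur' was refreshed with the new finger; retry.
      }
      return num_regions;
    }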
2806 2814
2807 2815 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2808 2816 bool result = false;
2809 2817 for (int i = 0; i < (int)_max_task_num; ++i) {
2810 2818 CMTask* the_task = _tasks[i];
2811 2819 MemRegion mr = the_task->aborted_region();
2812 2820 if (mr.start() != NULL) {
2813 2821 assert(mr.end() != NULL, "invariant");
2814 2822 assert(mr.word_size() > 0, "invariant");
2815 2823 HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2816 2824 assert(hr != NULL, "invariant");
2817 2825 if (hr->in_collection_set()) {
2818 2826 // The region points into the collection set
2819 2827 the_task->set_aborted_region(MemRegion());
2820 2828 result = true;
2821 2829 }
2822 2830 }
2823 2831 }
2824 2832 return result;
2825 2833 }
2826 2834
2827 2835 bool ConcurrentMark::has_aborted_regions() {
2828 2836 for (int i = 0; i < (int)_max_task_num; ++i) {
2829 2837 CMTask* the_task = _tasks[i];
2830 2838 MemRegion mr = the_task->aborted_region();
2831 2839 if (mr.start() != NULL) {
2832 2840 assert(mr.end() != NULL, "invariant");
2833 2841 assert(mr.word_size() > 0, "invariant");
2834 2842 return true;
2835 2843 }
2836 2844 }
2837 2845 return false;
2838 2846 }
2839 2847
2840 2848 void ConcurrentMark::oops_do(OopClosure* cl) {
2841 2849 if (_markStack.size() > 0 && verbose_low()) {
2842 2850 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2843 2851 "size = %d", _markStack.size());
2844 2852 }
2845 2853 // we first iterate over the contents of the mark stack...
2846 2854 _markStack.oops_do(cl);
2847 2855
2848 2856 for (int i = 0; i < (int)_max_task_num; ++i) {
2849 2857 OopTaskQueue* queue = _task_queues->queue((int)i);
2850 2858
2851 2859 if (queue->size() > 0 && verbose_low()) {
2852 2860 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2853 2861 "size = %d", i, queue->size());
2854 2862 }
2855 2863
2856 2864 // ...then over the contents of all the task queues.
2857 2865 queue->oops_do(cl);
2858 2866 }
2859 2867
2860 2868 // Invalidate any entries in the region stack that
2861 2869 // point into the collection set.
2862 2870 if (_regionStack.invalidate_entries_into_cset()) {
2863 2871 // otherwise, any gray objects copied during the evacuation pause
2864 2872 // might not be visited.
2865 2873 assert(_should_gray_objects, "invariant");
2866 2874 }
2867 2875
2868 2876 // Invalidate any aborted regions, recorded in the individual CM
2869 2877 // tasks, that point into the collection set.
2870 2878 if (invalidate_aborted_regions_in_cset()) {
2871 2879 // otherwise, any gray objects copied during the evacuation pause
2872 2880 // might not be visited.
2873 2881 assert(_should_gray_objects, "invariant");
2874 2882 }
2875 2883
2876 2884 }
2877 2885
2878 2886 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2879 2887 _markStack.setEmpty();
2880 2888 _markStack.clear_overflow();
2881 2889 _regionStack.setEmpty();
2882 2890 _regionStack.clear_overflow();
2883 2891 if (clear_overflow) {
2884 2892 clear_has_overflown();
2885 2893 } else {
2886 2894 assert(has_overflown(), "pre-condition");
2887 2895 }
2888 2896 _finger = _heap_start;
2889 2897
2890 2898 for (int i = 0; i < (int)_max_task_num; ++i) {
2891 2899 OopTaskQueue* queue = _task_queues->queue(i);
2892 2900 queue->set_empty();
2893 2901 // Clear any partial regions from the CMTasks
2894 2902 _tasks[i]->clear_aborted_region();
2895 2903 }
2896 2904 }
2897 2905
2898 2906 void ConcurrentMark::print_stats() {
2899 2907 if (verbose_stats()) {
2900 2908 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2901 2909 for (size_t i = 0; i < _active_tasks; ++i) {
2902 2910 _tasks[i]->print_stats();
2903 2911 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2904 2912 }
2905 2913 }
2906 2914 }
2907 2915
2908 2916 class CSMarkOopClosure: public OopClosure {
2909 2917 friend class CSMarkBitMapClosure;
2910 2918
2911 2919 G1CollectedHeap* _g1h;
2912 2920 CMBitMap* _bm;
2913 2921 ConcurrentMark* _cm;
2914 2922 oop* _ms;
2915 2923 jint* _array_ind_stack;
2916 2924 int _ms_size;
2917 2925 int _ms_ind;
2918 2926 int _array_increment;
2919 2927
2920 2928 bool push(oop obj, int arr_ind = 0) {
2921 2929 if (_ms_ind == _ms_size) {
2922 2930 gclog_or_tty->print_cr("Mark stack is full.");
2923 2931 return false;
2924 2932 }
2925 2933 _ms[_ms_ind] = obj;
2926 2934 if (obj->is_objArray()) {
2927 2935 _array_ind_stack[_ms_ind] = arr_ind;
2928 2936 }
2929 2937 _ms_ind++;
2930 2938 return true;
2931 2939 }
2932 2940
2933 2941 oop pop() {
2934 2942 if (_ms_ind == 0) {
2935 2943 return NULL;
2936 2944 } else {
2937 2945 _ms_ind--;
2938 2946 return _ms[_ms_ind];
2939 2947 }
2940 2948 }
2941 2949
2942 2950 template <class T> bool drain() {
2943 2951 while (_ms_ind > 0) {
2944 2952 oop obj = pop();
2945 2953 assert(obj != NULL, "Since index was non-zero.");
2946 2954 if (obj->is_objArray()) {
2947 2955 jint arr_ind = _array_ind_stack[_ms_ind];
2948 2956 objArrayOop aobj = objArrayOop(obj);
2949 2957 jint len = aobj->length();
2950 2958 jint next_arr_ind = arr_ind + _array_increment;
2951 2959 if (next_arr_ind < len) {
2952 2960 push(obj, next_arr_ind);
2953 2961 }
2954 2962 // Now process this portion of this one.
2955 2963 int lim = MIN2(next_arr_ind, len);
2956 2964 for (int j = arr_ind; j < lim; j++) {
2957 2965 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2958 2966 }
2959 2967
2960 2968 } else {
2961 2969 obj->oop_iterate(this);
2962 2970 }
2963 2971 if (abort()) return false;
2964 2972 }
2965 2973 return true;
2966 2974 }
2967 2975
2968 2976 public:
2969 2977 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2970 2978 _g1h(G1CollectedHeap::heap()),
2971 2979 _cm(cm),
2972 2980 _bm(cm->nextMarkBitMap()),
2973 2981 _ms_size(ms_size), _ms_ind(0),
2974 2982 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2975 2983 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2976 2984 _array_increment(MAX2(ms_size/8, 16))
2977 2985 {}
2978 2986
2979 2987 ~CSMarkOopClosure() {
2980 2988 FREE_C_HEAP_ARRAY(oop, _ms);
2981 2989 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2982 2990 }
2983 2991
2984 2992 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2985 2993 virtual void do_oop( oop* p) { do_oop_work(p); }
2986 2994
2987 2995 template <class T> void do_oop_work(T* p) {
2988 2996 T heap_oop = oopDesc::load_heap_oop(p);
2989 2997 if (oopDesc::is_null(heap_oop)) return;
2990 2998 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2991 2999 if (obj->is_forwarded()) {
2992 3000 // If the object has already been forwarded, we have to make sure
2993 3001 // that it's marked. So follow the forwarding pointer. Note that
2994 3002 // this does the right thing for self-forwarding pointers in the
2995 3003 // evacuation failure case.
2996 3004 obj = obj->forwardee();
2997 3005 }
2998 3006 HeapRegion* hr = _g1h->heap_region_containing(obj);
2999 3007 if (hr != NULL) {
3000 3008 if (hr->in_collection_set()) {
3001 3009 if (_g1h->is_obj_ill(obj)) {
3002 3010 _bm->mark((HeapWord*)obj);
3003 3011 if (!push(obj)) {
3004 3012 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
3005 3013 set_abort();
3006 3014 }
3007 3015 }
3008 3016 } else {
3009 3017 // Outside the collection set; we need to gray it
3010 3018 _cm->deal_with_reference(obj);
3011 3019 }
3012 3020 }
3013 3021 }
3014 3022 };
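// An illustrative walk-through of the objArray chunking above (the
// numbers are only an example): with the MSSize of 1000 that
// CompleteMarkingInCSHRClosure below passes in, _array_increment is
// MAX2(1000/8, 16) == 125. A 1000-element objArray is therefore
// scanned in 8 chunks of 125 slots; before scanning each chunk but
// the last, drain() re-pushes the array with the next start index.
// This bounds how long any single large array can occupy the stack.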
3015 3023
3016 3024 class CSMarkBitMapClosure: public BitMapClosure {
3017 3025 G1CollectedHeap* _g1h;
3018 3026 CMBitMap* _bitMap;
3019 3027 ConcurrentMark* _cm;
3020 3028 CSMarkOopClosure _oop_cl;
3021 3029 public:
3022 3030 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
3023 3031 _g1h(G1CollectedHeap::heap()),
3024 3032 _bitMap(cm->nextMarkBitMap()),
3025 3033 _oop_cl(cm, ms_size)
3026 3034 {}
3027 3035
3028 3036 ~CSMarkBitMapClosure() {}
3029 3037
3030 3038 bool do_bit(size_t offset) {
3031 3039 // convert offset into a HeapWord*
3032 3040 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
3033 3041     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
3034 3042 "address out of range");
3035 3043 assert(_bitMap->isMarked(addr), "tautology");
3036 3044 oop obj = oop(addr);
3037 3045 if (!obj->is_forwarded()) {
3038 3046 if (!_oop_cl.push(obj)) return false;
3039 3047 if (UseCompressedOops) {
3040 3048 if (!_oop_cl.drain<narrowOop>()) return false;
3041 3049 } else {
3042 3050 if (!_oop_cl.drain<oop>()) return false;
3043 3051 }
3044 3052 }
3045 3053 // Otherwise...
3046 3054 return true;
3047 3055 }
3048 3056 };
3049 3057
3050 3058
3051 3059 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
3052 3060 CMBitMap* _bm;
3053 3061 CSMarkBitMapClosure _bit_cl;
3054 3062 enum SomePrivateConstants {
3055 3063 MSSize = 1000
3056 3064 };
3057 3065 bool _completed;
3058 3066 public:
3059 3067 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
3060 3068 _bm(cm->nextMarkBitMap()),
3061 3069 _bit_cl(cm, MSSize),
3062 3070 _completed(true)
3063 3071 {}
3064 3072
3065 3073 ~CompleteMarkingInCSHRClosure() {}
3066 3074
3067 3075 bool doHeapRegion(HeapRegion* r) {
3068 3076 if (!r->evacuation_failed()) {
3069 3077 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
3070 3078 if (!mr.is_empty()) {
3071 3079 if (!_bm->iterate(&_bit_cl, mr)) {
3072 3080 _completed = false;
3073 3081 return true;
3074 3082 }
3075 3083 }
3076 3084 }
3077 3085 return false;
3078 3086 }
3079 3087
3080 3088 bool completed() { return _completed; }
3081 3089 };
3082 3090
3083 3091 class ClearMarksInHRClosure: public HeapRegionClosure {
3084 3092 CMBitMap* _bm;
3085 3093 public:
3086 3094 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
3087 3095
3088 3096 bool doHeapRegion(HeapRegion* r) {
3089 3097 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
3090 3098 MemRegion usedMR = r->used_region();
3091 3099       _bm->clearRange(usedMR);
3092 3100 }
3093 3101 return false;
3094 3102 }
3095 3103 };
3096 3104
3097 3105 void ConcurrentMark::complete_marking_in_collection_set() {
3098 3106 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3099 3107
3100 3108 if (!g1h->mark_in_progress()) {
3101 3109 g1h->g1_policy()->record_mark_closure_time(0.0);
3102 3110 return;
3103 3111 }
3104 3112
3105 3113 int i = 1;
3106 3114 double start = os::elapsedTime();
3107 3115 while (true) {
3108 3116 i++;
3109 3117 CompleteMarkingInCSHRClosure cmplt(this);
3110 3118 g1h->collection_set_iterate(&cmplt);
3111 3119 if (cmplt.completed()) break;
3112 3120 }
3113 3121 double end_time = os::elapsedTime();
3114 3122 double elapsed_time_ms = (end_time - start) * 1000.0;
3115 3123 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3116 3124
3117 3125 ClearMarksInHRClosure clr(nextMarkBitMap());
3118 3126 g1h->collection_set_iterate(&clr);
3119 3127 }
3120 3128
3121 3129 // The next two methods deal with the following optimisation. Some
3122 3130 // objects are gray by being marked and located above the finger. If
3123 3131 // they are copied below the finger during an evacuation pause, then
3124 3132 // they need to be pushed on the stack. The observation is that, if
3125 3133 // there are no regions in the collection set located above the
3126 3134 // finger, then the above cannot happen, hence we do not need to
3127 3135 // explicitly gray any objects when copying them to below the
3128 3136 // finger. The global stack will be scanned to ensure that, if it
3129 3137 // points to objects being copied, it will update their
3130 3138 // location. There is a tricky situation with the gray objects on
3131 3139 // the region stack that are being copied, however. See the comment in
3132 3140 // newCSet().
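// A sketch of the predicate the two methods below compute (using the
// fields that newCSet() and registerCSetRegion() actually update):
//
//   _should_gray_objects =
//        !region_stack_empty() || has_aborted_regions()        // newCSet()
//     || (some CSet region hr has hr->end() > _min_finger);    // registerCSetRegion()
//
// where _min_finger is the minimum of the global finger and all
// non-NULL task-local fingers. Only when every collection set region
// lies entirely below every finger can explicit graying be skipped.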
3133 3141
3134 3142 void ConcurrentMark::newCSet() {
3135 3143 if (!concurrent_marking_in_progress()) {
3136 3144 // nothing to do if marking is not in progress
3137 3145 return;
3138 3146 }
3139 3147
3140 3148 // find what the lowest finger is among the global and local fingers
3141 3149 _min_finger = _finger;
3142 3150 for (int i = 0; i < (int)_max_task_num; ++i) {
3143 3151 CMTask* task = _tasks[i];
3144 3152 HeapWord* task_finger = task->finger();
3145 3153 if (task_finger != NULL && task_finger < _min_finger) {
3146 3154 _min_finger = task_finger;
3147 3155 }
3148 3156 }
3149 3157
3150 3158 _should_gray_objects = false;
3151 3159
3152 3160   // This fixes a very subtle and frustrating bug. It might be the case
3153 3161   // that, during an evacuation pause, heap regions that contain
3154 3162 // objects that are gray (by being in regions contained in the
3155 3163 // region stack) are included in the collection set. Since such gray
3156 3164 // objects will be moved, and because it's not easy to redirect
3157 3165 // region stack entries to point to a new location (because objects
3158 3166 // in one region might be scattered to multiple regions after they
3159 3167 // are copied), one option is to ensure that all marked objects
3160 3168 // copied during a pause are pushed on the stack. Notice, however,
3161 3169 // that this problem can only happen when the region stack is not
3162 3170 // empty during an evacuation pause. So, we make the fix a bit less
3163 3171 // conservative and ensure that regions are pushed on the stack,
3164 3172   // irrespective of whether all collection set regions are below the
3165 3173   // finger, if the region stack is not empty. This is expected to be
3166 3174   // a rare case, so I don't think it's necessary to be smarter about it.
3167 3175 if (!region_stack_empty() || has_aborted_regions()) {
3168 3176 _should_gray_objects = true;
3169 3177 }
3170 3178 }
3171 3179
3172 3180 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3173 3181 if (!concurrent_marking_in_progress()) return;
3174 3182
3175 3183 HeapWord* region_end = hr->end();
3176 3184 if (region_end > _min_finger) {
3177 3185 _should_gray_objects = true;
3178 3186 }
3179 3187 }
3180 3188
3181 3189 // Resets the region fields of active CMTasks whose values point
3182 3190 // into the collection set.
3183 3191 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3184 3192 assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3185 3193 assert(parallel_marking_threads() <= _max_task_num, "sanity");
3186 3194
3187 3195 for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3188 3196 CMTask* task = _tasks[i];
3189 3197 HeapWord* task_finger = task->finger();
3190 3198 if (task_finger != NULL) {
3191 3199 assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3192 3200 HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3193 3201 if (finger_region->in_collection_set()) {
3194 3202 // The task's current region is in the collection set.
3195 3203 // This region will be evacuated in the current GC and
3196 3204 // the region fields in the task will be stale.
3197 3205 task->giveup_current_region();
3198 3206 }
3199 3207 }
3200 3208 }
3201 3209 }
3202 3210
3203 3211 // abandon current marking iteration due to a Full GC
3204 3212 void ConcurrentMark::abort() {
3205 3213 // Clear all marks to force marking thread to do nothing
3206 3214 _nextMarkBitMap->clearAll();
3207 3215 // Empty mark stack
3208 3216 clear_marking_state();
3209 3217 for (int i = 0; i < (int)_max_task_num; ++i) {
3210 3218 _tasks[i]->clear_region_fields();
3211 3219 }
3212 3220 _has_aborted = true;
3213 3221
3214 3222 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3215 3223 satb_mq_set.abandon_partial_marking();
3216 3224 // This can be called either during or outside marking, we'll read
3217 3225 // the expected_active value from the SATB queue set.
3218 3226 satb_mq_set.set_active_all_threads(
3219 3227 false, /* new active value */
3220 3228 satb_mq_set.is_active() /* expected_active */);
3221 3229 }
3222 3230
3223 3231 static void print_ms_time_info(const char* prefix, const char* name,
3224 3232 NumberSeq& ns) {
3225 3233 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3226 3234 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3227 3235 if (ns.num() > 0) {
3228 3236 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3229 3237 prefix, ns.sd(), ns.maximum());
3230 3238 }
3231 3239 }
3232 3240
3233 3241 void ConcurrentMark::print_summary_info() {
3234 3242 gclog_or_tty->print_cr(" Concurrent marking:");
3235 3243 print_ms_time_info(" ", "init marks", _init_times);
3236 3244 print_ms_time_info(" ", "remarks", _remark_times);
3237 3245 {
3238 3246 print_ms_time_info(" ", "final marks", _remark_mark_times);
3239 3247 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3240 3248
3241 3249 }
3242 3250 print_ms_time_info(" ", "cleanups", _cleanup_times);
3243 3251 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3244 3252 _total_counting_time,
3245 3253 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3246 3254 (double)_cleanup_times.num()
3247 3255 : 0.0));
3248 3256 if (G1ScrubRemSets) {
3249 3257 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3250 3258 _total_rs_scrub_time,
3251 3259 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3252 3260 (double)_cleanup_times.num()
3253 3261 : 0.0));
3254 3262 }
3255 3263 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3256 3264 (_init_times.sum() + _remark_times.sum() +
3257 3265 _cleanup_times.sum())/1000.0);
3258 3266 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3259 3267 "(%8.2f s marking, %8.2f s counting).",
3260 3268 cmThread()->vtime_accum(),
3261 3269 cmThread()->vtime_mark_accum(),
3262 3270 cmThread()->vtime_count_accum());
3263 3271 }
3264 3272
3265 3273 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3266 3274 _parallel_workers->print_worker_threads_on(st);
3267 3275 }
3268 3276
3269 3277 // Closures
3270 3278 // XXX: there seems to be a lot of code duplication here;
3271 3279 // should refactor and consolidate the shared code.
3272 3280
3273 3281 // This closure is used to mark refs into the CMS generation in
3274 3282 // the CMS bit map. Called at the first checkpoint.
3275 3283
3276 3284 // We take a break if someone is trying to stop the world.
3277 3285 bool ConcurrentMark::do_yield_check(int worker_i) {
3278 3286 if (should_yield()) {
3279 3287 if (worker_i == 0) {
3280 3288 _g1h->g1_policy()->record_concurrent_pause();
3281 3289 }
3282 3290 cmThread()->yield();
3283 3291 if (worker_i == 0) {
3284 3292 _g1h->g1_policy()->record_concurrent_pause_end();
3285 3293 }
3286 3294 return true;
3287 3295 } else {
3288 3296 return false;
3289 3297 }
3290 3298 }
3291 3299
3292 3300 bool ConcurrentMark::should_yield() {
3293 3301 return cmThread()->should_yield();
3294 3302 }
3295 3303
3296 3304 bool ConcurrentMark::containing_card_is_marked(void* p) {
3297 3305 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3298 3306 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3299 3307 }
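// Worked example, assuming the usual 512-byte cards (i.e.
// CardTableModRefBS::card_shift == 9): for a pointer 0x2a30 bytes
// above the start of the reserved region, pointer_delta() yields a
// byte offset of 0x2a30, and 0x2a30 >> 9 == 0x15, so the call tests
// bit 0x15 of _card_bm.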
3300 3308
3301 3309 bool ConcurrentMark::containing_cards_are_marked(void* start,
3302 3310 void* last) {
3303 3311 return containing_card_is_marked(start) &&
3304 3312 containing_card_is_marked(last);
3305 3313 }
3306 3314
3307 3315 #ifndef PRODUCT
3308 3316 // for debugging purposes
3309 3317 void ConcurrentMark::print_finger() {
3310 3318 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3311 3319 _heap_start, _heap_end, _finger);
3312 3320 for (int i = 0; i < (int) _max_task_num; ++i) {
3313 3321 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
3314 3322 }
3315 3323 gclog_or_tty->print_cr("");
3316 3324 }
3317 3325 #endif
3318 3326
3319 3327 void CMTask::scan_object(oop obj) {
3320 3328 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3321 3329
3322 3330 if (_cm->verbose_high()) {
3323 3331 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3324 3332 _task_id, (void*) obj);
3325 3333 }
3326 3334
3327 3335 size_t obj_size = obj->size();
3328 3336 _words_scanned += obj_size;
3329 3337
3330 3338 obj->oop_iterate(_cm_oop_closure);
3331 3339 statsOnly( ++_objs_scanned );
3332 3340 check_limits();
3333 3341 }
3334 3342
3335 3343 // Closure for iteration over bitmaps
3336 3344 class CMBitMapClosure : public BitMapClosure {
3337 3345 private:
3338 3346 // the bitmap that is being iterated over
3339 3347 CMBitMap* _nextMarkBitMap;
3340 3348 ConcurrentMark* _cm;
3341 3349 CMTask* _task;
3342 3350 // true if we're scanning a heap region claimed by the task (so that
3343 3351 // we move the finger along), false if we're not, i.e. currently when
3344 3352 // scanning a heap region popped from the region stack (so that we
3345 3353 // do not move the task finger along; it'd be a mistake if we did so).
3346 3354 bool _scanning_heap_region;
3347 3355
3348 3356 public:
3349 3357 CMBitMapClosure(CMTask *task,
3350 3358 ConcurrentMark* cm,
3351 3359 CMBitMap* nextMarkBitMap)
3352 3360 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3353 3361
3354 3362 void set_scanning_heap_region(bool scanning_heap_region) {
3355 3363 _scanning_heap_region = scanning_heap_region;
3356 3364 }
3357 3365
3358 3366 bool do_bit(size_t offset) {
3359 3367 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3360 3368 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3361 3369 assert( addr < _cm->finger(), "invariant");
3362 3370
3363 3371 if (_scanning_heap_region) {
3364 3372 statsOnly( _task->increase_objs_found_on_bitmap() );
3365 3373 assert(addr >= _task->finger(), "invariant");
3366 3374       // We move the task's local finger along.
3367 3375 _task->move_finger_to(addr);
3368 3376 } else {
3369 3377 // We move the task's region finger along.
3370 3378 _task->move_region_finger_to(addr);
3371 3379 }
3372 3380
3373 3381 _task->scan_object(oop(addr));
3374 3382 // we only partially drain the local queue and global stack
3375 3383 _task->drain_local_queue(true);
3376 3384 _task->drain_global_stack(true);
3377 3385
3378 3386 // if the has_aborted flag has been raised, we need to bail out of
3379 3387 // the iteration
3380 3388 return !_task->has_aborted();
3381 3389 }
3382 3390 };
3383 3391
3384 3392 // Closure for iterating over objects, currently only used for
3385 3393 // processing SATB buffers.
3386 3394 class CMObjectClosure : public ObjectClosure {
3387 3395 private:
3388 3396 CMTask* _task;
3389 3397
3390 3398 public:
3391 3399 void do_object(oop obj) {
3392 3400 _task->deal_with_reference(obj);
3393 3401 }
3394 3402
3395 3403 CMObjectClosure(CMTask* task) : _task(task) { }
3396 3404 };
3397 3405
3398 3406 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3399 3407 ConcurrentMark* cm,
3400 3408 CMTask* task)
3401 3409 : _g1h(g1h), _cm(cm), _task(task) {
3402 3410 assert(_ref_processor == NULL, "should be initialized to NULL");
3403 3411
3404 3412 if (G1UseConcMarkReferenceProcessing) {
3405 - _ref_processor = g1h->ref_processor();
3413 + _ref_processor = g1h->ref_processor_cm();
3406 3414 assert(_ref_processor != NULL, "should not be NULL");
3407 3415 }
3408 3416 }
3409 3417
3410 3418 void CMTask::setup_for_region(HeapRegion* hr) {
3411 3419 // Separated the asserts so that we know which one fires.
3412 3420 assert(hr != NULL,
3413 3421 "claim_region() should have filtered out continues humongous regions");
3414 3422 assert(!hr->continuesHumongous(),
3415 3423 "claim_region() should have filtered out continues humongous regions");
3416 3424
3417 3425 if (_cm->verbose_low()) {
3418 3426 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3419 3427 _task_id, hr);
3420 3428 }
3421 3429
3422 3430 _curr_region = hr;
3423 3431 _finger = hr->bottom();
3424 3432 update_region_limit();
3425 3433 }
3426 3434
3427 3435 void CMTask::update_region_limit() {
3428 3436 HeapRegion* hr = _curr_region;
3429 3437 HeapWord* bottom = hr->bottom();
3430 3438 HeapWord* limit = hr->next_top_at_mark_start();
3431 3439
3432 3440 if (limit == bottom) {
3433 3441 if (_cm->verbose_low()) {
3434 3442 gclog_or_tty->print_cr("[%d] found an empty region "
3435 3443 "["PTR_FORMAT", "PTR_FORMAT")",
3436 3444 _task_id, bottom, limit);
3437 3445 }
3438 3446 // The region was collected underneath our feet.
3439 3447 // We set the finger to bottom to ensure that the bitmap
3440 3448 // iteration that will follow this will not do anything.
3441 3449 // (this is not a condition that holds when we set the region up,
3442 3450 // as the region is not supposed to be empty in the first place)
3443 3451 _finger = bottom;
3444 3452 } else if (limit >= _region_limit) {
3445 3453 assert(limit >= _finger, "peace of mind");
3446 3454 } else {
3447 3455 assert(limit < _region_limit, "only way to get here");
3448 3456 // This can happen under some pretty unusual circumstances. An
3449 3457 // evacuation pause empties the region underneath our feet (NTAMS
3450 3458 // at bottom). We then do some allocation in the region (NTAMS
3451 3459 // stays at bottom), followed by the region being used as a GC
3452 3460 // alloc region (NTAMS will move to top() and the objects
3453 3461 // originally below it will be grayed). All objects now marked in
3454 3462 // the region are explicitly grayed, if below the global finger,
3455 3463 // and we do not need in fact to scan anything else. So, we simply
3456 3464 // set _finger to be limit to ensure that the bitmap iteration
3457 3465 // doesn't do anything.
3458 3466 _finger = limit;
3459 3467 }
3460 3468
3461 3469 _region_limit = limit;
3462 3470 }
3463 3471
3464 3472 void CMTask::giveup_current_region() {
3465 3473 assert(_curr_region != NULL, "invariant");
3466 3474 if (_cm->verbose_low()) {
3467 3475 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3468 3476 _task_id, _curr_region);
3469 3477 }
3470 3478 clear_region_fields();
3471 3479 }
3472 3480
3473 3481 void CMTask::clear_region_fields() {
3474 3482 // Values for these three fields that indicate that we're not
3475 3483 // holding on to a region.
3476 3484 _curr_region = NULL;
3477 3485 _finger = NULL;
3478 3486 _region_limit = NULL;
3479 3487
3480 3488 _region_finger = NULL;
3481 3489 }
3482 3490
3483 3491 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3484 3492 if (cm_oop_closure == NULL) {
3485 3493 assert(_cm_oop_closure != NULL, "invariant");
3486 3494 } else {
3487 3495 assert(_cm_oop_closure == NULL, "invariant");
3488 3496 }
3489 3497 _cm_oop_closure = cm_oop_closure;
3490 3498 }
3491 3499
3492 3500 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3493 3501 guarantee(nextMarkBitMap != NULL, "invariant");
3494 3502
3495 3503 if (_cm->verbose_low()) {
3496 3504 gclog_or_tty->print_cr("[%d] resetting", _task_id);
3497 3505 }
3498 3506
3499 3507 _nextMarkBitMap = nextMarkBitMap;
3500 3508 clear_region_fields();
3501 3509 assert(_aborted_region.is_empty(), "should have been cleared");
3502 3510
3503 3511 _calls = 0;
3504 3512 _elapsed_time_ms = 0.0;
3505 3513 _termination_time_ms = 0.0;
3506 3514 _termination_start_time_ms = 0.0;
3507 3515
3508 3516 #if _MARKING_STATS_
3509 3517 _local_pushes = 0;
3510 3518 _local_pops = 0;
3511 3519 _local_max_size = 0;
3512 3520 _objs_scanned = 0;
3513 3521 _global_pushes = 0;
3514 3522 _global_pops = 0;
3515 3523 _global_max_size = 0;
3516 3524 _global_transfers_to = 0;
3517 3525 _global_transfers_from = 0;
3518 3526 _region_stack_pops = 0;
3519 3527 _regions_claimed = 0;
3520 3528 _objs_found_on_bitmap = 0;
3521 3529 _satb_buffers_processed = 0;
3522 3530 _steal_attempts = 0;
3523 3531 _steals = 0;
3524 3532 _aborted = 0;
3525 3533 _aborted_overflow = 0;
3526 3534 _aborted_cm_aborted = 0;
3527 3535 _aborted_yield = 0;
3528 3536 _aborted_timed_out = 0;
3529 3537 _aborted_satb = 0;
3530 3538 _aborted_termination = 0;
3531 3539 #endif // _MARKING_STATS_
3532 3540 }
3533 3541
3534 3542 bool CMTask::should_exit_termination() {
3535 3543 regular_clock_call();
3536 3544 // This is called when we are in the termination protocol. We should
3537 3545 // quit if, for some reason, this task wants to abort or the global
3538 3546 // stack is not empty (this means that we can get work from it).
3539 3547 return !_cm->mark_stack_empty() || has_aborted();
3540 3548 }
3541 3549
3542 3550 void CMTask::reached_limit() {
3543 3551 assert(_words_scanned >= _words_scanned_limit ||
3544 3552 _refs_reached >= _refs_reached_limit ,
3545 3553 "shouldn't have been called otherwise");
3546 3554 regular_clock_call();
3547 3555 }
3548 3556
3549 3557 void CMTask::regular_clock_call() {
3550 3558 if (has_aborted()) return;
3551 3559
3552 3560 // First, we need to recalculate the words scanned and refs reached
3553 3561 // limits for the next clock call.
3554 3562 recalculate_limits();
3555 3563
3556 3564 // During the regular clock call we do the following
3557 3565
3558 3566 // (1) If an overflow has been flagged, then we abort.
3559 3567 if (_cm->has_overflown()) {
3560 3568 set_has_aborted();
3561 3569 return;
3562 3570 }
3563 3571
3564 3572 // If we are not concurrent (i.e. we're doing remark) we don't need
3565 3573 // to check anything else. The other steps are only needed during
3566 3574 // the concurrent marking phase.
3567 3575 if (!concurrent()) return;
3568 3576
3569 3577 // (2) If marking has been aborted for Full GC, then we also abort.
3570 3578 if (_cm->has_aborted()) {
3571 3579 set_has_aborted();
3572 3580 statsOnly( ++_aborted_cm_aborted );
3573 3581 return;
3574 3582 }
3575 3583
3576 3584 double curr_time_ms = os::elapsedVTime() * 1000.0;
3577 3585
3578 3586 // (3) If marking stats are enabled, then we update the step history.
3579 3587 #if _MARKING_STATS_
3580 3588 if (_words_scanned >= _words_scanned_limit) {
3581 3589 ++_clock_due_to_scanning;
3582 3590 }
3583 3591 if (_refs_reached >= _refs_reached_limit) {
3584 3592 ++_clock_due_to_marking;
3585 3593 }
3586 3594
3587 3595 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3588 3596 _interval_start_time_ms = curr_time_ms;
3589 3597 _all_clock_intervals_ms.add(last_interval_ms);
3590 3598
3591 3599 if (_cm->verbose_medium()) {
3592 3600 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3593 3601 "scanned = %d%s, refs reached = %d%s",
3594 3602 _task_id, last_interval_ms,
3595 3603 _words_scanned,
3596 3604 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3597 3605 _refs_reached,
3598 3606 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3599 3607 }
3600 3608 #endif // _MARKING_STATS_
3601 3609
3602 3610 // (4) We check whether we should yield. If we have to, then we abort.
3603 3611 if (_cm->should_yield()) {
3604 3612 // We should yield. To do this we abort the task. The caller is
3605 3613 // responsible for yielding.
3606 3614 set_has_aborted();
3607 3615 statsOnly( ++_aborted_yield );
3608 3616 return;
3609 3617 }
3610 3618
3611 3619 // (5) We check whether we've reached our time quota. If we have,
3612 3620 // then we abort.
3613 3621 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3614 3622 if (elapsed_time_ms > _time_target_ms) {
3615 3623 set_has_aborted();
3616 3624 _has_timed_out = true;
3617 3625 statsOnly( ++_aborted_timed_out );
3618 3626 return;
3619 3627 }
3620 3628
3621 3629   // (6) Finally, we check whether there are enough completed SATB
3622 3630 // buffers available for processing. If there are, we abort.
3623 3631 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3624 3632 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3625 3633 if (_cm->verbose_low()) {
3626 3634 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3627 3635 _task_id);
3628 3636 }
3629 3637 // we do need to process SATB buffers, we'll abort and restart
3630 3638 // the marking task to do so
3631 3639 set_has_aborted();
3632 3640 statsOnly( ++_aborted_satb );
3633 3641 return;
3634 3642 }
3635 3643 }
3636 3644
3637 3645 void CMTask::recalculate_limits() {
3638 3646 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3639 3647 _words_scanned_limit = _real_words_scanned_limit;
3640 3648
3641 3649 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3642 3650 _refs_reached_limit = _real_refs_reached_limit;
3643 3651 }
3644 3652
3645 3653 void CMTask::decrease_limits() {
3646 3654 // This is called when we believe that we're going to do an infrequent
3647 3655 // operation which will increase the per byte scanned cost (i.e. move
3648 3656 // entries to/from the global stack). It basically tries to decrease the
3649 3657 // scanning limit so that the clock is called earlier.
3650 3658
3651 3659 if (_cm->verbose_medium()) {
3652 3660 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3653 3661 }
3654 3662
3655 3663 _words_scanned_limit = _real_words_scanned_limit -
3656 3664 3 * words_scanned_period / 4;
3657 3665 _refs_reached_limit = _real_refs_reached_limit -
3658 3666 3 * refs_reached_period / 4;
3659 3667 }
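// Worked example of the effect of decrease_limits(): right after
// recalculate_limits(), _words_scanned_limit is
// _words_scanned + words_scanned_period. Subtracting
// 3 * words_scanned_period / 4 leaves at most words_scanned_period / 4
// words of budget, so the regular clock fires after no more than a
// quarter of the normal scanning period (and likewise for the
// refs-reached limit).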
3660 3668
3661 3669 void CMTask::move_entries_to_global_stack() {
3662 3670 // local array where we'll store the entries that will be popped
3663 3671 // from the local queue
3664 3672 oop buffer[global_stack_transfer_size];
3665 3673
3666 3674 int n = 0;
3667 3675 oop obj;
3668 3676 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3669 3677 buffer[n] = obj;
3670 3678 ++n;
3671 3679 }
3672 3680
3673 3681 if (n > 0) {
3674 3682 // we popped at least one entry from the local queue
3675 3683
3676 3684 statsOnly( ++_global_transfers_to; _local_pops += n );
3677 3685
3678 3686 if (!_cm->mark_stack_push(buffer, n)) {
3679 3687 if (_cm->verbose_low()) {
3680 3688 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3681 3689 _task_id);
3682 3690 }
3683 3691 set_has_aborted();
3684 3692 } else {
3685 3693 // the transfer was successful
3686 3694
3687 3695 if (_cm->verbose_medium()) {
3688 3696 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3689 3697 _task_id, n);
3690 3698 }
3691 3699 statsOnly( int tmp_size = _cm->mark_stack_size();
3692 3700 if (tmp_size > _global_max_size) {
3693 3701 _global_max_size = tmp_size;
3694 3702 }
3695 3703 _global_pushes += n );
3696 3704 }
3697 3705 }
3698 3706
3699 3707 // this operation was quite expensive, so decrease the limits
3700 3708 decrease_limits();
3701 3709 }
3702 3710
3703 3711 void CMTask::get_entries_from_global_stack() {
3704 3712 // local array where we'll store the entries that will be popped
3705 3713 // from the global stack.
3706 3714 oop buffer[global_stack_transfer_size];
3707 3715 int n;
3708 3716 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3709 3717 assert(n <= global_stack_transfer_size,
3710 3718 "we should not pop more than the given limit");
3711 3719 if (n > 0) {
3712 3720 // yes, we did actually pop at least one entry
3713 3721
3714 3722 statsOnly( ++_global_transfers_from; _global_pops += n );
3715 3723 if (_cm->verbose_medium()) {
3716 3724 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3717 3725 _task_id, n);
3718 3726 }
3719 3727 for (int i = 0; i < n; ++i) {
3720 3728 bool success = _task_queue->push(buffer[i]);
3721 3729 // We only call this when the local queue is empty or under a
3722 3730 // given target limit. So, we do not expect this push to fail.
3723 3731 assert(success, "invariant");
3724 3732 }
3725 3733
3726 3734 statsOnly( int tmp_size = _task_queue->size();
3727 3735 if (tmp_size > _local_max_size) {
3728 3736 _local_max_size = tmp_size;
3729 3737 }
3730 3738 _local_pushes += n );
3731 3739 }
3732 3740
3733 3741 // this operation was quite expensive, so decrease the limits
3734 3742 decrease_limits();
3735 3743 }
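// The two transfer methods above amortise the cost of touching the
// shared mark stack: entries move in batches of up to
// global_stack_transfer_size rather than one at a time. For
// illustration (the batch size here is hypothetical), with a transfer
// size of 16, one mark_stack_push(buffer, n) call replaces up to 16
// individually synchronised pushes.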
3736 3744
3737 3745 void CMTask::drain_local_queue(bool partially) {
3738 3746 if (has_aborted()) return;
3739 3747
3740 3748 // Decide what the target size is, depending whether we're going to
3741 3749 // drain it partially (so that other tasks can steal if they run out
3742 3750 // of things to do) or totally (at the very end).
3743 3751 size_t target_size;
3744 3752 if (partially) {
3745 3753 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3746 3754 } else {
3747 3755 target_size = 0;
3748 3756 }
3749 3757
3750 3758 if (_task_queue->size() > target_size) {
3751 3759 if (_cm->verbose_high()) {
3752 3760 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3753 3761 _task_id, target_size);
3754 3762 }
3755 3763
3756 3764 oop obj;
3757 3765 bool ret = _task_queue->pop_local(obj);
3758 3766 while (ret) {
3759 3767 statsOnly( ++_local_pops );
3760 3768
3761 3769 if (_cm->verbose_high()) {
3762 3770 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3763 3771 (void*) obj);
3764 3772 }
3765 3773
3766 3774 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3767 3775 assert(!_g1h->is_on_master_free_list(
3768 3776 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3769 3777
3770 3778 scan_object(obj);
3771 3779
3772 3780 if (_task_queue->size() <= target_size || has_aborted()) {
3773 3781 ret = false;
3774 3782 } else {
3775 3783 ret = _task_queue->pop_local(obj);
3776 3784 }
3777 3785 }
3778 3786
3779 3787 if (_cm->verbose_high()) {
3780 3788 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3781 3789 _task_id, _task_queue->size());
3782 3790 }
3783 3791 }
3784 3792 }
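// Illustrative arithmetic for the partial-drain target above,
// assuming the default GCDrainStackTargetSize of 64 and a task queue
// whose max_elems() is well above 3 * 64: target_size becomes
// MIN2(max_elems() / 3, 64) == 64, so a partial drain stops once 64
// entries remain, leaving work available for other tasks to steal.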
3785 3793
3786 3794 void CMTask::drain_global_stack(bool partially) {
3787 3795 if (has_aborted()) return;
3788 3796
3789 3797 // We have a policy to drain the local queue before we attempt to
3790 3798 // drain the global stack.
3791 3799 assert(partially || _task_queue->size() == 0, "invariant");
3792 3800
3793 3801 // Decide what the target size is, depending whether we're going to
3794 3802 // drain it partially (so that other tasks can steal if they run out
3795 3803 // of things to do) or totally (at the very end). Notice that,
3796 3804 // because we move entries from the global stack in chunks or
3797 3805 // because another task might be doing the same, we might in fact
3798 3806 // drop below the target. But, this is not a problem.
3799 3807 size_t target_size;
3800 3808 if (partially) {
3801 3809 target_size = _cm->partial_mark_stack_size_target();
3802 3810 } else {
3803 3811 target_size = 0;
3804 3812 }
3805 3813
3806 3814 if (_cm->mark_stack_size() > target_size) {
3807 3815 if (_cm->verbose_low()) {
3808 3816 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3809 3817 _task_id, target_size);
3810 3818 }
3811 3819
3812 3820 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3813 3821 get_entries_from_global_stack();
3814 3822 drain_local_queue(partially);
3815 3823 }
3816 3824
3817 3825 if (_cm->verbose_low()) {
3818 3826 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3819 3827 _task_id, _cm->mark_stack_size());
3820 3828 }
3821 3829 }
3822 3830 }
3823 3831
3824 3832 // The SATB queue set makes several assumptions about whether to call
3825 3833 // the par or non-par versions of its methods. This is why some of the code is
3826 3834 // replicated. We should really get rid of the single-threaded version
3827 3835 // of the code to simplify things.
3828 3836 void CMTask::drain_satb_buffers() {
3829 3837 if (has_aborted()) return;
3830 3838
3831 3839 // We set this so that the regular clock knows that we're in the
3832 3840 // middle of draining buffers and doesn't set the abort flag when it
3833 3841 // notices that SATB buffers are available for draining. It'd be
3834 3842   // very counterproductive if it did that. :-)
3835 3843 _draining_satb_buffers = true;
3836 3844
3837 3845 CMObjectClosure oc(this);
3838 3846 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3839 3847 if (G1CollectedHeap::use_parallel_gc_threads()) {
3840 3848 satb_mq_set.set_par_closure(_task_id, &oc);
3841 3849 } else {
3842 3850 satb_mq_set.set_closure(&oc);
3843 3851 }
3844 3852
3845 3853 // This keeps claiming and applying the closure to completed buffers
3846 3854 // until we run out of buffers or we need to abort.
3847 3855 if (G1CollectedHeap::use_parallel_gc_threads()) {
3848 3856 while (!has_aborted() &&
3849 3857 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3850 3858 if (_cm->verbose_medium()) {
3851 3859 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3852 3860 }
3853 3861 statsOnly( ++_satb_buffers_processed );
3854 3862 regular_clock_call();
3855 3863 }
3856 3864 } else {
3857 3865 while (!has_aborted() &&
3858 3866 satb_mq_set.apply_closure_to_completed_buffer()) {
3859 3867 if (_cm->verbose_medium()) {
3860 3868 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3861 3869 }
3862 3870 statsOnly( ++_satb_buffers_processed );
3863 3871 regular_clock_call();
3864 3872 }
3865 3873 }
3866 3874
3867 3875 if (!concurrent() && !has_aborted()) {
3868 3876 // We should only do this during remark.
3869 3877 if (G1CollectedHeap::use_parallel_gc_threads()) {
3870 3878 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3871 3879 } else {
3872 3880 satb_mq_set.iterate_closure_all_threads();
3873 3881 }
3874 3882 }
3875 3883
3876 3884 _draining_satb_buffers = false;
3877 3885
3878 3886 assert(has_aborted() ||
3879 3887 concurrent() ||
3880 3888 satb_mq_set.completed_buffers_num() == 0, "invariant");
3881 3889
3882 3890 if (G1CollectedHeap::use_parallel_gc_threads()) {
3883 3891 satb_mq_set.set_par_closure(_task_id, NULL);
3884 3892 } else {
3885 3893 satb_mq_set.set_closure(NULL);
3886 3894 }
3887 3895
3888 3896 // again, this was a potentially expensive operation, decrease the
3889 3897 // limits to get the regular clock call early
3890 3898 decrease_limits();
3891 3899 }
3892 3900
3893 3901 void CMTask::drain_region_stack(BitMapClosure* bc) {
3894 3902 if (has_aborted()) return;
3895 3903
3896 3904 assert(_region_finger == NULL,
3897 3905 "it should be NULL when we're not scanning a region");
3898 3906
3899 3907 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3900 3908 if (_cm->verbose_low()) {
3901 3909 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3902 3910 _task_id, _cm->region_stack_size());
3903 3911 }
3904 3912
3905 3913 MemRegion mr;
3906 3914
3907 3915 if (!_aborted_region.is_empty()) {
3908 3916 mr = _aborted_region;
3909 3917 _aborted_region = MemRegion();
3910 3918
3911 3919 if (_cm->verbose_low()) {
3912 3920 gclog_or_tty->print_cr("[%d] scanning aborted region "
3913 3921 "[ " PTR_FORMAT ", " PTR_FORMAT " )",
3914 3922 _task_id, mr.start(), mr.end());
3915 3923 }
3916 3924 } else {
3917 3925 mr = _cm->region_stack_pop_lock_free();
3918 3926 // it returns MemRegion() if the pop fails
3919 3927 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3920 3928 }
3921 3929
3922 3930 while (mr.start() != NULL) {
3923 3931 if (_cm->verbose_medium()) {
3924 3932 gclog_or_tty->print_cr("[%d] we are scanning region "
3925 3933 "["PTR_FORMAT", "PTR_FORMAT")",
3926 3934 _task_id, mr.start(), mr.end());
3927 3935 }
3928 3936
3929 3937 assert(mr.end() <= _cm->finger(),
3930 3938 "otherwise the region shouldn't be on the stack");
3931 3939 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3932 3940 if (_nextMarkBitMap->iterate(bc, mr)) {
3933 3941 assert(!has_aborted(),
3934 3942 "cannot abort the task without aborting the bitmap iteration");
3935 3943
3936 3944 // We finished iterating over the region without aborting.
3937 3945 regular_clock_call();
3938 3946 if (has_aborted()) {
3939 3947 mr = MemRegion();
3940 3948 } else {
3941 3949 mr = _cm->region_stack_pop_lock_free();
3942 3950 // it returns MemRegion() if the pop fails
3943 3951 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3944 3952 }
3945 3953 } else {
3946 3954 assert(has_aborted(), "currently the only way to do so");
3947 3955
3948 3956 // The only way to abort the bitmap iteration is to return
3949 3957 // false from the do_bit() method. However, inside the
3950 3958 // do_bit() method we move the _region_finger to point to the
3951 3959 // object currently being looked at. So, if we bail out, we
3952 3960 // have definitely set _region_finger to something non-null.
3953 3961 assert(_region_finger != NULL, "invariant");
3954 3962
3955 3963 // Make sure that any previously aborted region has been
3956 3964 // cleared.
3957 3965 assert(_aborted_region.is_empty(), "aborted region not cleared");
3958 3966
3959 3967 // The iteration was actually aborted. So now _region_finger
3960 3968 // points to the address of the object we last scanned. If we
3961 3969 // leave it there, when we restart this task, we will rescan
3962 3970 // the object. It is easy to avoid this. We move the finger by
3963 3971 // enough to point to the next possible object header (the
3964 3972 // bitmap knows by how much we need to move it as it knows its
3965 3973 // granularity).
3966 3974 MemRegion newRegion =
3967 3975 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3968 3976
3969 3977 if (!newRegion.is_empty()) {
3970 3978 if (_cm->verbose_low()) {
3971 3979           gclog_or_tty->print_cr("[%d] recording unscanned region "
3972 3980 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3973 3981 _task_id,
3974 3982 newRegion.start(), newRegion.end());
3975 3983 }
3976 3984 // Now record the part of the region we didn't scan to
3977 3985 // make sure this task scans it later.
3978 3986 _aborted_region = newRegion;
3979 3987 }
3980 3988 // break from while
3981 3989 mr = MemRegion();
3982 3990 }
3983 3991 _region_finger = NULL;
3984 3992 }
3985 3993
3986 3994 if (_cm->verbose_low()) {
3987 3995 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3988 3996 _task_id, _cm->region_stack_size());
3989 3997 }
3990 3998 }
3991 3999 }
3992 4000
3993 4001 void CMTask::print_stats() {
3994 4002 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3995 4003 _task_id, _calls);
3996 4004 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3997 4005 _elapsed_time_ms, _termination_time_ms);
3998 4006 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3999 4007 _step_times_ms.num(), _step_times_ms.avg(),
4000 4008 _step_times_ms.sd());
4001 4009 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4002 4010 _step_times_ms.maximum(), _step_times_ms.sum());
4003 4011
4004 4012 #if _MARKING_STATS_
4005 4013 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4006 4014 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4007 4015 _all_clock_intervals_ms.sd());
4008 4016 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4009 4017 _all_clock_intervals_ms.maximum(),
4010 4018 _all_clock_intervals_ms.sum());
4011 4019 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
4012 4020 _clock_due_to_scanning, _clock_due_to_marking);
4013 4021 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
4014 4022 _objs_scanned, _objs_found_on_bitmap);
4015 4023 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
4016 4024 _local_pushes, _local_pops, _local_max_size);
4017 4025 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
4018 4026 _global_pushes, _global_pops, _global_max_size);
4019 4027 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
4020 4028 _global_transfers_to,_global_transfers_from);
4021 4029 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
4022 4030 _regions_claimed, _region_stack_pops);
4023 4031 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
4024 4032 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
4025 4033 _steal_attempts, _steals);
4026 4034 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
4027 4035 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
4028 4036 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4029 4037 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
4030 4038 _aborted_timed_out, _aborted_satb, _aborted_termination);
4031 4039 #endif // _MARKING_STATS_
4032 4040 }
4033 4041
4034 4042 /*****************************************************************************
4035 4043
4036 4044 The do_marking_step(time_target_ms) method is the building block
4037 4045 of the parallel marking framework. It can be called in parallel
4038 4046 with other invocations of do_marking_step() on different tasks
4039 4047 (but only one per task, obviously) and concurrently with the
4040 4048 mutator threads, or during remark, hence it eliminates the need
4041 4049 for two versions of the code. When called during remark, it will
4042 4050 pick up from where the task left off during the concurrent marking
4043 4051 phase. Interestingly, tasks are also claimable during evacuation
4044 4052     pauses, since do_marking_step() ensures that it aborts before
4045 4053 it needs to yield.
4046 4054
4047 4055     The data structures that it uses to do marking work are the
4048 4056 following:
4049 4057
4050 4058 (1) Marking Bitmap. If there are gray objects that appear only
4051 4059 on the bitmap (this happens either when dealing with an overflow
4052 4060 or when the initial marking phase has simply marked the roots
4053 4061 and didn't push them on the stack), then tasks claim heap
4054 4062 regions whose bitmap they then scan to find gray objects. A
4055 4063 global finger indicates where the end of the last claimed region
4056 4064 is. A local finger indicates how far into the region a task has
4057 4065 scanned. The two fingers are used to determine how to gray an
4058 4066 object (i.e. whether simply marking it is OK, as it will be
4059 4067     visited by a task in the future, or whether it also needs to be
4060 4068 pushed on a stack).
4061 4069
4062 4070 (2) Local Queue. The local queue of the task which is accessed
4063 4071 reasonably efficiently by the task. Other tasks can steal from
4064 4072 it when they run out of work. Throughout the marking phase, a
4065 4073 task attempts to keep its local queue short but not totally
4066 4074 empty, so that entries are available for stealing by other
4067 4075     tasks. Only when there is no more work will a task totally
4068 4076 drain its local queue.
4069 4077
4070 4078 (3) Global Mark Stack. This handles local queue overflow. During
4071 4079 marking only sets of entries are moved between it and the local
4072 4080     queues, as access to it requires a mutex, and more fine-grained
4073 4081     interaction with it might cause contention. If it
4074 4082 overflows, then the marking phase should restart and iterate
4075 4083 over the bitmap to identify gray objects. Throughout the marking
4076 4084 phase, tasks attempt to keep the global mark stack at a small
4077 4085 length but not totally empty, so that entries are available for
4078 4086     popping by other tasks. Only when there is no more work will
4079 4087     tasks totally drain the global mark stack.
4080 4088
4081 4089 (4) Global Region Stack. Entries on it correspond to areas of
4082 4090 the bitmap that need to be scanned since they contain gray
4083 4091 objects. Pushes on the region stack only happen during
4084 4092 evacuation pauses and typically correspond to areas covered by
4085 4093     GC LABs. If it overflows, then the marking phase should restart
4086 4094 and iterate over the bitmap to identify gray objects. Tasks will
4087 4095 try to totally drain the region stack as soon as possible.
4088 4096
4089 4097 (5) SATB Buffer Queue. This is where completed SATB buffers are
4090 4098 made available. Buffers are regularly removed from this queue
4091 4099 and scanned for roots, so that the queue doesn't get too
4092 4100 long. During remark, all completed buffers are processed, as
4093 4101     well as the filled-in parts of any uncompleted buffers.
4094 4102
4095 4103 The do_marking_step() method tries to abort when the time target
4096 4104 has been reached. There are a few other cases when the
4097 4105 do_marking_step() method also aborts:
4098 4106
4099 4107 (1) When the marking phase has been aborted (after a Full GC).
4100 4108
4101 4109 (2) When a global overflow (either on the global stack or the
4102 4110 region stack) has been triggered. Before the task aborts, it
4103 4111 will actually sync up with the other tasks to ensure that all
4104 4112 the marking data structures (local queues, stacks, fingers etc.)
4105 4113 are re-initialised so that when do_marking_step() completes,
4106 4114 the marking phase can immediately restart.
4107 4115
4108 4116 (3) When enough completed SATB buffers are available. The
4109 4117 do_marking_step() method only tries to drain SATB buffers right
4110 4118 at the beginning. So, if enough buffers are available, the
4111 4119 marking step aborts and the SATB buffers are processed at
4112 4120 the beginning of the next invocation.
4113 4121
4114 4122     (4) To yield. When we have to yield, we abort and yield
4115 4123 right at the end of do_marking_step(). This saves us from a lot
4116 4124     of hassle as, by yielding, we might allow a Full GC. If this
4117 4125 happens then objects will be compacted underneath our feet, the
4118 4126 heap might shrink, etc. We save checking for this by just
4119 4127 aborting and doing the yield right at the end.
4120 4128
4121 4129 From the above it follows that the do_marking_step() method should
4122 4130 be called in a loop (or, otherwise, regularly) until it completes.
4123 4131
4124 4132 If a marking step completes without its has_aborted() flag being
4125 4133 true, it means it has completed the current marking phase (and
4126 4134 also all other marking tasks have done so and have all synced up).
4127 4135
4128 4136 A method called regular_clock_call() is invoked "regularly" (in
4129 4137     sub-ms intervals) throughout marking. It is this clock method that
4130 4138 checks all the abort conditions which were mentioned above and
4131 4139 decides when the task should abort. A work-based scheme is used to
4132 4140 trigger this clock method: when the number of object words the
4133 4141 marking phase has scanned or the number of references the marking
4134 4142     phase has visited reaches a given limit. Additional invocations of
4135 4143     the clock method have been planted in a few other strategic places
4136 4144 too. The initial reason for the clock method was to avoid calling
4137 4145 vtime too regularly, as it is quite expensive. So, once it was in
4138 4146 place, it was natural to piggy-back all the other conditions on it
4139 4147 too and not constantly check them throughout the code.
4140 4148
4141 4149 *****************************************************************************/
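// A minimal sketch of the calling protocol described above; the
// driver shown here is hypothetical (the real callers are the
// concurrent marking and remark code paths):
//
//   CMTask* task = ...;  // a claimed, set-up task
//   do {
//     task->do_marking_step(10.0 /* time target (ms) */,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//     if (task->has_aborted()) {
//       // yield / handle overflow as required, then retry; the task
//       // will pick up from where it left off
//     }
//   } while (task->has_aborted());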
4142 4150
4143 4151 void CMTask::do_marking_step(double time_target_ms,
4144 4152 bool do_stealing,
4145 4153 bool do_termination) {
4146 4154 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4147 4155 assert(concurrent() == _cm->concurrent(), "they should be the same");
4148 4156
4149 4157 assert(concurrent() || _cm->region_stack_empty(),
4150 4158 "the region stack should have been cleared before remark");
4151 4159 assert(concurrent() || !_cm->has_aborted_regions(),
4152 4160 "aborted regions should have been cleared before remark");
4153 4161 assert(_region_finger == NULL,
4154 4162 "this should be non-null only when a region is being scanned");
4155 4163
4156 4164 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4157 4165 assert(_task_queues != NULL, "invariant");
4158 4166 assert(_task_queue != NULL, "invariant");
4159 4167 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4160 4168
4161 4169 assert(!_claimed,
4162 4170 "only one thread should claim this task at any one time");
4163 4171
4164 4172   // OK, this doesn't safeguard against all possible scenarios, as it is
4165 4173 // possible for two threads to set the _claimed flag at the same
4166 4174 // time. But it is only for debugging purposes anyway and it will
4167 4175 // catch most problems.
4168 4176 _claimed = true;
4169 4177
4170 4178 _start_time_ms = os::elapsedVTime() * 1000.0;
4171 4179 statsOnly( _interval_start_time_ms = _start_time_ms );
4172 4180
4173 4181 double diff_prediction_ms =
4174 4182 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4175 4183 _time_target_ms = time_target_ms - diff_prediction_ms;
4176 4184
4177 4185 // set up the variables that are used in the work-based scheme to
4178 4186 // call the regular clock method
4179 4187 _words_scanned = 0;
4180 4188 _refs_reached = 0;
4181 4189 recalculate_limits();
4182 4190
4183 4191 // clear all flags
4184 4192 clear_has_aborted();
4185 4193 _has_timed_out = false;
4186 4194 _draining_satb_buffers = false;
4187 4195
4188 4196 ++_calls;
4189 4197
4190 4198 if (_cm->verbose_low()) {
4191 4199 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4192 4200 "target = %1.2lfms >>>>>>>>>>",
4193 4201 _task_id, _calls, _time_target_ms);
4194 4202 }
4195 4203
4196 4204 // Set up the bitmap and oop closures. Anything that uses them is
4197 4205 // eventually called from this method, so it is OK to allocate these
4198 4206 // statically.
4199 4207 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4200 4208 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4201 4209 set_cm_oop_closure(&cm_oop_closure);
4202 4210
4203 4211 if (_cm->has_overflown()) {
4204 4212 // This can happen if the region stack or the mark stack overflows
4205 4213 // during a GC pause and this task, after a yield point,
4206 4214 // restarts. We have to abort as we need to get into the overflow
4207 4215 // protocol which happens right at the end of this task.
4208 4216 set_has_aborted();
4209 4217 }
4210 4218
4211 4219 // First drain any available SATB buffers. After this, we will not
4212 4220 // look at SATB buffers before the next invocation of this method.
4213 4221 // If enough completed SATB buffers are queued up, the regular clock
4214 4222 // will abort this task so that it restarts.
4215 4223 drain_satb_buffers();
4216 4224 // ...then partially drain the local queue and the global stack
4217 4225 drain_local_queue(true);
4218 4226 drain_global_stack(true);
4219 4227
4220 4228 // Then totally drain the region stack. We will not look at
4221 4229 // it again before the next invocation of this method. Entries on
4222 4230 // the region stack are only added during evacuation pauses, for
4223 4231 // which we have to yield. When we do, we abort the task anyway so
4224 4232 // it will look at the region stack again when it restarts.
4225 4233 bitmap_closure.set_scanning_heap_region(false);
4226 4234 drain_region_stack(&bitmap_closure);
4227 4235 // ...then partially drain the local queue and the global stack
4228 4236 drain_local_queue(true);
4229 4237 drain_global_stack(true);
4230 4238
4231 4239 do {
4232 4240 if (!has_aborted() && _curr_region != NULL) {
4233 4241 // This means that we're already holding on to a region.
4234 4242 assert(_finger != NULL, "if region is not NULL, then the finger "
4235 4243 "should not be NULL either");
4236 4244
4237 4245 // We might have restarted this task after an evacuation pause
4238 4246 // which might have evacuated the region we're holding on to
4239 4247 // underneath our feet. Let's read its limit again to make sure
4240 4248 // that we do not iterate over a region of the heap that
4241 4249 // contains garbage (update_region_limit() will also move
4242 4250 // _finger to the start of the region if it is found empty).
4243 4251 update_region_limit();
4244 4252 // We will start from _finger not from the start of the region,
4245 4253 // as we might be restarting this task after aborting half-way
4246 4254 // through scanning this region. In this case, _finger points to
4247 4255 // the address where we last found a marked object. If this is a
4248 4256 // fresh region, _finger points to start().
4249 4257 MemRegion mr = MemRegion(_finger, _region_limit);
4250 4258
4251 4259 if (_cm->verbose_low()) {
4252 4260 gclog_or_tty->print_cr("[%d] we're scanning part "
4253 4261 "["PTR_FORMAT", "PTR_FORMAT") "
4254 4262 "of region "PTR_FORMAT,
4255 4263 _task_id, _finger, _region_limit, _curr_region);
4256 4264 }
4257 4265
4258 4266 // Let's iterate over the bitmap of the part of the
4259 4267 // region that is left.
4260 4268 bitmap_closure.set_scanning_heap_region(true);
4261 4269 if (mr.is_empty() ||
4262 4270 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4263 4271 // We successfully completed iterating over the region. Now,
4264 4272 // let's give up the region.
4265 4273 giveup_current_region();
4266 4274 regular_clock_call();
4267 4275 } else {
4268 4276 assert(has_aborted(), "currently the only way to do so");
4269 4277 // The only way to abort the bitmap iteration is to return
4270 4278 // false from the do_bit() method. However, inside the
4271 4279 // do_bit() method we move the _finger to point to the
4272 4280 // object currently being looked at. So, if we bail out, we
4273 4281 // have definitely set _finger to something non-null.
4274 4282 assert(_finger != NULL, "invariant");
4275 4283
4276 4284 // Region iteration was actually aborted. So now _finger
4277 4285 // points to the address of the object we last scanned. If we
4278 4286 // leave it there, when we restart this task, we will rescan
4279 4287 // the object. It is easy to avoid this. We move the finger by
4280 4288 // enough to point to the next possible object header (the
4281 4289 // bitmap knows by how much we need to move it as it knows its
4282 4290 // granularity).
4283 4291 assert(_finger < _region_limit, "invariant");
4284 4292 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4285 4293 // Check if bitmap iteration was aborted while scanning the last object
4286 4294 if (new_finger >= _region_limit) {
4287 4295 giveup_current_region();
4288 4296 } else {
4289 4297 move_finger_to(new_finger);
4290 4298 }
4291 4299 }
4292 4300 }
4293 4301 // At this point we have either completed iterating over the
4294 4302 // region we were holding on to, or we have aborted.
4295 4303
4296 4304 // We then partially drain the local queue and the global stack.
4297 4305 // (Do we really need this?)
4298 4306 drain_local_queue(true);
4299 4307 drain_global_stack(true);
4300 4308
4301 4309 // Read the note on the claim_region() method on why it might
4302 4310 // return NULL with potentially more regions available for
4303 4311 // claiming and why we have to check out_of_regions() to determine
4304 4312 // whether we're done or not.
4305 4313 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4306 4314 // We are going to try to claim a new region. We should have
4307 4315 // given up on the previous one.
4308 4316 // Separated the asserts so that we know which one fires.
4309 4317 assert(_curr_region == NULL, "invariant");
4310 4318 assert(_finger == NULL, "invariant");
4311 4319 assert(_region_limit == NULL, "invariant");
4312 4320 if (_cm->verbose_low()) {
4313 4321 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4314 4322 }
4315 4323 HeapRegion* claimed_region = _cm->claim_region(_task_id);
4316 4324 if (claimed_region != NULL) {
4317 4325 // Yes, we managed to claim one
4318 4326 statsOnly( ++_regions_claimed );
4319 4327
4320 4328 if (_cm->verbose_low()) {
4321 4329 gclog_or_tty->print_cr("[%d] we successfully claimed "
4322 4330 "region "PTR_FORMAT,
4323 4331 _task_id, claimed_region);
4324 4332 }
4325 4333
4326 4334 setup_for_region(claimed_region);
4327 4335 assert(_curr_region == claimed_region, "invariant");
4328 4336 }
4329 4337 // It is important to call the regular clock here. It might take
4330 4338 // a while to claim a region if, for example, we hit a large
4331 4339 // block of empty regions. So we need to call the regular clock
4332 4340 // method once round the loop to make sure it's called
4333 4341 // frequently enough.
4334 4342 regular_clock_call();
4335 4343 }
4336 4344
4337 4345 if (!has_aborted() && _curr_region == NULL) {
4338 4346 assert(_cm->out_of_regions(),
4339 4347 "at this point we should be out of regions");
4340 4348 }
4341 4349 } while (_curr_region != NULL && !has_aborted());
4342 4350
4343 4351 if (!has_aborted()) {
4344 4352 // We cannot check whether the global stack is empty, since other
4345 4353 // tasks might be pushing objects to it concurrently. We also cannot
4346 4354 // check if the region stack is empty because if a thread is aborting
4347 4355 // it can push a partially done region back.
4348 4356 assert(_cm->out_of_regions(),
4349 4357 "at this point we should be out of regions");
4350 4358
4351 4359 if (_cm->verbose_low()) {
4352 4360 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4353 4361 }
4354 4362
4355 4363 // Try to reduce the number of available SATB buffers so that
4356 4364 // remark has less work to do.
4357 4365 drain_satb_buffers();
4358 4366 }
4359 4367
4360 4368 // Since we've done everything else, we can now totally drain the
4361 4369 // local queue and global stack.
4362 4370 drain_local_queue(false);
4363 4371 drain_global_stack(false);
4364 4372
4365 4373 // Attempt at work stealing from other tasks' queues.
4366 4374 if (do_stealing && !has_aborted()) {
4367 4375 // We have not aborted. This means that we have finished all that
4368 4376 // we could. Let's try to do some stealing...
4369 4377
4370 4378 // We cannot check whether the global stack is empty, since other
4371 4379 // tasks might be pushing objects to it concurrently. We also cannot
4372 4380 // check if the region stack is empty because if a thread is aborting
4373 4381 // it can push a partially done region back.
4374 4382 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4375 4383 "only way to reach here");
4376 4384
4377 4385 if (_cm->verbose_low()) {
4378 4386 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4379 4387 }
4380 4388
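// try_stealing() attempts to pop an entry off another task's queue,
// picking victims pseudo-randomly (that is what _hash_seed is for);
// it fails once all the queues appear to be empty.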
4381 4389 while (!has_aborted()) {
4382 4390 oop obj;
4383 4391 statsOnly( ++_steal_attempts );
4384 4392
4385 4393 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4386 4394 if (_cm->verbose_medium()) {
4387 4395 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4388 4396 _task_id, (void*) obj);
4389 4397 }
4390 4398
4391 4399 statsOnly( ++_steals );
4392 4400
4393 4401 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4394 4402 "any stolen object should be marked");
4395 4403 scan_object(obj);
4396 4404
4397 4405 // And since we're towards the end, let's totally drain the
4398 4406 // local queue and global stack.
4399 4407 drain_local_queue(false);
4400 4408 drain_global_stack(false);
4401 4409 } else {
4402 4410 break;
4403 4411 }
4404 4412 }
4405 4413 }
4406 4414
4407 4415 // If we are about to wrap up and go into termination, check if we
4408 4416 // should raise the overflow flag.
4409 4417 if (do_termination && !has_aborted()) {
4410 4418 if (_cm->force_overflow()->should_force()) {
4411 4419 _cm->set_has_overflown();
4412 4420 regular_clock_call();
4413 4421 }
4414 4422 }
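// (force_overflow() is a testing hook: it can artificially raise the
// global overflow flag so that the overflow/restart protocol gets
// exercised on demand.)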
4415 4423
4416 4424 // If we still have not aborted by this point, let's try to get
4417 4425 // into the termination protocol.
4418 4426 if (do_termination && !has_aborted()) {
4419 4427 // We cannot check whether the global stack is empty, since other
4420 4428 // tasks might be concurrently pushing objects on it. We also cannot
4421 4429 // check if the region stack is empty because if a thread is aborting
4422 4430 // it can push a partially done region back.
4423 4431 // Separated the asserts so that we know which one fires.
4424 4432 assert(_cm->out_of_regions(), "only way to reach here");
4425 4433 assert(_task_queue->size() == 0, "only way to reach here");
4426 4434
4427 4435 if (_cm->verbose_low()) {
4428 4436 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4429 4437 }
4430 4438
4431 4439 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4432 4440 // The CMTask class also extends the TerminatorTerminator class,
4433 4441 // hence its should_exit_termination() method will also decide
4434 4442 // whether to exit the termination protocol or not.
4435 4443 bool finished = _cm->terminator()->offer_termination(this);
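// offer_termination() blocks until either all tasks have offered
// termination (in which case it returns true) or more work appears
// (in which case it returns false and we abort and restart below).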
4436 4444 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4437 4445 _termination_time_ms +=
4438 4446 termination_end_time_ms - _termination_start_time_ms;
4439 4447
4440 4448 if (finished) {
4441 4449 // We're all done.
4442 4450
4443 4451 if (_task_id == 0) {
4444 4452 // let's allow task 0 to do this
4445 4453 if (concurrent()) {
4446 4454 assert(_cm->concurrent_marking_in_progress(), "invariant");
4447 4455 // we need to set this to false before the next
4448 4456 // safepoint. This way we ensure that the marking phase
4449 4457 // doesn't observe any more heap expansions.
4450 4458 _cm->clear_concurrent_marking_in_progress();
4451 4459 }
4452 4460 }
4453 4461
4454 4462 // We can now guarantee that the global stack is empty, since
4455 4463 // all other tasks have finished. We separated the guarantees so
4456 4464 // that, if a condition is false, we can immediately find out
4457 4465 // which one.
4458 4466 guarantee(_cm->out_of_regions(), "only way to reach here");
4459 4467 guarantee(_aborted_region.is_empty(), "only way to reach here");
4460 4468 guarantee(_cm->region_stack_empty(), "only way to reach here");
4461 4469 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4462 4470 guarantee(_task_queue->size() == 0, "only way to reach here");
4463 4471 guarantee(!_cm->has_overflown(), "only way to reach here");
4464 4472 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4465 4473 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4466 4474
4467 4475 if (_cm->verbose_low()) {
4468 4476 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4469 4477 }
4470 4478 } else {
4471 4479         // Apparently there's more work to do. Let's abort this task;
4472 4480         // its caller will restart it and we can hopefully find more things to do.
4473 4481
4474 4482 if (_cm->verbose_low()) {
4475 4483 gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4476 4484 _task_id);
4477 4485 }
4478 4486
4479 4487 set_has_aborted();
4480 4488 statsOnly( ++_aborted_termination );
4481 4489 }
4482 4490 }
4483 4491
4484 4492 // Mainly for debugging purposes to make sure that a pointer to the
4485 4493 // closure which was statically allocated in this frame doesn't
4486 4494 // escape it by accident.
4487 4495 set_cm_oop_closure(NULL);
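// elapsedVTime() returns this thread's virtual (CPU) time in seconds,
// hence the scaling by 1000.0 to get milliseconds.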
4488 4496 double end_time_ms = os::elapsedVTime() * 1000.0;
4489 4497 double elapsed_time_ms = end_time_ms - _start_time_ms;
4490 4498 // Update the step history.
4491 4499 _step_times_ms.add(elapsed_time_ms);
4492 4500
4493 4501 if (has_aborted()) {
4494 4502 // The task was aborted for some reason.
4495 4503
4496 4504 statsOnly( ++_aborted );
4497 4505
4498 4506 if (_has_timed_out) {
4499 4507 double diff_ms = elapsed_time_ms - _time_target_ms;
4500 4508 // Keep statistics of how well we did with respect to hitting
4501 4509 // our target only if we actually timed out (if we aborted for
4502 4510 // other reasons, then the results might get skewed).
4503 4511 _marking_step_diffs_ms.add(diff_ms);
4504 4512 }
4505 4513
4506 4514 if (_cm->has_overflown()) {
4507 4515 // This is the interesting one. We aborted because a global
4508 4516 // overflow was raised. This means we have to restart the
4509 4517 // marking phase and start iterating over regions. However, in
4510 4518 // order to do this we have to make sure that all tasks stop
4511 4519 // what they are doing and re-initialise in a safe manner. We
4512 4520 // will achieve this with the use of two barrier sync points.
4513 4521
4514 4522 if (_cm->verbose_low()) {
4515 4523 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4516 4524 }
4517 4525
4518 4526 _cm->enter_first_sync_barrier(_task_id);
4519 4527 // When we exit this sync barrier we know that all tasks have
4520 4528 // stopped doing marking work. So, it's now safe to
4521 4529 // re-initialise our data structures. At the end of this method,
4522 4530 // task 0 will clear the global data structures.
4523 4531
4524 4532 statsOnly( ++_aborted_overflow );
4525 4533
4526 4534 // We clear the local state of this task...
4527 4535 clear_region_fields();
4528 4536
4529 4537 // ...and enter the second barrier.
4530 4538 _cm->enter_second_sync_barrier(_task_id);
4531 4539     // At this point everything has been re-initialised and we're
4532 4540 // ready to restart.
4533 4541 }
4534 4542
4535 4543 if (_cm->verbose_low()) {
4536 4544 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4537 4545 "elapsed = %1.2lfms <<<<<<<<<<",
4538 4546 _task_id, _time_target_ms, elapsed_time_ms);
4539 4547 if (_cm->has_aborted()) {
4540 4548 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4541 4549 _task_id);
4542 4550 }
4543 4551 }
4544 4552 } else {
4545 4553 if (_cm->verbose_low()) {
4546 4554 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4547 4555 "elapsed = %1.2lfms <<<<<<<<<<",
4548 4556 _task_id, _time_target_ms, elapsed_time_ms);
4549 4557 }
4550 4558 }
4551 4559
4552 4560 _claimed = false;
4553 4561 }
4554 4562
4555 4563 CMTask::CMTask(int task_id,
4556 4564 ConcurrentMark* cm,
4557 4565 CMTaskQueue* task_queue,
4558 4566 CMTaskQueueSet* task_queues)
4559 4567 : _g1h(G1CollectedHeap::heap()),
4560 4568 _task_id(task_id), _cm(cm),
4561 4569 _claimed(false),
4562 4570 _nextMarkBitMap(NULL), _hash_seed(17),
4563 4571 _task_queue(task_queue),
4564 4572 _task_queues(task_queues),
4565 4573 _cm_oop_closure(NULL),
4566 4574 _aborted_region(MemRegion()) {
4567 4575 guarantee(task_queue != NULL, "invariant");
4568 4576 guarantee(task_queues != NULL, "invariant");
4569 4577
4570 4578 statsOnly( _clock_due_to_scanning = 0;
4571 4579 _clock_due_to_marking = 0 );
4572 4580
4573 4581 _marking_step_diffs_ms.add(0.5);
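// Seed the step-diff history so that the very first prediction has a
// data point to work with; the 0.5ms is an initial guess, not a
// measured value.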
4574 4582 }
4575 4583
4576 4584 // These are formatting macros that are used below to ensure
4577 4585 // consistent formatting. The *_H_* versions are used to format the
4578 4586 // header for a particular value and they should be kept consistent
4579 4587 // with the corresponding macro. Also note that most of the macros add
4580 4588 // the necessary white space (as a prefix) which makes them a bit
4581 4589 // easier to compose.
4582 4590
4583 4591 // All the output lines are prefixed with this string to be able to
4584 4592 // identify them easily in a large log file.
4585 4593 #define G1PPRL_LINE_PREFIX "###"
4586 4594
4587 4595 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4588 4596 #ifdef _LP64
4589 4597 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4590 4598 #else // _LP64
4591 4599 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4592 4600 #endif // _LP64
4593 4601
4594 4602 // For per-region info
4595 4603 #define G1PPRL_TYPE_FORMAT " %-4s"
4596 4604 #define G1PPRL_TYPE_H_FORMAT " %4s"
4597 4605 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4598 4606 #define G1PPRL_BYTE_H_FORMAT " %9s"
4599 4607 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4600 4608 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4601 4609
4602 4610 // For summary info
4603 4611 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4604 4612 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4605 4613 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4606 4614 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
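// For illustration only (all values below are made up), a per-region
// line composed from the macros above renders along the lines of:
// ###  OLD 0x00000000fae00000-0x00000000faf00000   1048576   1042432   1040384          125.3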
4607 4615
4608 4616 G1PrintRegionLivenessInfoClosure::
4609 4617 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4610 4618 : _out(out),
4611 4619 _total_used_bytes(0), _total_capacity_bytes(0),
4612 4620 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4613 4621 _hum_used_bytes(0), _hum_capacity_bytes(0),
4614 4622 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4615 4623 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4616 4624 MemRegion g1_committed = g1h->g1_committed();
4617 4625 MemRegion g1_reserved = g1h->g1_reserved();
4618 4626 double now = os::elapsedTime();
4619 4627
4620 4628 // Print the header of the output.
4621 4629 _out->cr();
4622 4630 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4623 4631 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4624 4632 G1PPRL_SUM_ADDR_FORMAT("committed")
4625 4633 G1PPRL_SUM_ADDR_FORMAT("reserved")
4626 4634 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4627 4635 g1_committed.start(), g1_committed.end(),
4628 4636 g1_reserved.start(), g1_reserved.end(),
4629 4637 HeapRegion::GrainBytes);
4630 4638 _out->print_cr(G1PPRL_LINE_PREFIX);
4631 4639 _out->print_cr(G1PPRL_LINE_PREFIX
4632 4640 G1PPRL_TYPE_H_FORMAT
4633 4641 G1PPRL_ADDR_BASE_H_FORMAT
4634 4642 G1PPRL_BYTE_H_FORMAT
4635 4643 G1PPRL_BYTE_H_FORMAT
4636 4644 G1PPRL_BYTE_H_FORMAT
4637 4645 G1PPRL_DOUBLE_H_FORMAT,
4638 4646 "type", "address-range",
4639 4647 "used", "prev-live", "next-live", "gc-eff");
4640 4648 }
4641 4649
4642 4650 // It takes as a parameter a reference to one of the _hum_* fields. It
4643 4651 // deduces the corresponding value for a region in a humongous region
4644 4652 // series (either the region size, or what's left if the _hum_* field
4645 4653 // is < the region size), and updates the _hum_* field accordingly.
4646 4654 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4647 4655 size_t bytes = 0;
4648 4656 // The > 0 check is to deal with the prev and next live bytes which
4649 4657 // could be 0.
4650 4658 if (*hum_bytes > 0) {
4651 4659 bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
4652 4660 *hum_bytes -= bytes;
4653 4661 }
4654 4662 return bytes;
4655 4663 }
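// Worked example: for a humongous series occupying 2.5M with a 1M
// region size, the "starts humongous" region sets _hum_used_bytes to
// 2.5M and the three regions of the series then receive 1M, 1M and
// 0.5M in turn.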
4656 4664
4657 4665 // It deduces the values for a region in a humongous region series
4658 4666 // from the _hum_* fields and updates those accordingly. It assumes
4659 4667 // that the _hum_* fields have already been set up from the "starts
4660 4668 // humongous" region and that we visit the regions in address order.
4661 4669 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4662 4670 size_t* capacity_bytes,
4663 4671 size_t* prev_live_bytes,
4664 4672 size_t* next_live_bytes) {
4665 4673 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4666 4674 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4667 4675 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4668 4676 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4669 4677 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4670 4678 }
4671 4679
4672 4680 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4673 4681 const char* type = "";
4674 4682 HeapWord* bottom = r->bottom();
4675 4683 HeapWord* end = r->end();
4676 4684 size_t capacity_bytes = r->capacity();
4677 4685 size_t used_bytes = r->used();
4678 4686 size_t prev_live_bytes = r->live_bytes();
4679 4687 size_t next_live_bytes = r->next_live_bytes();
4680 4688 double gc_eff = r->gc_efficiency();
4681 4689 if (r->used() == 0) {
4682 4690 type = "FREE";
4683 4691 } else if (r->is_survivor()) {
4684 4692 type = "SURV";
4685 4693 } else if (r->is_young()) {
4686 4694 type = "EDEN";
4687 4695 } else if (r->startsHumongous()) {
4688 4696 type = "HUMS";
4689 4697
4690 4698 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4691 4699 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4692 4700 "they should have been zeroed after the last time we used them");
4693 4701 // Set up the _hum_* fields.
4694 4702 _hum_capacity_bytes = capacity_bytes;
4695 4703 _hum_used_bytes = used_bytes;
4696 4704 _hum_prev_live_bytes = prev_live_bytes;
4697 4705 _hum_next_live_bytes = next_live_bytes;
4698 4706 get_hum_bytes(&used_bytes, &capacity_bytes,
4699 4707 &prev_live_bytes, &next_live_bytes);
4700 4708 end = bottom + HeapRegion::GrainWords;
4701 4709 } else if (r->continuesHumongous()) {
4702 4710 type = "HUMC";
4703 4711 get_hum_bytes(&used_bytes, &capacity_bytes,
4704 4712 &prev_live_bytes, &next_live_bytes);
4705 4713 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4706 4714 } else {
4707 4715 type = "OLD";
4708 4716 }
4709 4717
4710 4718 _total_used_bytes += used_bytes;
4711 4719 _total_capacity_bytes += capacity_bytes;
4712 4720 _total_prev_live_bytes += prev_live_bytes;
4713 4721 _total_next_live_bytes += next_live_bytes;
4714 4722
4715 4723 // Print a line for this particular region.
4716 4724 _out->print_cr(G1PPRL_LINE_PREFIX
4717 4725 G1PPRL_TYPE_FORMAT
4718 4726 G1PPRL_ADDR_BASE_FORMAT
4719 4727 G1PPRL_BYTE_FORMAT
4720 4728 G1PPRL_BYTE_FORMAT
4721 4729 G1PPRL_BYTE_FORMAT
4722 4730 G1PPRL_DOUBLE_FORMAT,
4723 4731 type, bottom, end,
4724 4732 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4725 4733
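// Returning false tells the region iteration to continue; this
// closure is meant to visit every region in the heap.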
4726 4734 return false;
4727 4735 }
4728 4736
4729 4737 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4730 4738 // Print the footer of the output.
4731 4739 _out->print_cr(G1PPRL_LINE_PREFIX);
4732 4740 _out->print_cr(G1PPRL_LINE_PREFIX
4733 4741 " SUMMARY"
4734 4742 G1PPRL_SUM_MB_FORMAT("capacity")
4735 4743 G1PPRL_SUM_MB_PERC_FORMAT("used")
4736 4744 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4737 4745 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4738 4746 bytes_to_mb(_total_capacity_bytes),
4739 4747 bytes_to_mb(_total_used_bytes),
4740 4748 perc(_total_used_bytes, _total_capacity_bytes),
4741 4749 bytes_to_mb(_total_prev_live_bytes),
4742 4750 perc(_total_prev_live_bytes, _total_capacity_bytes),
4743 4751 bytes_to_mb(_total_next_live_bytes),
4744 4752 perc(_total_next_live_bytes, _total_capacity_bytes));
4745 4753 _out->cr();
4746 4754 }
[1331 lines elided]