rev 2724 : 6484965: G1: piggy-back liveness accounting phase on marking
Summary: Remove the separate counting phase of concurrent marking by tracking the amount of marked bytes and the cards spanned by marked objects in marking task/worker thread local data structures, which are updated as individual objects are marked.
Reviewed-by:
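
The change in a nutshell: each marking worker owns a marked-bytes array (one
slot per heap region) and a private card bitmap; the worker that wins the race
to set an object's mark bit updates only its own counters, and the per-worker
data is folded into the global totals at remark. Below is a minimal sketch of
that scheme in isolation; the names (WorkerCountData, LivenessCounts,
add_to_count_data, aggregate_marked_bytes) are illustrative stand-ins for the
patch's _count_marked_bytes, _count_card_bitmaps, add_to_count_data_for and
aggregate_all_count_data, not the actual HotSpot API:

    #include <cstddef>
    #include <vector>

    // One counting record per marking worker.
    struct WorkerCountData {
      std::vector<size_t> marked_bytes;  // indexed by heap region
      std::vector<bool>   card_bm;       // one bit per card in the heap
    };

    class LivenessCounts {
      std::vector<WorkerCountData> _worker_data;
    public:
      LivenessCounts(size_t workers, size_t regions, size_t cards)
        : _worker_data(workers,
                       WorkerCountData{std::vector<size_t>(regions, 0),
                                       std::vector<bool>(cards, false)}) {}

      // Called only by the worker whose mark-bit CAS succeeded, so the
      // update touches worker-local data and needs no atomics.
      void add_to_count_data(int worker_i, size_t region_index,
                             size_t first_card, size_t last_card,
                             size_t obj_bytes) {
        WorkerCountData& wd = _worker_data[worker_i];
        wd.marked_bytes[region_index] += obj_bytes;
        for (size_t c = first_card; c <= last_card; c++) {
          wd.card_bm[c] = true;
        }
      }

      // At remark: sum every worker's slot for a region (cf. the patch's
      // aggregate_all_count_data()).
      size_t aggregate_marked_bytes(size_t region_index) const {
        size_t total = 0;
        for (size_t w = 0; w < _worker_data.size(); w++) {
          total += _worker_data[w].marked_bytes[region_index];
        }
        return total;
      }
    };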
--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
32 32 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
33 33 #include "gc_implementation/g1/g1RemSet.hpp"
34 34 #include "gc_implementation/g1/heapRegionRemSet.hpp"
35 35 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
36 36 #include "gc_implementation/shared/vmGCOperations.hpp"
37 37 #include "memory/genOopClosures.inline.hpp"
38 38 #include "memory/referencePolicy.hpp"
39 39 #include "memory/resourceArea.hpp"
40 40 #include "oops/oop.inline.hpp"
41 41 #include "runtime/handles.inline.hpp"
42 42 #include "runtime/java.hpp"
43 43
44 44 //
45 45 // CMS Bit Map Wrapper
46 46
47 47 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
48 48 _bm((uintptr_t*)NULL,0),
49 49 _shifter(shifter) {
50 50 _bmStartWord = (HeapWord*)(rs.base());
51 51 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
52 52 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
53 53 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
54 54
55 55 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
56 56 // For now we'll just commit all of the bit map up front.
57 57 // Later on we'll try to be more parsimonious with swap.
58 58 guarantee(_virtual_space.initialize(brs, brs.size()),
59 59 "couldn't reseve backing store for CMS bit map");
60 60 assert(_virtual_space.committed_size() == brs.size(),
61 61 "didn't reserve backing store for all of CMS bit map?");
62 62 _bm.set_map((uintptr_t*)_virtual_space.low());
63 63 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
64 64 _bmWordSize, "inconsistency in bit map sizing");
65 65 _bm.set_size(_bmWordSize >> _shifter);
66 66 }
67 67
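
The constructor above packs one mark bit per 2^_shifter heap words and rounds
the backing store up to a whole byte. A worked instance of that sizing, under
the assumption of 8-byte heap words and _shifter == 0 (which is what
MinObjAlignment - 1 yields with 8-byte object alignment); bitmap_backing_bytes
is an illustrative helper, not HotSpot code:

    #include <cstddef>

    size_t bitmap_backing_bytes(size_t rs_bytes, int shifter) {
      const int log_heap_word_size = 3;  // 8-byte heap words (assumption)
      const int log_bits_per_byte  = 3;
      size_t bm_word_size = rs_bytes >> log_heap_word_size;
      // One bit per 2^shifter words, rounded up one byte as in the code.
      return (bm_word_size >> (shifter + log_bits_per_byte)) + 1;
    }

    // e.g. a 1 GB reserved space with shifter 0:
    //   2^30 bytes / 8 = 2^27 words -> 2^27 bits -> 16 MB + 1 byte.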
68 68 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
69 69 HeapWord* limit) const {
70 70 // First we must round addr *up* to a possible object boundary.
71 71 addr = (HeapWord*)align_size_up((intptr_t)addr,
72 72 HeapWordSize << _shifter);
73 73 size_t addrOffset = heapWordToOffset(addr);
74 74 if (limit == NULL) {
75 75 limit = _bmStartWord + _bmWordSize;
76 76 }
77 77 size_t limitOffset = heapWordToOffset(limit);
78 78 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
79 79 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
80 80 assert(nextAddr >= addr, "get_next_one postcondition");
81 81 assert(nextAddr == limit || isMarked(nextAddr),
82 82 "get_next_one postcondition");
83 83 return nextAddr;
84 84 }
85 85
86 86 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
87 87 HeapWord* limit) const {
88 88 size_t addrOffset = heapWordToOffset(addr);
89 89 if (limit == NULL) {
90 90 limit = _bmStartWord + _bmWordSize;
91 91 }
92 92 size_t limitOffset = heapWordToOffset(limit);
93 93 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
94 94 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
95 95 assert(nextAddr >= addr, "get_next_one postcondition");
96 96 assert(nextAddr == limit || !isMarked(nextAddr),
97 97 "get_next_one postcondition");
98 98 return nextAddr;
99 99 }
100 100
101 101 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
102 102 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
103 103 return (int) (diff >> _shifter);
104 104 }
105 105
106 106 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
107 107 HeapWord* left = MAX2(_bmStartWord, mr.start());
108 108 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
109 109 if (right > left) {
110 110 // Right-open interval [leftOffset, rightOffset).
111 111 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
112 112 } else {
113 113 return true;
114 114 }
115 115 }
116 116
117 117 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
118 118 size_t from_start_index,
119 119 HeapWord* to_start_word,
120 120 size_t word_num) {
121 121 _bm.mostly_disjoint_range_union(from_bitmap,
122 122 from_start_index,
123 123 heapWordToOffset(to_start_word),
124 124 word_num);
125 125 }
126 126
127 127 #ifndef PRODUCT
128 128 bool CMBitMapRO::covers(ReservedSpace rs) const {
129 129 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
130 130 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
131 131 "size inconsistency");
132 132 return _bmStartWord == (HeapWord*)(rs.base()) &&
133 133 _bmWordSize == rs.size()>>LogHeapWordSize;
134 134 }
135 135 #endif
136 136
137 137 void CMBitMap::clearAll() {
138 138 _bm.clear();
139 139 return;
140 140 }
141 141
142 142 void CMBitMap::markRange(MemRegion mr) {
143 143 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
144 144 assert(!mr.is_empty(), "unexpected empty region");
145 145 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
146 146 ((HeapWord *) mr.end())),
147 147 "markRange memory region end is not card aligned");
148 148 // convert address range into offset range
149 149 _bm.at_put_range(heapWordToOffset(mr.start()),
150 150 heapWordToOffset(mr.end()), true);
151 151 }
152 152
153 153 void CMBitMap::clearRange(MemRegion mr) {
154 154 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
155 155 assert(!mr.is_empty(), "unexpected empty region");
156 156 // convert address range into offset range
157 157 _bm.at_put_range(heapWordToOffset(mr.start()),
158 158 heapWordToOffset(mr.end()), false);
159 159 }
160 160
161 161 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
162 162 HeapWord* end_addr) {
163 163 HeapWord* start = getNextMarkedWordAddress(addr);
164 164 start = MIN2(start, end_addr);
165 165 HeapWord* end = getNextUnmarkedWordAddress(start);
166 166 end = MIN2(end, end_addr);
167 167 assert(start <= end, "Consistency check");
168 168 MemRegion mr(start, end);
169 169 if (!mr.is_empty()) {
170 170 clearRange(mr);
171 171 }
172 172 return mr;
173 173 }
174 174
175 175 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
176 176 _base(NULL), _cm(cm)
177 177 #ifdef ASSERT
178 178 , _drain_in_progress(false)
179 179 , _drain_in_progress_yields(false)
180 180 #endif
181 181 {}
182 182
183 183 void CMMarkStack::allocate(size_t size) {
184 184 _base = NEW_C_HEAP_ARRAY(oop, size);
185 185 if (_base == NULL) {
186 186 vm_exit_during_initialization("Failed to allocate "
187 187 "CM region mark stack");
188 188 }
189 189 _index = 0;
190 190 _capacity = (jint) size;
191 191 _oops_do_bound = -1;
192 192 NOT_PRODUCT(_max_depth = 0);
193 193 }
194 194
195 195 CMMarkStack::~CMMarkStack() {
196 196 if (_base != NULL) {
197 197 FREE_C_HEAP_ARRAY(oop, _base);
198 198 }
199 199 }
200 200
201 201 void CMMarkStack::par_push(oop ptr) {
202 202 while (true) {
203 203 if (isFull()) {
204 204 _overflow = true;
205 205 return;
206 206 }
207 207 // Otherwise...
208 208 jint index = _index;
209 209 jint next_index = index+1;
210 210 jint res = Atomic::cmpxchg(next_index, &_index, index);
211 211 if (res == index) {
212 212 _base[index] = ptr;
213 213 // Note that we don't maintain this atomically. We could, but it
214 214 // doesn't seem necessary.
215 215 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
216 216 return;
217 217 }
218 218 // Otherwise, we need to try again.
219 219 }
220 220 }
221 221
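
par_push above is the standard CAS claim loop: read _index, try to advance it
with Atomic::cmpxchg, and only the winner stores into its claimed slot. The
same pattern restated in isolation as a sketch, using std::atomic in place of
the HotSpot Atomic class:

    #include <atomic>

    template <typename T, int CAPACITY>
    class ParPushStack {
      T _base[CAPACITY];
      std::atomic<int> _index{0};
      bool _overflow = false;
    public:
      void par_push(T ptr) {
        while (true) {
          int index = _index.load();
          if (index >= CAPACITY) {
            _overflow = true;      // full: record overflow, as above
            return;
          }
          // Claim slot 'index' by advancing _index; only the CAS winner
          // writes the slot, losers loop and re-read.
          if (_index.compare_exchange_weak(index, index + 1)) {
            _base[index] = ptr;
            return;
          }
        }
      }
    };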
222 222 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
223 223 while (true) {
224 224 if (isFull()) {
225 225 _overflow = true;
226 226 return;
227 227 }
228 228 // Otherwise...
229 229 jint index = _index;
230 230 jint next_index = index + n;
231 231 if (next_index > _capacity) {
232 232 _overflow = true;
233 233 return;
234 234 }
235 235 jint res = Atomic::cmpxchg(next_index, &_index, index);
236 236 if (res == index) {
237 237 for (int i = 0; i < n; i++) {
238 238 int ind = index + i;
239 239 assert(ind < _capacity, "By overflow test above.");
240 240 _base[ind] = ptr_arr[i];
241 241 }
242 242 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
243 243 return;
244 244 }
245 245 // Otherwise, we need to try again.
246 246 }
247 247 }
248 248
249 249
250 250 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
251 251 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
252 252 jint start = _index;
253 253 jint next_index = start + n;
254 254 if (next_index > _capacity) {
255 255 _overflow = true;
256 256 return;
257 257 }
258 258 // Otherwise.
259 259 _index = next_index;
260 260 for (int i = 0; i < n; i++) {
261 261 int ind = start + i;
262 262 assert(ind < _capacity, "By overflow test above.");
263 263 _base[ind] = ptr_arr[i];
264 264 }
265 265 }
266 266
267 267
268 268 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
269 269 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
270 270 jint index = _index;
271 271 if (index == 0) {
272 272 *n = 0;
273 273 return false;
274 274 } else {
275 275 int k = MIN2(max, index);
276 276 jint new_ind = index - k;
277 277 for (int j = 0; j < k; j++) {
278 278 ptr_arr[j] = _base[new_ind + j];
279 279 }
280 280 _index = new_ind;
281 281 *n = k;
282 282 return true;
283 283 }
284 284 }
285 285
286 286
287 287 CMRegionStack::CMRegionStack() : _base(NULL) {}
288 288
289 289 void CMRegionStack::allocate(size_t size) {
290 290 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
291 291 if (_base == NULL) {
292 292 vm_exit_during_initialization("Failed to allocate CM region mark stack");
293 293 }
294 294 _index = 0;
295 295 _capacity = (jint) size;
296 296 }
297 297
298 298 CMRegionStack::~CMRegionStack() {
299 299 if (_base != NULL) {
300 300 FREE_C_HEAP_ARRAY(MemRegion, _base);
301 301 }
302 302 }
303 303
304 304 void CMRegionStack::push_lock_free(MemRegion mr) {
305 305 assert(mr.word_size() > 0, "Precondition");
306 306 while (true) {
307 307 jint index = _index;
308 308
309 309 if (index >= _capacity) {
310 310 _overflow = true;
311 311 return;
312 312 }
313 313 // Otherwise...
314 314 jint next_index = index+1;
315 315 jint res = Atomic::cmpxchg(next_index, &_index, index);
316 316 if (res == index) {
317 317 _base[index] = mr;
318 318 return;
319 319 }
320 320 // Otherwise, we need to try again.
321 321 }
322 322 }
323 323
324 324 // Lock-free pop of the region stack. Called during the concurrent
325 325 // marking / remark phases. Should only be called in tandem with
326 326 // other lock-free pops.
327 327 MemRegion CMRegionStack::pop_lock_free() {
328 328 while (true) {
329 329 jint index = _index;
330 330
331 331 if (index == 0) {
332 332 return MemRegion();
333 333 }
334 334 // Otherwise...
335 335 jint next_index = index-1;
336 336 jint res = Atomic::cmpxchg(next_index, &_index, index);
337 337 if (res == index) {
338 338 MemRegion mr = _base[next_index];
339 339 if (mr.start() != NULL) {
340 340 assert(mr.end() != NULL, "invariant");
341 341 assert(mr.word_size() > 0, "invariant");
342 342 return mr;
343 343 } else {
344 344 // that entry was invalidated... let's skip it
345 345 assert(mr.end() == NULL, "invariant");
346 346 }
347 347 }
348 348 // Otherwise, we need to try again.
349 349 }
350 350 }
351 351
352 352 #if 0
353 353 // The routines that manipulate the region stack with a lock are
354 354 // not currently used. They should be retained, however, as a
355 355 // diagnostic aid.
356 356
357 357 void CMRegionStack::push_with_lock(MemRegion mr) {
358 358 assert(mr.word_size() > 0, "Precondition");
359 359 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
360 360
361 361 if (isFull()) {
362 362 _overflow = true;
363 363 return;
364 364 }
365 365
366 366 _base[_index] = mr;
367 367 _index += 1;
368 368 }
369 369
370 370 MemRegion CMRegionStack::pop_with_lock() {
371 371 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
372 372
373 373 while (true) {
374 374 if (_index == 0) {
375 375 return MemRegion();
376 376 }
377 377 _index -= 1;
378 378
379 379 MemRegion mr = _base[_index];
380 380 if (mr.start() != NULL) {
381 381 assert(mr.end() != NULL, "invariant");
382 382 assert(mr.word_size() > 0, "invariant");
383 383 return mr;
384 384 } else {
385 385 // that entry was invalidated... let's skip it
386 386 assert(mr.end() == NULL, "invariant");
387 387 }
388 388 }
389 389 }
390 390 #endif
391 391
392 392 bool CMRegionStack::invalidate_entries_into_cset() {
393 393 bool result = false;
394 394 G1CollectedHeap* g1h = G1CollectedHeap::heap();
395 395 for (int i = 0; i < _oops_do_bound; ++i) {
396 396 MemRegion mr = _base[i];
397 397 if (mr.start() != NULL) {
398 398 assert(mr.end() != NULL, "invariant");
399 399 assert(mr.word_size() > 0, "invariant");
400 400 HeapRegion* hr = g1h->heap_region_containing(mr.start());
401 401 assert(hr != NULL, "invariant");
402 402 if (hr->in_collection_set()) {
403 403 // The region points into the collection set
404 404 _base[i] = MemRegion();
405 405 result = true;
406 406 }
407 407 } else {
408 408 // that entry was invalidated... let's skip it
409 409 assert(mr.end() == NULL, "invariant");
410 410 }
411 411 }
412 412 return result;
413 413 }
414 414
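
Note the protocol here: invalidate_entries_into_cset never removes an entry,
it tombstones it in place with an empty MemRegion, and pop_lock_free (above)
recognizes a NULL start as "skip". The convention in isolation, as a sketch
with simplified stand-in types:

    #include <atomic>

    struct Range {                   // stand-in for MemRegion
      void* start = nullptr;
      void* end   = nullptr;
    };

    class RangeStack {
      static const int CAPACITY = 1024;
      Range _base[CAPACITY];
      std::atomic<int> _index{0};
    public:
      // Invalidate in place; safe because every pop checks start != NULL.
      void invalidate(int i) { _base[i] = Range(); }

      bool pop(Range* out) {
        while (true) {
          int index = _index.load();
          if (index == 0) return false;              // empty
          if (_index.compare_exchange_weak(index, index - 1)) {
            Range r = _base[index - 1];
            if (r.start != nullptr) {                // live entry
              *out = r;
              return true;
            }
            // tombstone: skip it and keep popping
          }
        }
      }
    };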
415 415 template<class OopClosureClass>
416 416 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
417 417 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
418 418 || SafepointSynchronize::is_at_safepoint(),
419 419 "Drain recursion must be yield-safe.");
420 420 bool res = true;
421 421 debug_only(_drain_in_progress = true);
422 422 debug_only(_drain_in_progress_yields = yield_after);
423 423 while (!isEmpty()) {
424 424 oop newOop = pop();
425 425 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
426 426 assert(newOop->is_oop(), "Expected an oop");
427 427 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
428 428 "only grey objects on this stack");
429 429 // iterate over the oops in this oop, marking and pushing
430 430 // the ones in CMS generation.
431 431 newOop->oop_iterate(cl);
432 432 if (yield_after && _cm->do_yield_check()) {
433 433 res = false;
434 434 break;
435 435 }
436 436 }
437 437 debug_only(_drain_in_progress = false);
438 438 return res;
439 439 }
440 440
441 441 void CMMarkStack::oops_do(OopClosure* f) {
442 442 if (_index == 0) return;
443 443 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
444 444 "Bound must be set.");
445 445 for (int i = 0; i < _oops_do_bound; i++) {
446 446 f->do_oop(&_base[i]);
447 447 }
448 448 _oops_do_bound = -1;
449 449 }
450 450
451 451 bool ConcurrentMark::not_yet_marked(oop obj) const {
452 452 return (_g1h->is_obj_ill(obj)
453 453 || (_g1h->is_in_permanent(obj)
454 454 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
455 455 }
456 456
457 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
458 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
459 459 #endif // _MSC_VER
460 460
461 461 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
462 462 int max_regions) :
463 463 _markBitMap1(rs, MinObjAlignment - 1),
464 464 _markBitMap2(rs, MinObjAlignment - 1),
465 465
... 465 lines elided ...
466 466 _parallel_marking_threads(0),
467 467 _sleep_factor(0.0),
468 468 _marking_task_overhead(1.0),
469 469 _cleanup_sleep_factor(0.0),
470 470 _cleanup_task_overhead(1.0),
471 471 _cleanup_list("Cleanup List"),
472 472 _region_bm(max_regions, false /* in_resource_area*/),
473 473 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
474 474 CardTableModRefBS::card_shift,
475 475 false /* in_resource_area*/),
476 +
476 477 _prevMarkBitMap(&_markBitMap1),
477 478 _nextMarkBitMap(&_markBitMap2),
478 479 _at_least_one_mark_complete(false),
479 480
480 481 _markStack(this),
481 482 _regionStack(),
482 483 // _finger set in set_non_marking_state
483 484
484 485 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
485 486 // _active_tasks set in set_non_marking_state
486 487 // _tasks set inside the constructor
487 488 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
488 489 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
489 490
490 491 _has_overflown(false),
491 492 _concurrent(false),
492 493 _has_aborted(false),
493 494 _restart_for_overflow(false),
494 495 _concurrent_marking_in_progress(false),
... 9 lines elided ...
495 496 _should_gray_objects(false),
496 497
497 498 // _verbose_level set below
498 499
499 500 _init_times(),
500 501 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
501 502 _cleanup_times(),
502 503 _total_counting_time(0.0),
503 504 _total_rs_scrub_time(0.0),
504 505
505 - _parallel_workers(NULL) {
506 + _parallel_workers(NULL),
507 +
508 + _count_card_bitmaps(NULL),
509 + _count_marked_bytes(NULL)
510 +{
506 511 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
507 512 if (verbose_level < no_verbose) {
508 513 verbose_level = no_verbose;
509 514 }
510 515 if (verbose_level > high_verbose) {
511 516 verbose_level = high_verbose;
512 517 }
513 518 _verbose_level = verbose_level;
514 519
515 520 if (verbose_low()) {
516 521 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
517 522 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
518 523 }
519 524
520 525 _markStack.allocate(MarkStackSize);
521 526 _regionStack.allocate(G1MarkRegionStackSize);
522 527
523 528 // Create & start a ConcurrentMark thread.
524 529 _cmThread = new ConcurrentMarkThread(this);
525 530 assert(cmThread() != NULL, "CM Thread should have been created");
526 531 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
527 532
528 533 _g1h = G1CollectedHeap::heap();
... 13 lines elided ...
529 534 assert(CGC_lock != NULL, "Where's the CGC_lock?");
530 535 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
531 536 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
532 537
533 538 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
534 539 satb_qs.set_buffer_size(G1SATBBufferSize);
535 540
536 541 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
537 542 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
538 543
544 + _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num);
545 + _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);
546 +
547 + BitMap::idx_t card_bm_size = _card_bm.size();
548 +
539 549 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
540 550 _active_tasks = _max_task_num;
541 551 for (int i = 0; i < (int) _max_task_num; ++i) {
542 552 CMTaskQueue* task_queue = new CMTaskQueue();
543 553 task_queue->initialize();
544 554 _task_queues->register_queue(i, task_queue);
545 555
546 556 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
547 557 _accum_task_vtime[i] = 0.0;
558 +
559 + _count_card_bitmaps[i] = BitMap(card_bm_size, false);
560 + _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);
548 561 }
549 562
550 563 if (ConcGCThreads > ParallelGCThreads) {
551 564 vm_exit_during_initialization("Can't have more ConcGCThreads "
552 565 "than ParallelGCThreads.");
553 566 }
554 567 if (ParallelGCThreads == 0) {
555 568 // if we are not running with any parallel GC threads we will not
556 569 // spawn any marking threads either
557 570 _parallel_marking_threads = 0;
558 571 _sleep_factor = 0.0;
559 572 _marking_task_overhead = 1.0;
560 573 } else {
561 574 if (ConcGCThreads > 0) {
562 575 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
563 576 // if both are set
564 577
565 578 _parallel_marking_threads = ConcGCThreads;
566 579 _sleep_factor = 0.0;
567 580 _marking_task_overhead = 1.0;
568 581 } else if (G1MarkingOverheadPercent > 0) {
569 582 // we will calculate the number of parallel marking threads
570 583 // based on a target overhead with respect to the soft real-time
571 584 // goal
572 585
573 586 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
574 587 double overall_cm_overhead =
575 588 (double) MaxGCPauseMillis * marking_overhead /
576 589 (double) GCPauseIntervalMillis;
577 590 double cpu_ratio = 1.0 / (double) os::processor_count();
578 591 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
579 592 double marking_task_overhead =
580 593 overall_cm_overhead / marking_thread_num *
581 594 (double) os::processor_count();
582 595 double sleep_factor =
583 596 (1.0 - marking_task_overhead) / marking_task_overhead;
584 597
585 598 _parallel_marking_threads = (size_t) marking_thread_num;
586 599 _sleep_factor = sleep_factor;
587 600 _marking_task_overhead = marking_task_overhead;
588 601 } else {
589 602 _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
590 603 _sleep_factor = 0.0;
591 604 _marking_task_overhead = 1.0;
592 605 }
593 606
594 607 if (parallel_marking_threads() > 1) {
595 608 _cleanup_task_overhead = 1.0;
596 609 } else {
597 610 _cleanup_task_overhead = marking_task_overhead();
598 611 }
599 612 _cleanup_sleep_factor =
600 613 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
601 614
602 615 #if 0
603 616 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
604 617 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
605 618 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
606 619 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
607 620 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
608 621 #endif
609 622
610 623 guarantee(parallel_marking_threads() > 0, "peace of mind");
611 624 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
612 625 (int) _parallel_marking_threads, false, true);
613 626 if (_parallel_workers == NULL) {
614 627 vm_exit_during_initialization("Failed necessary allocation.");
615 628 } else {
616 629 _parallel_workers->initialize_workers();
617 630 }
618 631 }
619 632
620 633 // so that the call below can read a sensible value
621 634 _heap_start = (HeapWord*) rs.base();
622 635 set_non_marking_state();
623 636 }
624 637
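
When G1MarkingOverheadPercent drives the sizing above, the marking thread
count falls out of a target share of total CPU time. A worked instance with
illustrative flag values (example numbers, not defaults):

    #include <cmath>
    #include <cstdio>

    int main() {
      // G1MarkingOverheadPercent=10, MaxGCPauseMillis=200,
      // GCPauseIntervalMillis=1000, 8 CPUs.
      double marking_overhead    = 10 / 100.0;
      double overall_cm_overhead = 200.0 * marking_overhead / 1000.0;     // 0.02
      double cpu_ratio           = 1.0 / 8.0;        // one CPU's share
      double threads             = ceil(overall_cm_overhead / cpu_ratio); // 1
      double task_overhead       = overall_cm_overhead / threads * 8.0;   // 0.16
      double sleep_factor        = (1.0 - task_overhead) / task_overhead; // 5.25
      printf("threads=%.0f task_overhead=%.2f sleep_factor=%.2f\n",
             threads, task_overhead, sleep_factor);
      return 0;
    }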
625 638 void ConcurrentMark::update_g1_committed(bool force) {
626 639 // If concurrent marking is not in progress, then we do not need to
627 640 // update _heap_end. This has a subtle and important
628 641 // side-effect. Imagine that two evacuation pauses happen between
629 642 // marking completion and remark. The first one can grow the
630 643 // heap (hence now the finger is below the heap end). Then, the
631 644 // second one could unnecessarily push regions on the region
632 645 // stack. This causes the invariant that the region stack is empty
633 646 // at the beginning of remark to be false. By ensuring that we do
634 647 // not observe heap expansions after marking is complete, then we do
635 648 // not have this problem.
636 649 if (!concurrent_marking_in_progress() && !force) return;
637 650
638 651 MemRegion committed = _g1h->g1_committed();
639 652 assert(committed.start() == _heap_start, "start shouldn't change");
640 653 HeapWord* new_end = committed.end();
641 654 if (new_end > _heap_end) {
642 655 // The heap has been expanded.
643 656
644 657 _heap_end = new_end;
645 658 }
646 659 // Notice that the heap can also shrink. However, this only happens
647 660 // during a Full GC (at least currently) and the entire marking
648 661 // phase will bail out and the task will not be restarted. So, let's
649 662 // do nothing.
650 663 }
651 664
652 665 void ConcurrentMark::reset() {
653 666 // Starting values for these two. This should be called in a STW
654 667 // phase. CM will be notified of any future g1_committed expansions;
655 668 // these will happen at the end of evacuation pauses, when tasks are
656 669 // inactive.
657 670 MemRegion committed = _g1h->g1_committed();
658 671 _heap_start = committed.start();
... 101 lines elided ...
659 672 _heap_end = committed.end();
660 673
661 674 // Separated the asserts so that we know which one fires.
662 675 assert(_heap_start != NULL, "heap bounds should look ok");
663 676 assert(_heap_end != NULL, "heap bounds should look ok");
664 677 assert(_heap_start < _heap_end, "heap bounds should look ok");
665 678
666 679 // reset all the marking data structures and any necessary flags
667 680 clear_marking_state();
668 681
682 + clear_all_count_data();
683 +
669 684 if (verbose_low()) {
670 685 gclog_or_tty->print_cr("[global] resetting");
671 686 }
672 687
673 688 // We do reset all of them, since different phases will use
674 689 // different number of active threads. So, it's easiest to have all
675 690 // of them ready.
676 691 for (int i = 0; i < (int) _max_task_num; ++i) {
677 692 _tasks[i]->reset(_nextMarkBitMap);
678 693 }
679 694
680 695 // we need this to make sure that the flag is on during the evac
681 696 // pause with initial mark piggy-backed
682 697 set_concurrent_marking_in_progress();
683 698 }
684 699
685 700 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
686 701 assert(active_tasks <= _max_task_num, "we should not have more");
687 702
688 703 _active_tasks = active_tasks;
689 704 // Need to update the three data structures below according to the
690 705 // number of active threads for this phase.
691 706 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
692 707 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
693 708 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
694 709
695 710 _concurrent = concurrent;
696 711 // We propagate this to all tasks, not just the active ones.
697 712 for (int i = 0; i < (int) _max_task_num; ++i)
698 713 _tasks[i]->set_concurrent(concurrent);
699 714
700 715 if (concurrent) {
701 716 set_concurrent_marking_in_progress();
702 717 } else {
703 718 // We currently assume that the concurrent flag has been set to
704 719 // false before we start remark. At this point we should also be
705 720 // in a STW phase.
706 721 assert(!concurrent_marking_in_progress(), "invariant");
707 722 assert(_finger == _heap_end, "only way to get here");
708 723 update_g1_committed(true);
709 724 }
710 725 }
711 726
712 727 void ConcurrentMark::set_non_marking_state() {
713 728 // We set the global marking state to some default values when we're
... 35 lines elided ...
714 729 // not doing marking.
715 730 clear_marking_state();
716 731 _active_tasks = 0;
717 732 clear_concurrent_marking_in_progress();
718 733 }
719 734
720 735 ConcurrentMark::~ConcurrentMark() {
721 736 for (int i = 0; i < (int) _max_task_num; ++i) {
722 737 delete _task_queues->queue(i);
723 738 delete _tasks[i];
739 +
740 + _count_card_bitmaps[i].resize(0, false);
741 + FREE_C_HEAP_ARRAY(size_t, _count_marked_bytes[i]);
724 742 }
743 +
725 744 delete _task_queues;
726 - FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
745 + FREE_C_HEAP_ARRAY(CMTask*, _tasks);
746 + FREE_C_HEAP_ARRAY(double, _accum_task_vtime);
747 +
748 + FREE_C_HEAP_ARRAY(BitMap*, _count_card_bitmaps);
749 + FREE_C_HEAP_ARRAY(size_t*, _count_marked_bytes);
727 750 }
728 751
729 752 // This closure is used to mark refs into the g1 generation
730 753 // from external roots in the CMS bit map.
731 754 // Called at the first checkpoint.
732 755 //
733 756
734 757 void ConcurrentMark::clearNextBitmap() {
735 758 G1CollectedHeap* g1h = G1CollectedHeap::heap();
736 759 G1CollectorPolicy* g1p = g1h->g1_policy();
737 760
738 761 // Make sure that the concurrent mark thread looks to still be in
739 762 // the current cycle.
740 763 guarantee(cmThread()->during_cycle(), "invariant");
741 764
742 765 // We are finishing up the current cycle by clearing the next
743 766 // marking bitmap and getting it ready for the next cycle. During
744 767 // this time no other cycle can start. So, let's make sure that this
745 768 // is the case.
746 769 guarantee(!g1h->mark_in_progress(), "invariant");
747 770
748 771 // clear the mark bitmap (no grey objects to start with).
749 772 // We need to do this in chunks and offer to yield in between
750 773 // each chunk.
751 774 HeapWord* start = _nextMarkBitMap->startWord();
752 775 HeapWord* end = _nextMarkBitMap->endWord();
753 776 HeapWord* cur = start;
754 777 size_t chunkSize = M;
755 778 while (cur < end) {
756 779 HeapWord* next = cur + chunkSize;
757 780 if (next > end) {
758 781 next = end;
759 782 }
760 783 MemRegion mr(cur,next);
761 784 _nextMarkBitMap->clearRange(mr);
762 785 cur = next;
763 786 do_yield_check();
764 787
765 788 // Repeat the asserts from above. We'll do them as asserts here to
766 789 // minimize their overhead on the product. However, we'll have
767 790 // them as guarantees at the beginning / end of the bitmap
768 791 // clearing to get some checking in the product.
769 792 assert(cmThread()->during_cycle(), "invariant");
770 793 assert(!g1h->mark_in_progress(), "invariant");
771 794 }
772 795
773 796 // Repeat the asserts from above.
774 797 guarantee(cmThread()->during_cycle(), "invariant");
775 798 guarantee(!g1h->mark_in_progress(), "invariant");
776 799 }
777 800
778 801 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
779 802 public:
780 803 bool doHeapRegion(HeapRegion* r) {
781 804 if (!r->continuesHumongous()) {
782 805 r->note_start_of_marking(true);
783 806 }
784 807 return false;
785 808 }
786 809 };
787 810
788 811 void ConcurrentMark::checkpointRootsInitialPre() {
789 812 G1CollectedHeap* g1h = G1CollectedHeap::heap();
790 813 G1CollectorPolicy* g1p = g1h->g1_policy();
791 814
792 815 _has_aborted = false;
793 816
794 817 #ifndef PRODUCT
795 818 if (G1PrintReachableAtInitialMark) {
796 819 print_reachable("at-cycle-start",
797 820 VerifyOption_G1UsePrevMarking, true /* all */);
798 821 }
799 822 #endif
800 823
801 824 // Initialise marking structures. This has to be done in a STW phase.
802 825 reset();
803 826 }
804 827
805 828
806 829 void ConcurrentMark::checkpointRootsInitialPost() {
807 830 G1CollectedHeap* g1h = G1CollectedHeap::heap();
808 831
809 832 // If we force an overflow during remark, the remark operation will
810 833 // actually abort and we'll restart concurrent marking. If we always
811 834 // force an overflow during remark we'll never actually complete the
812 835 // marking phase. So, we initialize this here, at the start of the
813 836 // cycle, so that the remaining overflow number will decrease at
814 837 // every remark and we'll eventually not need to cause one.
815 838 force_overflow_stw()->init();
816 839
817 840 // For each region note start of marking.
818 841 NoteStartOfMarkHRClosure startcl;
819 842 g1h->heap_region_iterate(&startcl);
820 843
821 844 // Start Concurrent Marking weak-reference discovery.
822 845 ReferenceProcessor* rp = g1h->ref_processor_cm();
823 846 // enable ("weak") refs discovery
824 847 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
825 848 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
826 849
827 850 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
828 851 // This is the start of the marking cycle; we expect all
829 852 // threads to have SATB queues with active set to false.
830 853 satb_mq_set.set_active_all_threads(true, /* new active value */
831 854 false /* expected_active */);
832 855
833 856 // update_g1_committed() will be called at the end of an evac pause
834 857 // when marking is on. So, it's also called at the end of the
835 858 // initial-mark pause to update the heap end, if the heap expands
836 859 // during it. No need to call it here.
837 860 }
838 861
839 862 /*
840 863 * Notice that in the next two methods, we actually leave the STS
841 864 * during the barrier sync and join it immediately afterwards. If we
842 865 * do not do this, the following deadlock can occur: one thread could
843 866 * be in the barrier sync code, waiting for the other thread to also
844 867 * sync up, whereas another one could be trying to yield, while also
845 868 * waiting for the other threads to sync up too.
846 869 *
847 870 * Note, however, that this code is also used during remark and in
848 871 * this case we should not attempt to leave / enter the STS, otherwise
849 872 * we'll either hit an assert (debug / fastdebug) or deadlock
850 873 * (product). So we should only leave / enter the STS if we are
851 874 * operating concurrently.
852 875 *
853 876 * Because the thread that does the sync barrier has left the STS, it
854 877 * is possible for it to be suspended for a Full GC or for an evacuation
855 878 * pause to occur. This is actually safe, since entering the sync
856 879 * barrier is one of the last things do_marking_step() does, and it
857 880 * doesn't manipulate any data structures afterwards.
858 881 */
859 882
860 883 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
861 884 if (verbose_low()) {
862 885 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
863 886 }
864 887
865 888 if (concurrent()) {
866 889 ConcurrentGCThread::stsLeave();
867 890 }
868 891 _first_overflow_barrier_sync.enter();
869 892 if (concurrent()) {
870 893 ConcurrentGCThread::stsJoin();
871 894 }
872 895 // at this point everyone should have synced up and not be doing any
873 896 // more work
874 897
875 898 if (verbose_low()) {
876 899 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
877 900 }
878 901
879 902 // let task 0 do this
880 903 if (task_num == 0) {
881 904 // task 0 is responsible for clearing the global data structures
882 905 // We should be here because of an overflow. During STW we should
883 906 // not clear the overflow flag since we rely on it being true when
884 907 // we exit this method to abort the pause and restart concurrent
885 908 // marking.
886 909 clear_marking_state(concurrent() /* clear_overflow */);
887 910 force_overflow()->update();
888 911
889 912 if (PrintGC) {
890 913 gclog_or_tty->date_stamp(PrintGCDateStamps);
891 914 gclog_or_tty->stamp(PrintGCTimeStamps);
892 915 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
893 916 }
894 917 }
895 918
896 919 // after this, each task should reset its own data structures and
897 920 // then go into the second barrier
898 921 }
899 922
900 923 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
901 924 if (verbose_low()) {
902 925 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
903 926 }
904 927
905 928 if (concurrent()) {
906 929 ConcurrentGCThread::stsLeave();
907 930 }
908 931 _second_overflow_barrier_sync.enter();
909 932 if (concurrent()) {
910 933 ConcurrentGCThread::stsJoin();
911 934 }
912 935 // at this point everything should be re-initialised and ready to go
913 936
914 937 if (verbose_low()) {
915 938 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
916 939 }
917 940 }
918 941
919 942 #ifndef PRODUCT
920 943 void ForceOverflowSettings::init() {
921 944 _num_remaining = G1ConcMarkForceOverflow;
922 945 _force = false;
923 946 update();
924 947 }
925 948
926 949 void ForceOverflowSettings::update() {
927 950 if (_num_remaining > 0) {
928 951 _num_remaining -= 1;
929 952 _force = true;
930 953 } else {
931 954 _force = false;
932 955 }
933 956 }
934 957
... 198 lines elided ...
935 958 bool ForceOverflowSettings::should_force() {
936 959 if (_force) {
937 960 _force = false;
938 961 return true;
939 962 } else {
940 963 return false;
941 964 }
942 965 }
943 966 #endif // !PRODUCT
944 967
945 -void ConcurrentMark::grayRoot(oop p) {
968 +void ConcurrentMark::grayRoot(oop p, int worker_i) {
946 969 HeapWord* addr = (HeapWord*) p;
947 970 // We can't really check against _heap_start and _heap_end, since it
948 971 // is possible during an evacuation pause with piggy-backed
949 972 // initial-mark that the committed space is expanded during the
950 973 // pause without CM observing this change. So the assertion below
951 974 // is a bit conservative, but better than nothing.
952 975 assert(_g1h->g1_committed().contains(addr),
953 976 "address should be within the heap bounds");
954 977
955 978 if (!_nextMarkBitMap->isMarked(addr)) {
956 - _nextMarkBitMap->parMark(addr);
979 + if (_nextMarkBitMap->parMark(addr)) {
980 + // Update the task specific count data for object p.
981 + add_to_count_data_for(p, worker_i);
982 + }
957 983 }
958 984 }
959 985
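
The reason grayRoot now keys the counting update off parMark()'s return value:
when several workers race to mark the same object, exactly one CAS succeeds,
so the object's bytes land in exactly one worker's counts and the remark-time
aggregation cannot double count. A sketch of that exactly-once property:

    #include <atomic>
    #include <cstddef>

    std::atomic<bool> mark_bit(false);       // one bit of the next mark bitmap
    size_t worker_marked_bytes[2] = {0, 0};  // per-worker count data

    // Analogue of grayRoot(p, worker_i): count only if we set the bit.
    void gray_root(size_t obj_bytes, int worker_i) {
      bool expected = false;
      if (mark_bit.compare_exchange_strong(expected, true)) {
        worker_marked_bytes[worker_i] += obj_bytes;  // counted exactly once
      }
    }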
960 986 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
961 987 // The objects on the region have already been marked "in bulk" by
962 988 // the caller. We only need to decide whether to push the region on
963 989 // the region stack or not.
964 990
965 991 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
966 992 // We're done with marking and waiting for remark. We do not need to
967 993 // push anything else on the region stack.
968 994 return;
969 995 }
970 996
971 997 HeapWord* finger = _finger;
972 998
973 999 if (verbose_low()) {
974 1000 gclog_or_tty->print_cr("[global] attempting to push "
975 1001 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
976 1002 PTR_FORMAT, mr.start(), mr.end(), finger);
977 1003 }
978 1004
979 1005 if (mr.start() < finger) {
980 1006 // The finger is always heap region aligned and it is not possible
981 1007 // for mr to span heap regions.
982 1008 assert(mr.end() <= finger, "invariant");
983 1009
984 1010 // Separated the asserts so that we know which one fires.
985 1011 assert(mr.start() <= mr.end(),
986 1012 "region boundaries should fall within the committed space");
987 1013 assert(_heap_start <= mr.start(),
988 1014 "region boundaries should fall within the committed space");
989 1015 assert(mr.end() <= _heap_end,
990 1016 "region boundaries should fall within the committed space");
991 1017 if (verbose_low()) {
992 1018 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
993 1019 "below the finger, pushing it",
994 1020 mr.start(), mr.end());
... 28 lines elided ...
995 1021 }
996 1022
997 1023 if (!region_stack_push_lock_free(mr)) {
998 1024 if (verbose_low()) {
999 1025 gclog_or_tty->print_cr("[global] region stack has overflown.");
1000 1026 }
1001 1027 }
1002 1028 }
1003 1029 }
1004 1030
1005 -void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1031 +void ConcurrentMark::markAndGrayObjectIfNecessary(oop p, int worker_i) {
1006 1032 // The object is not marked by the caller. We need to at least mark
1007 1033 // it and maybe push it on the stack.
1008 1034
1009 1035 HeapWord* addr = (HeapWord*)p;
1010 1036 if (!_nextMarkBitMap->isMarked(addr)) {
1011 1037 // We definitely need to mark it, irrespective whether we bail out
1012 1038 // because we're done with marking.
1013 1039 if (_nextMarkBitMap->parMark(addr)) {
1040 + // Update the task specific count data for object p
1041 + add_to_count_data_for(p, worker_i);
1042 +
1014 1043 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1015 1044 // If we're done with concurrent marking and we're waiting for
1016 1045 // remark, then we're not pushing anything on the stack.
1017 1046 return;
1018 1047 }
1019 1048
1020 1049 // No OrderAccess::store_load() is needed. It is implicit in the
1021 1050 // CAS done in parMark(addr) above
1022 1051 HeapWord* finger = _finger;
1023 1052
1024 1053 if (addr < finger) {
1025 1054 if (!mark_stack_push(oop(addr))) {
1026 1055 if (verbose_low()) {
1027 1056 gclog_or_tty->print_cr("[global] global stack overflow "
1028 1057 "during parMark");
1029 1058 }
1030 1059 }
1031 1060 }
1032 1061 }
1033 1062 }
1034 1063 }
1035 1064
1036 1065 class CMConcurrentMarkingTask: public AbstractGangTask {
1037 1066 private:
1038 1067 ConcurrentMark* _cm;
1039 1068 ConcurrentMarkThread* _cmt;
1040 1069
1041 1070 public:
1042 1071 void work(int worker_i) {
1043 1072 assert(Thread::current()->is_ConcurrentGC_thread(),
1044 1073 "this should only be done by a conc GC thread");
1045 1074 ResourceMark rm;
1046 1075
1047 1076 double start_vtime = os::elapsedVTime();
1048 1077
1049 1078 ConcurrentGCThread::stsJoin();
1050 1079
1051 1080 assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1052 1081 CMTask* the_task = _cm->task(worker_i);
1053 1082 the_task->record_start_time();
1054 1083 if (!_cm->has_aborted()) {
1055 1084 do {
1056 1085 double start_vtime_sec = os::elapsedVTime();
1057 1086 double start_time_sec = os::elapsedTime();
1058 1087 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1059 1088
1060 1089 the_task->do_marking_step(mark_step_duration_ms,
1061 1090 true /* do_stealing */,
1062 1091 true /* do_termination */);
1063 1092
1064 1093 double end_time_sec = os::elapsedTime();
1065 1094 double end_vtime_sec = os::elapsedVTime();
1066 1095 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1067 1096 double elapsed_time_sec = end_time_sec - start_time_sec;
1068 1097 _cm->clear_has_overflown();
1069 1098
1070 1099 bool ret = _cm->do_yield_check(worker_i);
1071 1100
1072 1101 jlong sleep_time_ms;
1073 1102 if (!_cm->has_aborted() && the_task->has_aborted()) {
1074 1103 sleep_time_ms =
1075 1104 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1076 1105 ConcurrentGCThread::stsLeave();
1077 1106 os::sleep(Thread::current(), sleep_time_ms, false);
1078 1107 ConcurrentGCThread::stsJoin();
1079 1108 }
1080 1109 double end_time2_sec = os::elapsedTime();
1081 1110 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1082 1111
1083 1112 #if 0
1084 1113 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1085 1114 "overhead %1.4lf",
1086 1115 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1087 1116 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1088 1117 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1089 1118 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1090 1119 #endif
1091 1120 } while (!_cm->has_aborted() && the_task->has_aborted());
1092 1121 }
1093 1122 the_task->record_end_time();
1094 1123 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1095 1124
1096 1125 ConcurrentGCThread::stsLeave();
1097 1126
1098 1127 double end_vtime = os::elapsedVTime();
1099 1128 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1100 1129 }
1101 1130
1102 1131 CMConcurrentMarkingTask(ConcurrentMark* cm,
1103 1132 ConcurrentMarkThread* cmt) :
1104 1133 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1105 1134
1106 1135 ~CMConcurrentMarkingTask() { }
1107 1136 };
1108 1137
1109 1138 void ConcurrentMark::markFromRoots() {
1110 1139 // we might be tempted to assert that:
1111 1140 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1112 1141 // "inconsistent argument?");
1113 1142 // However that wouldn't be right, because it's possible that
1114 1143 // a safepoint is indeed in progress as a younger generation
1115 1144 // stop-the-world GC happens even as we mark in this generation.
1116 1145
1117 1146 _restart_for_overflow = false;
1118 1147
1119 1148 size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1120 1149 force_overflow_conc()->init();
1121 1150 set_phase(active_workers, true /* concurrent */);
1122 1151
1123 1152 CMConcurrentMarkingTask markingTask(this, cmThread());
1124 1153 if (parallel_marking_threads() > 0) {
1125 1154 _parallel_workers->run_task(&markingTask);
1126 1155 } else {
1127 1156 markingTask.work(0);
1128 1157 }
1129 1158 print_stats();
1130 1159 }
1131 1160
1132 1161 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1133 1162 // world is stopped at this checkpoint
1134 1163 assert(SafepointSynchronize::is_at_safepoint(),
1135 1164 "world should be stopped");
1136 1165
1137 1166 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1138 1167
1139 1168 // If a full collection has happened, we shouldn't do this.
1140 1169 if (has_aborted()) {
1141 1170 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1142 1171 return;
1143 1172 }
1144 1173
1145 1174 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1146 1175
1147 1176 if (VerifyDuringGC) {
1148 1177 HandleMark hm; // handle scope
1149 1178 gclog_or_tty->print(" VerifyDuringGC:(before)");
1150 1179 Universe::heap()->prepare_for_verify();
1151 1180 Universe::verify(/* allow dirty */ true,
1152 1181 /* silent */ false,
1153 1182 /* option */ VerifyOption_G1UsePrevMarking);
1154 1183 }
1155 1184
1156 1185 G1CollectorPolicy* g1p = g1h->g1_policy();
1157 1186 g1p->record_concurrent_mark_remark_start();
1158 1187
1159 1188 double start = os::elapsedTime();
1160 1189
1161 1190 checkpointRootsFinalWork();
1162 1191
1163 1192 double mark_work_end = os::elapsedTime();
1164 1193
1165 1194 weakRefsWork(clear_all_soft_refs);
... 142 lines elided ...
1166 1195
1167 1196 if (has_overflown()) {
1168 1197 // Oops. We overflowed. Restart concurrent marking.
1169 1198 _restart_for_overflow = true;
1170 1199 // Clear the flag. We do not need it any more.
1171 1200 clear_has_overflown();
1172 1201 if (G1TraceMarkStackOverflow) {
1173 1202 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1174 1203 }
1175 1204 } else {
1205 + // Aggregate the per-task counting data that we have accumulated
1206 + // while marking.
1207 + aggregate_all_count_data();
1208 +
1176 1209 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1177 1210 // We're done with marking.
1178 1211 // This is the end of the marking cycle; we expect all
1179 1212 // threads to have SATB queues with active set to true.
1180 1213 satb_mq_set.set_active_all_threads(false, /* new active value */
1181 1214 true /* expected_active */);
1182 1215
1183 1216 if (VerifyDuringGC) {
1184 1217 HandleMark hm; // handle scope
1185 1218 gclog_or_tty->print(" VerifyDuringGC:(after)");
1186 1219 Universe::heap()->prepare_for_verify();
1187 1220 Universe::verify(/* allow dirty */ true,
1188 1221 /* silent */ false,
1189 1222 /* option */ VerifyOption_G1UseNextMarking);
1190 1223 }
1191 1224 assert(!restart_for_overflow(), "sanity");
1192 1225 }
1193 1226
1194 1227 // Reset the marking state if marking completed
1195 1228 if (!restart_for_overflow()) {
1196 1229 set_non_marking_state();
1197 1230 }
1198 1231
1199 1232 #if VERIFY_OBJS_PROCESSED
1200 1233 _scan_obj_cl.objs_processed = 0;
1201 1234 ThreadLocalObjQueue::objs_enqueued = 0;
1202 1235 #endif
1203 1236
1204 1237 // Statistics
... 19 lines elided ...
1205 1238 double now = os::elapsedTime();
1206 1239 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1207 1240 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1208 1241 _remark_times.add((now - start) * 1000.0);
1209 1242
1210 1243 g1p->record_concurrent_mark_remark_end();
1211 1244 }
1212 1245
1213 1246 #define CARD_BM_TEST_MODE 0
1214 1247
1248 +// Used to calculate the # live objects per region
1249 +// for verification purposes
1215 1250 class CalcLiveObjectsClosure: public HeapRegionClosure {
1216 1251
1217 1252 CMBitMapRO* _bm;
1218 1253 ConcurrentMark* _cm;
1219 - bool _changed;
1220 - bool _yield;
1221 - size_t _words_done;
1254 + BitMap* _region_bm;
1255 + BitMap* _card_bm;
1256 +
1257 + size_t _tot_words_done;
1222 1258 size_t _tot_live;
1223 1259 size_t _tot_used;
1224 - size_t _regions_done;
1225 - double _start_vtime_sec;
1226 1260
1227 - BitMap* _region_bm;
1228 - BitMap* _card_bm;
1261 + size_t _region_marked_bytes;
1262 +
1229 1263 intptr_t _bottom_card_num;
1230 - bool _final;
1231 1264
1232 1265 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1233 - for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1266 + BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1267 + BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
1268 +
1269 + for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1234 1270 #if CARD_BM_TEST_MODE
1235 - guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1271 + guarantee(_card_bm->at(i), "Should already be set.");
1236 1272 #else
1237 - _card_bm->par_at_put(i - _bottom_card_num, 1);
1273 + _card_bm->par_at_put(i, 1);
1238 1274 #endif
1239 1275 }
1240 1276 }
1241 1277
1242 1278 public:
1243 - CalcLiveObjectsClosure(bool final,
1244 - CMBitMapRO *bm, ConcurrentMark *cm,
1279 + CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
1245 1280 BitMap* region_bm, BitMap* card_bm) :
1246 - _bm(bm), _cm(cm), _changed(false), _yield(true),
1247 - _words_done(0), _tot_live(0), _tot_used(0),
1248 - _region_bm(region_bm), _card_bm(card_bm),_final(final),
1249 - _regions_done(0), _start_vtime_sec(0.0)
1281 + _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1282 + _region_marked_bytes(0), _tot_words_done(0),
1283 + _tot_live(0), _tot_used(0)
1250 1284 {
1251 1285 _bottom_card_num =
1252 1286 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1253 1287 CardTableModRefBS::card_shift);
1254 1288 }
1255 1289
1256 1290 // It takes a region that's not empty (i.e., it has at least one
1257 1291 // live object in it and sets its corresponding bit on the region
1258 1292 // bitmap to 1. If the region is "starts humongous" it will also set
1259 1293 // to 1 the bits on the region bitmap that correspond to its
1260 1294 // associated "continues humongous" regions.
1261 1295 void set_bit_for_region(HeapRegion* hr) {
1262 1296 assert(!hr->continuesHumongous(), "should have filtered those out");
1263 1297
1264 1298 size_t index = hr->hrs_index();
1265 1299 if (!hr->startsHumongous()) {
1266 1300 // Normal (non-humongous) case: just set the bit.
1267 1301 _region_bm->par_at_put((BitMap::idx_t) index, true);
1268 1302 } else {
1269 1303 // Starts humongous case: calculate how many regions are part of
1270 1304 // this humongous region and then set the bit range. It might
1271 1305 // have been a bit more efficient to look at the object that
1272 1306 // spans these humongous regions to calculate their number from
1273 1307 // the object's size. However, it's a good idea to calculate
1274 1308 // this based on the metadata itself, and not the region
1275 1309 // contents, so that this code is not aware of what goes into
1276 1310 // the humongous regions (in case this changes in the future).
1277 1311 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1278 1312 size_t end_index = index + 1;
1279 1313 while (end_index < g1h->n_regions()) {
... 20 lines elided ...
1280 1314 HeapRegion* chr = g1h->region_at(end_index);
1281 1315 if (!chr->continuesHumongous()) break;
1282 1316 end_index += 1;
1283 1317 }
1284 1318 _region_bm->par_at_put_range((BitMap::idx_t) index,
1285 1319 (BitMap::idx_t) end_index, true);
1286 1320 }
1287 1321 }
1288 1322
1289 1323 bool doHeapRegion(HeapRegion* hr) {
1290 - if (!_final && _regions_done == 0) {
1291 - _start_vtime_sec = os::elapsedVTime();
1292 - }
1293 1324
1294 1325 if (hr->continuesHumongous()) {
1295 1326 // We will ignore these here and process them when their
1296 1327 // associated "starts humongous" region is processed (see
1297 1328 // set_bit_for_heap_region()). Note that we cannot rely on their
1298 1329 // associated "starts humongous" region to have their bit set to
1299 1330 // 1 since, due to the region chunking in the parallel region
1300 1331 // iteration, a "continues humongous" region might be visited
1301 1332 // before its associated "starts humongous".
1302 1333 return false;
1303 1334 }
1304 1335
1305 1336 HeapWord* nextTop = hr->next_top_at_mark_start();
1306 - HeapWord* start = hr->top_at_conc_mark_count();
1307 - assert(hr->bottom() <= start && start <= hr->end() &&
1308 - hr->bottom() <= nextTop && nextTop <= hr->end() &&
1309 - start <= nextTop,
1310 - "Preconditions.");
1311 - // Otherwise, record the number of word's we'll examine.
1337 + HeapWord* start = hr->bottom();
1338 +
1339 + assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
1340 + "Preconditions.");
1341 +
1342 + // Record the number of words we'll examine.
1312 1343 size_t words_done = (nextTop - start);
1344 +
1313 1345 // Find the first marked object at or after "start".
1314 1346 start = _bm->getNextMarkedWordAddress(start, nextTop);
1347 +
1315 1348 size_t marked_bytes = 0;
1349 + _region_marked_bytes = 0;
1316 1350
1317 1351 // Below, the term "card num" means the result of shifting an address
1318 1352 // by the card shift -- address 0 corresponds to card number 0. One
1319 1353 // must subtract the card num of the bottom of the heap to obtain a
1320 1354 // card table index.
1355 +
1321 1356 // The first card num of the sequence of live cards currently being
1322 1357 // constructed. -1 ==> no sequence.
1323 1358 intptr_t start_card_num = -1;
1359 +
1324 1360 // The last card num of the sequence of live cards currently being
1325 1361 // constructed. -1 ==> no sequence.
1326 1362 intptr_t last_card_num = -1;
1327 1363
1328 1364 while (start < nextTop) {
1329 - if (_yield && _cm->do_yield_check()) {
1330 - // We yielded. It might be for a full collection, in which case
1331 - // all bets are off; terminate the traversal.
1332 - if (_cm->has_aborted()) {
1333 - _changed = false;
1334 - return true;
1335 - } else {
1336 - // Otherwise, it might be a collection pause, and the region
1337 - // we're looking at might be in the collection set. We'll
1338 - // abandon this region.
1339 - return false;
1340 - }
1341 - }
1342 1365 oop obj = oop(start);
1343 1366 int obj_sz = obj->size();
1367 +
1344 1368 // The card num of the start of the current object.
1345 1369 intptr_t obj_card_num =
1346 1370 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1347 -
1348 1371 HeapWord* obj_last = start + obj_sz - 1;
1349 1372 intptr_t obj_last_card_num =
1350 1373 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1351 1374
1352 1375 if (obj_card_num != last_card_num) {
1353 1376 if (start_card_num == -1) {
1354 1377 assert(last_card_num == -1, "Both or neither.");
1355 1378 start_card_num = obj_card_num;
1356 1379 } else {
1357 1380 assert(last_card_num != -1, "Both or neither.");
1358 1381 assert(obj_card_num >= last_card_num, "Inv");
1359 1382 if ((obj_card_num - last_card_num) > 1) {
1360 1383 // Mark the last run, and start a new one.
1361 1384 mark_card_num_range(start_card_num, last_card_num);
1362 1385 start_card_num = obj_card_num;
1363 1386 }
1364 1387 }
1365 1388 #if CARD_BM_TEST_MODE
1366 1389 /*
1367 1390 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1368 1391 obj_card_num - _bottom_card_num,
1369 1392 obj_last_card_num - _bottom_card_num);
... 12 lines elided ...
1370 1393 */
1371 1394 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1372 1395 _card_bm->par_at_put(j - _bottom_card_num, 1);
1373 1396 }
1374 1397 #endif
1375 1398 }
1376 1399 // In any case, we set the last card num.
1377 1400 last_card_num = obj_last_card_num;
1378 1401
1379 1402 marked_bytes += (size_t)obj_sz * HeapWordSize;
1403 +
1380 1404 // Find the next marked object after this one.
1381 1405 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1382 - _changed = true;
1383 1406 }
1407 +
1384 1408 // Handle the last range, if any.
1385 1409 if (start_card_num != -1) {
1386 1410 mark_card_num_range(start_card_num, last_card_num);
1387 1411 }
1388 - if (_final) {
1389 - // Mark the allocated-since-marking portion...
1390 - HeapWord* tp = hr->top();
1391 - if (nextTop < tp) {
1392 - start_card_num =
1393 - intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1394 - last_card_num =
1395 - intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1396 - mark_card_num_range(start_card_num, last_card_num);
1397 - // This definitely means the region has live objects.
1398 - set_bit_for_region(hr);
1399 - }
1412 +
1413 + // Mark the allocated-since-marking portion...
1414 + HeapWord* top = hr->top();
1415 + if (nextTop < top) {
1416 + start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1417 + last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
1418 +
1419 + mark_card_num_range(start_card_num, last_card_num);
1420 +
1421 + // This definitely means the region has live objects.
1422 + set_bit_for_region(hr);
1400 1423 }
1401 1424
1402 - hr->add_to_marked_bytes(marked_bytes);
1403 1425 // Update the live region bitmap.
1404 1426 if (marked_bytes > 0) {
1405 1427 set_bit_for_region(hr);
1406 1428 }
1407 - hr->set_top_at_conc_mark_count(nextTop);
1429 +
1430 + // Set the marked bytes for the current region so that
1431 + // it can be queried by a calling verification routine.
1432 + _region_marked_bytes = marked_bytes;
1433 +
1408 1434 _tot_live += hr->next_live_bytes();
1409 1435 _tot_used += hr->used();
1410 - _words_done = words_done;
1436 + _tot_words_done = words_done;
1411 1437
1412 - if (!_final) {
1413 - ++_regions_done;
1414 - if (_regions_done % 10 == 0) {
1415 - double end_vtime_sec = os::elapsedVTime();
1416 - double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1417 - if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1418 - jlong sleep_time_ms =
1419 - (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1420 - os::sleep(Thread::current(), sleep_time_ms, false);
1421 - _start_vtime_sec = end_vtime_sec;
1422 - }
1438 + return false;
1439 + }
1440 +
1441 + size_t region_marked_bytes() const { return _region_marked_bytes; }
1442 + size_t tot_words_done() const { return _tot_words_done; }
1443 + size_t tot_live() const { return _tot_live; }
1444 + size_t tot_used() const { return _tot_used; }
1445 +};
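
The card-number arithmetic used throughout this closure -- shift an address right by the card shift, then subtract the card num of the heap bottom to obtain a bitmap index -- can be illustrated with a small standalone sketch. This is plain C++ for illustration only, not HotSpot code; the 9-bit shift matches G1's 512-byte cards but is hard-coded here as an assumption:

    #include <cstdint>
    #include <cstdio>

    // A card covers 2^kCardShift bytes (512 for HotSpot's card table).
    static const int kCardShift = 9;

    // The "card num" of an address: the address shifted right by the
    // card shift, as in the comment block above.
    static intptr_t card_num(const void* addr) {
      return (intptr_t)((uintptr_t)addr >> kCardShift);
    }

    int main() {
      static char heap[4096];
      // Subtracting the card num of the heap bottom yields a 0-based
      // card table index, which is how _bottom_card_num is used above.
      intptr_t bottom = card_num(heap);
      intptr_t idx = card_num(heap + 1500) - bottom;
      std::printf("card index of heap+1500: %ld\n", (long)idx);
      return 0;
    }
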
1446 +
1447 +// Aggregate the counting data that was constructed concurrently
1448 +// with marking.
1449 +class AddToMarkedBytesClosure: public HeapRegionClosure {
1450 + ConcurrentMark* _cm;
1451 + size_t _task_num;
1452 + size_t _max_task_num;
1453 +
1454 + bool _final;
1455 +
1456 +public:
1457 + AddToMarkedBytesClosure(ConcurrentMark *cm,
1458 + size_t task_num,
1459 + size_t max_task_num) :
1460 + _cm(cm),
1461 + _task_num(task_num),
1462 + _max_task_num(max_task_num),
1463 + _final(false)
1464 + {
1465 + assert(0 <= _task_num && _task_num < _max_task_num, "sanity");
1466 + if ((_max_task_num - _task_num) == 1) {
1467 + // Last task
1468 + _final = true;
1469 + }
1470 + }
1471 +
1472 + bool doHeapRegion(HeapRegion* hr) {
1473 + // Adds the value in the counted marked bytes array for
1474 + // _task_num for region hr to the value cached in the
1475 + // heap region itself.
1476 + // For the final task we also set the top at conc count
1477 + // for the region.
1478 + // The bits in the live region bitmap are set for regions
1479 + // that contain live data during the cleanup pause.
1480 +
1481 + if (hr->continuesHumongous()) {
1482 + // We will ignore these here and process them when their
1483 + // associated "starts humongous" region is processed.
1484 + // Note that we cannot rely on their associated
1485 + // "starts humongous" region to have its bit set to 1
1486 + // since, due to the region chunking in the parallel region
1487 + // iteration, a "continues humongous" region might be visited
1488 + // before its associated "starts humongous".
1489 + return false;
1490 + }
1491 +
1492 + int hrs_index = hr->hrs_index();
1493 + size_t* marked_bytes_array = _cm->count_marked_bytes_for(_task_num);
1494 + size_t marked_bytes = marked_bytes_array[hrs_index];
1495 + hr->add_to_marked_bytes(marked_bytes);
1496 +
1497 + if (_final) {
1498 + HeapWord* ntams = hr->next_top_at_mark_start();
1499 + HeapWord* start = hr->bottom();
1500 +
1501 + assert(start <= ntams && ntams <= hr->top() && hr->top() <= hr->end(),
1502 + "Preconditions.");
1503 +
1504 + hr->set_top_at_conc_mark_count(ntams);
1505 + }
1506 +
1507 + return false;
1508 + }
1509 +};
1510 +
1511 +void ConcurrentMark::aggregate_all_count_data() {
1512 + _card_bm.clear();
1513 +
1514 + // Unions the per task card bitmaps into the global card bitmap,
1515 + // and aggregates the per task marked bytes for each region into
1516 + // the heap region itself.
1517 +
1518 + for (int i = 0; i < _max_task_num; i += 1) {
1519 + BitMap& task_card_bm = count_card_bitmap_for(i);
1520 + _card_bm.set_union(task_card_bm);
1521 +
1522 + // Update the marked bytes for each region
1523 + AddToMarkedBytesClosure cl(this, i, _max_task_num);
1524 + _g1h->heap_region_iterate(&cl);
1525 + }
1526 +
1527 + // We're done with the accumulated per-task concurrent
1528 + // counting data so let's clear it for the next marking.
1529 + clear_all_count_data();
1530 +}
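
The aggregation step above amounts to a bitwise OR of the per-task card bitmaps plus a per-region sum of the per-task marked-byte counts. A minimal standalone sketch of that pattern (plain C++; the task, word and region counts are made-up illustration values, not HotSpot's):

    #include <cstddef>
    #include <stdint.h>
    #include <vector>

    int main() {
      const size_t kTasks = 4, kWords = 8, kRegions = 16;

      // Per-task card bitmaps (word-packed) and per-task, per-region
      // marked-byte counts, filled in concurrently during marking
      // (left zeroed in this sketch).
      std::vector<std::vector<uint64_t> >
        task_card_bm(kTasks, std::vector<uint64_t>(kWords, 0));
      std::vector<std::vector<size_t> >
        task_marked_bytes(kTasks, std::vector<size_t>(kRegions, 0));

      std::vector<uint64_t> global_card_bm(kWords, 0);
      std::vector<size_t>   region_marked_bytes(kRegions, 0);

      // Union each task's card bitmap into the global bitmap and add
      // each task's per-region counts into the per-region totals,
      // mirroring set_union() and AddToMarkedBytesClosure above.
      for (size_t t = 0; t < kTasks; ++t) {
        for (size_t w = 0; w < kWords; ++w) {
          global_card_bm[w] |= task_card_bm[t][w];
        }
        for (size_t r = 0; r < kRegions; ++r) {
          region_marked_bytes[r] += task_marked_bytes[t][r];
        }
      }
      return 0;
    }
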
1531 +
1532 +// Final update of count data (during cleanup).
1533 +// Adds the size of the range [top_at_count, NTAMS) to the
1534 +// marked bytes for each region. Sets the bits in the card
1535 +// bitmap corresponding to the interval [top_at_count, top],
1536 +// and sets the bit in the region bitmap for each region
1537 +// that contains live data.
1538 +
1539 +class FinalCountDataUpdateClosure: public HeapRegionClosure {
1540 + ConcurrentMark* _cm;
1541 + BitMap* _region_bm;
1542 + BitMap* _card_bm;
1543 + intptr_t _bottom_card_num;
1544 +
1545 + size_t _total_live_bytes;
1546 + size_t _total_used_bytes;
1547 + size_t _total_words_done;
1548 +
1549 + void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1550 + BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1551 + BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
1552 +
1553 + // Inclusive bit range [start_idx, last_idx]. par_at_put_range
1554 + // is exclusive so we have to also set the bit for last_idx.
1555 + // Passing last_idx+1 to par_at_put_range would work in
1556 + // most cases but could trip an OOB assertion.
1557 +
1558 + if ((last_idx - start_idx) > 0) {
1559 + _card_bm->par_at_put_range(start_idx, last_idx, true);
1560 + }
1561 + _card_bm->par_set_bit(last_idx);
1562 + }
1563 +
1564 + // Takes a region that's not empty (i.e., one that has at least one
1565 + // live object in it) and sets its corresponding bit in the region
1566 + // bitmap to 1. If the region is "starts humongous" it will also set
1567 + // to 1 the bits on the region bitmap that correspond to its
1568 + // associated "continues humongous" regions.
1569 + void set_bit_for_region(HeapRegion* hr) {
1570 + assert(!hr->continuesHumongous(), "should have filtered those out");
1571 +
1572 + size_t index = hr->hrs_index();
1573 + if (!hr->startsHumongous()) {
1574 + // Normal (non-humongous) case: just set the bit.
1575 + _region_bm->par_set_bit((BitMap::idx_t) index);
1576 + } else {
1577 + // Starts humongous case: calculate how many regions are part of
1578 + // this humongous region and then set the bit range. It might
1579 + // have been a bit more efficient to look at the object that
1580 + // spans these humongous regions to calculate their number from
1581 + // the object's size. However, it's a good idea to calculate
1582 + // this based on the metadata itself, and not the region
1583 + // contents, so that this code is not aware of what goes into
1584 + // the humongous regions (in case this changes in the future).
1585 + G1CollectedHeap* g1h = G1CollectedHeap::heap();
1586 + size_t end_index = index + 1;
1587 + while (end_index < g1h->n_regions()) {
1588 + HeapRegion* chr = g1h->region_at(end_index);
1589 + if (!chr->continuesHumongous()) break;
1590 + end_index += 1;
1423 1591 }
1592 + _region_bm->par_at_put_range((BitMap::idx_t) index,
1593 + (BitMap::idx_t) end_index, true);
1424 1594 }
1595 + }
1596 +
1597 + public:
1598 + FinalCountDataUpdateClosure(ConcurrentMark* cm,
1599 + BitMap* region_bm,
1600 + BitMap* card_bm) :
1601 + _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1602 + _total_live_bytes(0), _total_used_bytes(0), _total_words_done(0)
1603 + {
1604 + _bottom_card_num =
1605 + intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1606 + CardTableModRefBS::card_shift);
1607 + }
1608 +
1609 + bool doHeapRegion(HeapRegion* hr) {
1610 +
1611 + if (hr->continuesHumongous()) {
1612 + // We will ignore these here and process them when their
1613 + // associated "starts humongous" region is processed (see
1614 + // set_bit_for_heap_region()). Note that we cannot rely on their
1615 + // associated "starts humongous" region to have its bit set to
1616 + // 1 since, due to the region chunking in the parallel region
1617 + // iteration, a "continues humongous" region might be visited
1618 + // before its associated "starts humongous".
1619 + return false;
1620 + }
1621 +
1622 + HeapWord* start = hr->top_at_conc_mark_count();
1623 + HeapWord* ntams = hr->next_top_at_mark_start();
1624 + HeapWord* top = hr->top();
1625 +
1626 + assert(hr->bottom() <= start && start <= hr->end() &&
1627 + hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1628 +
1629 + size_t words_done = ntams - hr->bottom();
1630 +
1631 + intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1632 + intptr_t last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
1633 +
1634 +
1635 + if (start < ntams) {
1636 + // Region was changed between remark and cleanup pauses
1637 + // We need to add (ntams - start) to the marked bytes
1638 + // for this region, and set bits for the range
1639 + // [ card_num(start), card_num(ntams) ) in the
1640 + // card bitmap.
1641 + size_t live_bytes = (ntams - start) * HeapWordSize;
1642 + hr->add_to_marked_bytes(live_bytes);
1643 +
1644 + // Record the new top at conc count
1645 + hr->set_top_at_conc_mark_count(ntams);
1646 +
1647 + // The setting of the bits in the card bitmap takes place below.
1648 + }
1649 +
1650 + // Mark the allocated-since-marking portion...
1651 + if (ntams < top) {
1652 + // This definitely means the region has live objects.
1653 + set_bit_for_region(hr);
1654 + }
1655 +
1656 + // Now set the bits for [start, top]
1657 + mark_card_num_range(start_card_num, last_card_num);
1658 +
1659 + // Set the bit for the region if it contains live data
1660 + if (hr->next_marked_bytes() > 0) {
1661 + set_bit_for_region(hr);
1662 + }
1663 +
1664 + _total_words_done += words_done;
1665 + _total_used_bytes += hr->used();
1666 + _total_live_bytes += hr->next_marked_bytes();
1425 1667
1426 1668 return false;
1427 1669 }
1428 1670
1429 - bool changed() { return _changed; }
1430 - void reset() { _changed = false; _words_done = 0; }
1431 - void no_yield() { _yield = false; }
1432 - size_t words_done() { return _words_done; }
1433 - size_t tot_live() { return _tot_live; }
1434 - size_t tot_used() { return _tot_used; }
1671 + size_t total_words_done() const { return _total_words_done; }
1672 + size_t total_live_bytes() const { return _total_live_bytes; }
1673 + size_t total_used_bytes() const { return _total_used_bytes; }
1435 1674 };
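
A note on mark_card_num_range above: par_at_put_range treats its end index as exclusive, so the helper sets the final bit separately rather than passing last_idx + 1, which could land one past the end of the bitmap. The same idea in a standalone sketch (plain C++; set_range stands in for the exclusive-end par_at_put_range):

    #include <cstddef>
    #include <vector>

    // Sets bits [beg, end) -- exclusive at the end, like par_at_put_range.
    static void set_range(std::vector<bool>& bm, size_t beg, size_t end) {
      for (size_t i = beg; i < end; ++i) bm[i] = true;
    }

    // Sets the inclusive range [start_idx, last_idx] without ever forming
    // last_idx + 1, which could fall one past the end of the bitmap.
    static void set_inclusive(std::vector<bool>& bm,
                              size_t start_idx, size_t last_idx) {
      if (last_idx > start_idx) {
        set_range(bm, start_idx, last_idx);
      }
      bm[last_idx] = true;  // set the final bit separately
    }

    int main() {
      std::vector<bool> bm(8, false);
      set_inclusive(bm, 2, 5);  // bits 2..5 end up set
      return (bm[2] && bm[3] && bm[4] && bm[5] && !bm[6]) ? 0 : 1;
    }
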
1436 1675
1676 +// Heap region closure used for verifying the counting data
1677 +// that was accumulated concurrently and aggregated during
1678 +// the remark pause. This closure is applied to the heap
1679 +// regions during the STW cleanup pause.
1437 1680
1438 -void ConcurrentMark::calcDesiredRegions() {
1439 - _region_bm.clear();
1440 - _card_bm.clear();
1441 - CalcLiveObjectsClosure calccl(false /*final*/,
1442 - nextMarkBitMap(), this,
1443 - &_region_bm, &_card_bm);
1444 - G1CollectedHeap *g1h = G1CollectedHeap::heap();
1445 - g1h->heap_region_iterate(&calccl);
1681 +class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1682 + ConcurrentMark* _cm;
1683 + CalcLiveObjectsClosure _calc_cl;
1684 + BitMap* _region_bm; // Region BM to be verified
1685 + BitMap* _card_bm; // Card BM to be verified
1686 + bool _verbose; // verbose output?
1446 1687
1447 - do {
1448 - calccl.reset();
1449 - g1h->heap_region_iterate(&calccl);
1450 - } while (calccl.changed());
1451 -}
1688 + BitMap* _exp_region_bm; // Expected Region BM values
1689 + BitMap* _exp_card_bm; // Expected card BM values
1690 +
1691 + intptr_t _bottom_card_num; // Used for calculating bitmap indices
1692 +
1693 + int _failures;
1694 +
1695 +public:
1696 + VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
1697 + BitMap* region_bm,
1698 + BitMap* card_bm,
1699 + BitMap* exp_region_bm,
1700 + BitMap* exp_card_bm,
1701 + bool verbose) :
1702 + _cm(cm),
1703 + _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
1704 + _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1705 + _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1706 + _failures(0)
1707 + {
1708 + _bottom_card_num =
1709 + intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1710 + CardTableModRefBS::card_shift);
1711 + }
1712 +
1713 + int failures() const { return _failures; }
1714 +
1715 + bool doHeapRegion(HeapRegion* hr) {
1716 + if (hr->continuesHumongous()) {
1717 + // We will ignore these here and process them when their
1718 + // associated "starts humongous" region is processed (see
1719 + // set_bit_for_heap_region()). Note that we cannot rely on their
1720 + // associated "starts humongous" region to have its bit set to
1721 + // 1 since, due to the region chunking in the parallel region
1722 + // iteration, a "continues humongous" region might be visited
1723 + // before its associated "starts humongous".
1724 + return false;
1725 + }
1726 +
1727 + // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1728 + // this region and set the corresponding bits in the expected region
1729 + // and card bitmaps.
1730 + bool res = _calc_cl.doHeapRegion(hr);
1731 + assert(res == false, "should be continuing");
1732 +
1733 + // Note that the calculated count data could be a subset of the
1734 + // count data that was accumulated during marking. See the comment
1735 + // in G1ParCopyHelper::copy_to_survivor_space for an explanation
1736 + // of why.
1737 +
1738 + if (_verbose) {
1739 + gclog_or_tty->print("Region %d: bottom: "PTR_FORMAT", ntams: "
1740 + PTR_FORMAT", top: "PTR_FORMAT", end: "PTR_FORMAT,
1741 + hr->hrs_index(), hr->bottom(), hr->next_top_at_mark_start(),
1742 + hr->top(), hr->end());
1743 + gclog_or_tty->print_cr(", marked_bytes: calc/actual "SIZE_FORMAT"/"SIZE_FORMAT,
1744 + _calc_cl.region_marked_bytes(),
1745 + hr->next_marked_bytes());
1746 + }
1747 +
1748 + // Verify that _top_at_conc_count == ntams
1749 + if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
1750 + if (_verbose) {
1751 + gclog_or_tty->print_cr("Region %d: top at conc count incorrect: expected "
1752 + PTR_FORMAT", actual: "PTR_FORMAT,
1753 + hr->hrs_index(), hr->next_top_at_mark_start(),
1754 + hr->top_at_conc_mark_count());
1755 + }
1756 + _failures += 1;
1757 + }
1758 +
1759 + // Verify the marked bytes for this region.
1760 + size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1761 + size_t act_marked_bytes = hr->next_marked_bytes();
1762 +
1763 + // We're OK if actual marked bytes >= expected.
1764 + if (exp_marked_bytes > act_marked_bytes) {
1765 + if (_verbose) {
1766 + gclog_or_tty->print_cr("Region %d: marked bytes mismatch: expected: "
1767 + SIZE_FORMAT", actual: "SIZE_FORMAT,
1768 + hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1769 + }
1770 + _failures += 1;
1771 + }
1772 +
1773 + // Verify the bit, for this region, in the actual and expected
1774 + // (which was just calculated) region bit maps.
1775 + // We're not OK if the expected bit is set and the actual is not set.
1776 + BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();
1777 +
1778 + bool expected = _exp_region_bm->at(index);
1779 + bool actual = _region_bm->at(index);
1780 + if (expected && !actual) {
1781 + if (_verbose) {
1782 + gclog_or_tty->print_cr("Region %d: region bitmap mismatch: expected: %d, actual: %d",
1783 + hr->hrs_index(), expected, actual);
1784 + }
1785 + _failures += 1;
1786 + }
1787 +
1788 + // Verify that the card bit maps for the cards spanned by the current
1789 + // region match. The set of offsets that have set bits in the expected
1790 + // bitmap should be a subset of the offsets with set bits from the actual
1791 + // calculated card bitmap.
1792 + // Again it's more important that if the expected bit is set then the
1793 + // actual bit be set.
1794 + intptr_t start_card_num =
1795 + intptr_t(uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift);
1796 + intptr_t top_card_num =
1797 + intptr_t(uintptr_t(hr->top()) >> CardTableModRefBS::card_shift);
1798 +
1799 + BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1800 + BitMap::idx_t end_idx = top_card_num - _bottom_card_num;
1801 +
1802 + for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1803 + expected = _exp_card_bm->at(i);
1804 + actual = _card_bm->at(i);
1805 +
1806 + if (expected && !actual) {
1807 + if (_verbose) {
1808 + gclog_or_tty->print_cr("Region %d: card bitmap mismatch at idx %d: expected: %d, actual: %d",
1809 + hr->hrs_index(), i, expected, actual);
1810 + }
1811 + _failures += 1;
1812 + }
1813 + }
1814 + if (_failures) {
1815 + // Stop the iteration if any failures were detected.
1816 + return true;
1817 + }
1818 +
1819 + return false;
1820 + }
1821 +};
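
The checks above are deliberately one-sided: a bit set in the expected (recomputed) bitmap must also be set in the actual bitmap, while extra actual bits are tolerated, because the data accumulated during marking may be a superset of what the recomputation sees. A standalone sketch of such a subset check over word-packed bitmaps (plain C++, illustration only):

    #include <cstddef>
    #include <stdint.h>
    #include <vector>

    // Returns true iff every bit set in 'expected' is also set in
    // 'actual', i.e. expected is a subset of actual. Bits that are set
    // only in 'actual' are tolerated, matching the verification above.
    static bool is_subset(const std::vector<uint64_t>& expected,
                          const std::vector<uint64_t>& actual) {
      for (size_t i = 0; i < expected.size(); ++i) {
        if ((expected[i] & ~actual[i]) != 0) {
          return false;  // an expected bit is missing from actual
        }
      }
      return true;
    }

    int main() {
      std::vector<uint64_t> expected_bm(2, 0), actual_bm(2, 0);
      expected_bm[0] = 0x5;  // bits 0 and 2
      actual_bm[0]   = 0xD;  // bits 0, 2 and 3 -- extra bit 3 is fine
      return is_subset(expected_bm, actual_bm) ? 0 : 1;
    }
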
1822 +
1823 +class Mux2HRClosure: public HeapRegionClosure {
1824 + HeapRegionClosure* _cl1;
1825 + HeapRegionClosure* _cl2;
1826 +
1827 +public:
1828 + Mux2HRClosure(HeapRegionClosure *c1, HeapRegionClosure *c2) : _cl1(c1), _cl2(c2) { }
1829 + bool doHeapRegion(HeapRegion* hr) {
1830 + bool res1 = _cl1->doHeapRegion(hr);
1831 + bool res2 = _cl2->doHeapRegion(hr);
1832 +
1833 + // Only continue the iteration if both closures return false.
1834 + return res1 || res2;
1835 + }
1836 +};
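
Mux2HRClosure is a plain composite: both closures visit every region, and the iteration stops if either asks to stop. A self-contained rendering of the pattern (plain C++; Region, RegionClosure and the concrete closure are stand-ins, not HotSpot types):

    #include <cstdio>

    struct Region { int id; };

    struct RegionClosure {
      virtual bool do_region(Region* r) = 0;  // true means: stop iterating
      virtual ~RegionClosure() { }
    };

    // Applies two closures to every region; stops if either one stops.
    struct Mux2 : public RegionClosure {
      RegionClosure* _c1;
      RegionClosure* _c2;
      Mux2(RegionClosure* c1, RegionClosure* c2) : _c1(c1), _c2(c2) { }
      bool do_region(Region* r) {
        bool stop1 = _c1->do_region(r);
        bool stop2 = _c2->do_region(r);  // runs even if the first stopped
        return stop1 || stop2;
      }
    };

    struct PrintClosure : public RegionClosure {
      bool do_region(Region* r) {
        std::printf("region %d\n", r->id);
        return false;
      }
    };

    int main() {
      PrintClosure a, b;
      Mux2 both(&a, &b);
      Region r = { 0 };
      return both.do_region(&r) ? 1 : 0;
    }

Note that, as in the closure above, the second closure still runs on a region even when the first asked to terminate; only the combined result ends the iteration.
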
1452 1837
1453 1838 class G1ParFinalCountTask: public AbstractGangTask {
1454 1839 protected:
1455 1840 G1CollectedHeap* _g1h;
1456 1841 CMBitMap* _bm;
1457 1842 size_t _n_workers;
1458 1843 size_t *_live_bytes;
1459 1844 size_t *_used_bytes;
1460 - BitMap* _region_bm;
1461 - BitMap* _card_bm;
1845 +
1846 + BitMap* _actual_region_bm;
1847 + BitMap* _actual_card_bm;
1848 +
1849 + BitMap _expected_region_bm;
1850 + BitMap _expected_card_bm;
1851 +
1852 + volatile int _failures;
1853 +
1462 1854 public:
1463 1855 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1464 1856 BitMap* region_bm, BitMap* card_bm)
1465 - : AbstractGangTask("G1 final counting"), _g1h(g1h),
1466 - _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
1857 + : AbstractGangTask("G1 final counting"),
1858 + _g1h(g1h), _bm(bm),
1859 + _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1860 + _expected_region_bm(0, false), _expected_card_bm(0, false),
1861 + _failures(0)
1862 + {
1467 1863 if (ParallelGCThreads > 0) {
1468 1864 _n_workers = _g1h->workers()->total_workers();
1469 1865 } else {
1470 1866 _n_workers = 1;
1471 1867 }
1868 +
1472 1869 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1473 1870 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1871 +
1872 + if (VerifyDuringGC) {
1873 + _expected_card_bm.resize(_actual_card_bm->size(), false);
1874 + _expected_region_bm.resize(_actual_region_bm->size(), false);
1875 + }
1474 1876 }
1475 1877
1476 1878 ~G1ParFinalCountTask() {
1879 + if (VerifyDuringGC) {
1880 + _expected_region_bm.resize(0);
1881 + _expected_card_bm.resize(0);
1882 + }
1477 1883 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1478 1884 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1479 1885 }
1480 1886
1481 1887 void work(int i) {
1482 - CalcLiveObjectsClosure calccl(true /*final*/,
1483 - _bm, _g1h->concurrent_mark(),
1484 - _region_bm, _card_bm);
1485 - calccl.no_yield();
1888 +
1889 + FinalCountDataUpdateClosure final_update_cl(_g1h->concurrent_mark(),
1890 + _actual_region_bm, _actual_card_bm);
1891 +
1892 + VerifyLiveObjectDataHRClosure verify_cl(_g1h->concurrent_mark(),
1893 + _actual_region_bm, _actual_card_bm,
1894 + &_expected_region_bm,
1895 + &_expected_card_bm,
1896 + true /* verbose */);
1897 +
1898 + Mux2HRClosure update_and_verify_cl(&final_update_cl, &verify_cl);
1899 +
1900 + HeapRegionClosure* hr_cl = &final_update_cl;
1901 + if (VerifyDuringGC) {
1902 + hr_cl = &update_and_verify_cl;
1903 + }
1904 +
1486 1905 if (G1CollectedHeap::use_parallel_gc_threads()) {
1487 - _g1h->heap_region_par_iterate_chunked(&calccl, i,
1906 + _g1h->heap_region_par_iterate_chunked(hr_cl, i,
1488 1907 HeapRegion::FinalCountClaimValue);
1489 1908 } else {
1490 - _g1h->heap_region_iterate(&calccl);
1909 + _g1h->heap_region_iterate(hr_cl);
1491 1910 }
1492 - assert(calccl.complete(), "Shouldn't have yielded!");
1493 1911
1494 1912 assert((size_t) i < _n_workers, "invariant");
1495 - _live_bytes[i] = calccl.tot_live();
1496 - _used_bytes[i] = calccl.tot_used();
1913 + _live_bytes[i] = final_update_cl.total_live_bytes();
1914 + _used_bytes[i] = final_update_cl.total_used_bytes();
1915 +
1916 + if (VerifyDuringGC) {
1917 + Atomic::add(verify_cl.failures(), &_failures);
1918 + }
1497 1919 }
1920 +
1498 1921 size_t live_bytes() {
1499 1922 size_t live_bytes = 0;
1500 1923 for (size_t i = 0; i < _n_workers; ++i)
1501 1924 live_bytes += _live_bytes[i];
1502 1925 return live_bytes;
1503 1926 }
1927 +
1504 1928 size_t used_bytes() {
1505 1929 size_t used_bytes = 0;
1506 1930 for (size_t i = 0; i < _n_workers; ++i)
1507 1931 used_bytes += _used_bytes[i];
1508 1932 return used_bytes;
1509 1933 }
1934 +
1935 + int failures() const { return _failures; }
1510 1936 };
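
G1ParFinalCountTask gathers per-worker totals with one array slot per worker, so no synchronization is needed while the gang runs; the slots are summed only after all workers have finished. The idiom in a standalone sketch (plain C++11, with std::thread standing in for the work gang; the slot values are made up):

    #include <cstddef>
    #include <numeric>
    #include <thread>
    #include <vector>

    int main() {
      const size_t kWorkers = 4;

      // One slot per worker: each worker writes only its own index,
      // so the workers never race with each other.
      std::vector<size_t> live_bytes(kWorkers, 0);

      std::vector<std::thread> pool;
      for (size_t i = 0; i < kWorkers; ++i) {
        pool.push_back(std::thread([&live_bytes, i]() {
          live_bytes[i] = (i + 1) * 100;  // stand-in for per-worker counting
        }));
      }
      for (size_t i = 0; i < kWorkers; ++i) {
        pool[i].join();
      }

      // Summed only after join(), just as live_bytes()/used_bytes()
      // above are only read once the gang task has completed.
      size_t total = std::accumulate(live_bytes.begin(), live_bytes.end(),
                                     (size_t)0);
      return (total == 1000) ? 0 : 1;
    }

Adjacent slots may share a cache line (false sharing), which is tolerable in a write-once pattern like this.
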
1511 1937
1512 1938 class G1ParNoteEndTask;
1513 1939
1514 1940 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1515 1941 G1CollectedHeap* _g1;
1516 1942 int _worker_num;
1517 1943 size_t _max_live_bytes;
1518 1944 size_t _regions_claimed;
1519 1945 size_t _freed_bytes;
1520 1946 FreeRegionList* _local_cleanup_list;
1521 1947 HumongousRegionSet* _humongous_proxy_set;
1522 1948 HRRSCleanupTask* _hrrs_cleanup_task;
1523 1949 double _claimed_region_time;
1524 1950 double _max_region_time;
1525 1951
1526 1952 public:
1527 1953 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1528 1954 int worker_num,
1529 1955 FreeRegionList* local_cleanup_list,
1530 1956 HumongousRegionSet* humongous_proxy_set,
1531 1957 HRRSCleanupTask* hrrs_cleanup_task);
1532 1958 size_t freed_bytes() { return _freed_bytes; }
1533 1959
1534 1960 bool doHeapRegion(HeapRegion *r);
1535 1961
1536 1962 size_t max_live_bytes() { return _max_live_bytes; }
1537 1963 size_t regions_claimed() { return _regions_claimed; }
1538 1964 double claimed_region_time_sec() { return _claimed_region_time; }
1539 1965 double max_region_time_sec() { return _max_region_time; }
1540 1966 };
1541 1967
1542 1968 class G1ParNoteEndTask: public AbstractGangTask {
1543 1969 friend class G1NoteEndOfConcMarkClosure;
1544 1970
1545 1971 protected:
1546 1972 G1CollectedHeap* _g1h;
1547 1973 size_t _max_live_bytes;
1548 1974 size_t _freed_bytes;
1549 1975 FreeRegionList* _cleanup_list;
1550 1976
1551 1977 public:
1552 1978 G1ParNoteEndTask(G1CollectedHeap* g1h,
1553 1979 FreeRegionList* cleanup_list) :
1554 1980 AbstractGangTask("G1 note end"), _g1h(g1h),
1555 1981 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1556 1982
1557 1983 void work(int i) {
1558 1984 double start = os::elapsedTime();
1559 1985 FreeRegionList local_cleanup_list("Local Cleanup List");
1560 1986 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1561 1987 HRRSCleanupTask hrrs_cleanup_task;
1562 1988 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1563 1989 &humongous_proxy_set,
1564 1990 &hrrs_cleanup_task);
1565 1991 if (G1CollectedHeap::use_parallel_gc_threads()) {
1566 1992 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1567 1993 HeapRegion::NoteEndClaimValue);
1568 1994 } else {
1569 1995 _g1h->heap_region_iterate(&g1_note_end);
1570 1996 }
1571 1997 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1572 1998
1573 1999 // Now update the lists
1574 2000 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1575 2001 NULL /* free_list */,
1576 2002 &humongous_proxy_set,
1577 2003 true /* par */);
1578 2004 {
1579 2005 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1580 2006 _max_live_bytes += g1_note_end.max_live_bytes();
1581 2007 _freed_bytes += g1_note_end.freed_bytes();
1582 2008
1583 2009 // If we iterate over the global cleanup list at the end of
1584 2010 // cleanup to do this printing we will not guarantee to only
1585 2011 // generate output for the newly-reclaimed regions (the list
1586 2012 // might not be empty at the beginning of cleanup; we might
1587 2013 // still be working on its previous contents). So we do the
1588 2014 // printing here, before we append the new regions to the global
1589 2015 // cleanup list.
1590 2016
1591 2017 G1HRPrinter* hr_printer = _g1h->hr_printer();
1592 2018 if (hr_printer->is_active()) {
1593 2019 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1594 2020 while (iter.more_available()) {
1595 2021 HeapRegion* hr = iter.get_next();
1596 2022 hr_printer->cleanup(hr);
1597 2023 }
1598 2024 }
1599 2025
1600 2026 _cleanup_list->add_as_tail(&local_cleanup_list);
1601 2027 assert(local_cleanup_list.is_empty(), "post-condition");
1602 2028
1603 2029 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1604 2030 }
1605 2031 double end = os::elapsedTime();
1606 2032 if (G1PrintParCleanupStats) {
1607 2033 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1608 2034 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1609 2035 i, start, end, (end-start)*1000.0,
1610 2036 g1_note_end.regions_claimed(),
1611 2037 g1_note_end.claimed_region_time_sec()*1000.0,
1612 2038 g1_note_end.max_region_time_sec()*1000.0);
1613 2039 }
1614 2040 }
1615 2041 size_t max_live_bytes() { return _max_live_bytes; }
1616 2042 size_t freed_bytes() { return _freed_bytes; }
1617 2043 };
1618 2044
1619 2045 class G1ParScrubRemSetTask: public AbstractGangTask {
1620 2046 protected:
1621 2047 G1RemSet* _g1rs;
1622 2048 BitMap* _region_bm;
1623 2049 BitMap* _card_bm;
1624 2050 public:
1625 2051 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1626 2052 BitMap* region_bm, BitMap* card_bm) :
1627 2053 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1628 2054 _region_bm(region_bm), _card_bm(card_bm)
1629 2055 {}
1630 2056
1631 2057 void work(int i) {
1632 2058 if (G1CollectedHeap::use_parallel_gc_threads()) {
1633 2059 _g1rs->scrub_par(_region_bm, _card_bm, i,
1634 2060 HeapRegion::ScrubRemSetClaimValue);
1635 2061 } else {
1636 2062 _g1rs->scrub(_region_bm, _card_bm);
1637 2063 }
1638 2064 }
1639 2065
1640 2066 };
1641 2067
1642 2068 G1NoteEndOfConcMarkClosure::
1643 2069 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1644 2070 int worker_num,
1645 2071 FreeRegionList* local_cleanup_list,
1646 2072 HumongousRegionSet* humongous_proxy_set,
1647 2073 HRRSCleanupTask* hrrs_cleanup_task)
1648 2074 : _g1(g1), _worker_num(worker_num),
1649 2075 _max_live_bytes(0), _regions_claimed(0),
1650 2076 _freed_bytes(0),
1651 2077 _claimed_region_time(0.0), _max_region_time(0.0),
1652 2078 _local_cleanup_list(local_cleanup_list),
1653 2079 _humongous_proxy_set(humongous_proxy_set),
1654 2080 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1655 2081
1656 2082 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
1657 2083 // We use a claim value of zero here because all regions
1658 2084 // were claimed with value 1 in the FinalCount task.
1659 2085 hr->reset_gc_time_stamp();
1660 2086 if (!hr->continuesHumongous()) {
1661 2087 double start = os::elapsedTime();
1662 2088 _regions_claimed++;
1663 2089 hr->note_end_of_marking();
1664 2090 _max_live_bytes += hr->max_live_bytes();
1665 2091 _g1->free_region_if_empty(hr,
1666 2092 &_freed_bytes,
1667 2093 _local_cleanup_list,
1668 2094 _humongous_proxy_set,
1669 2095 _hrrs_cleanup_task,
1670 2096 true /* par */);
1671 2097 double region_time = (os::elapsedTime() - start);
1672 2098 _claimed_region_time += region_time;
1673 2099 if (region_time > _max_region_time) {
1674 2100 _max_region_time = region_time;
1675 2101 }
1676 2102 }
1677 2103 return false;
1678 2104 }
1679 2105
1680 2106 void ConcurrentMark::cleanup() {
1681 2107 // world is stopped at this checkpoint
1682 2108 assert(SafepointSynchronize::is_at_safepoint(),
1683 2109 "world should be stopped");
1684 2110 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1685 2111
1686 2112 // If a full collection has happened, we shouldn't do this.
1687 2113 if (has_aborted()) {
1688 2114 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1689 2115 return;
1690 2116 }
1691 2117
1692 2118 g1h->verify_region_sets_optional();
1693 2119
1694 2120 if (VerifyDuringGC) {
1695 2121 HandleMark hm; // handle scope
1696 2122 gclog_or_tty->print(" VerifyDuringGC:(before)");
1697 2123 Universe::heap()->prepare_for_verify();
1698 2124 Universe::verify(/* allow dirty */ true,
1699 2125 /* silent */ false,
1700 2126 /* option */ VerifyOption_G1UsePrevMarking);
1701 2127 }
1702 2128
1703 2129 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1704 2130 g1p->record_concurrent_mark_cleanup_start();
1705 2131
1706 2132 double start = os::elapsedTime();
1707 2133
1708 2134 HeapRegionRemSet::reset_for_cleanup_tasks();
1709 2135
2136 + // Clear the global region bitmap - it will be filled as part
2137 + // of the final counting task.
2138 + _region_bm.clear();
2139 +
1710 2140 // Do counting once more with the world stopped for good measure.
1711 2141 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1712 2142 &_region_bm, &_card_bm);
2143 +
1713 2144 if (G1CollectedHeap::use_parallel_gc_threads()) {
1714 - assert(g1h->check_heap_region_claim_values(
1715 - HeapRegion::InitialClaimValue),
2145 + assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1716 2146 "sanity check");
1717 2147
1718 2148 int n_workers = g1h->workers()->total_workers();
1719 2149 g1h->set_par_threads(n_workers);
1720 2150 g1h->workers()->run_task(&g1_par_count_task);
1721 2151 g1h->set_par_threads(0);
1722 2152
1723 - assert(g1h->check_heap_region_claim_values(
1724 - HeapRegion::FinalCountClaimValue),
2153 + assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1725 2154 "sanity check");
1726 2155 } else {
1727 2156 g1_par_count_task.work(0);
1728 2157 }
1729 2158
2159 + // Check that there were no failures while verifying
2160 + // the live counting data.
2161 + if (VerifyDuringGC) {
2162 + assert(g1_par_count_task.failures() == 0, "Unexpected failures");
2163 + }
2164 +
1730 2165 size_t known_garbage_bytes =
1731 2166 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1732 2167 g1p->set_known_garbage_bytes(known_garbage_bytes);
1733 2168
1734 2169 size_t start_used_bytes = g1h->used();
1735 2170 _at_least_one_mark_complete = true;
1736 2171 g1h->set_marking_complete();
1737 2172
1738 2173 ergo_verbose4(ErgoConcCycles,
1739 2174 "finish cleanup",
1740 2175 ergo_format_byte("occupancy")
1741 2176 ergo_format_byte("capacity")
1742 2177 ergo_format_byte_perc("known garbage"),
1743 2178 start_used_bytes, g1h->capacity(),
1744 2179 known_garbage_bytes,
1745 2180 ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
1746 2181
1747 2182 double count_end = os::elapsedTime();
1748 2183 double this_final_counting_time = (count_end - start);
1749 2184 if (G1PrintParCleanupStats) {
1750 2185 gclog_or_tty->print_cr("Cleanup:");
1751 2186 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1752 2187 this_final_counting_time*1000.0);
1753 2188 }
1754 2189 _total_counting_time += this_final_counting_time;
1755 2190
1756 2191 if (G1PrintRegionLivenessInfo) {
1757 2192 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1758 2193 _g1h->heap_region_iterate(&cl);
1759 2194 }
1760 2195
1761 2196 // Install newly created mark bitMap as "prev".
1762 2197 swapMarkBitMaps();
1763 2198
1764 2199 g1h->reset_gc_time_stamp();
1765 2200
1766 2201 // Note end of marking in all heap regions.
1767 2202 double note_end_start = os::elapsedTime();
1768 2203 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1769 2204 if (G1CollectedHeap::use_parallel_gc_threads()) {
1770 2205 int n_workers = g1h->workers()->total_workers();
1771 2206 g1h->set_par_threads(n_workers);
1772 2207 g1h->workers()->run_task(&g1_par_note_end_task);
1773 2208 g1h->set_par_threads(0);
1774 2209
1775 2210 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1776 2211 "sanity check");
1777 2212 } else {
1778 2213 g1_par_note_end_task.work(0);
1779 2214 }
1780 2215
1781 2216 if (!cleanup_list_is_empty()) {
1782 2217 // The cleanup list is not empty, so we'll have to process it
1783 2218 // concurrently. Notify anyone else that might be wanting free
1784 2219 // regions that there will be more free regions coming soon.
1785 2220 g1h->set_free_regions_coming();
1786 2221 }
1787 2222 double note_end_end = os::elapsedTime();
1788 2223 if (G1PrintParCleanupStats) {
1789 2224 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1790 2225 (note_end_end - note_end_start)*1000.0);
1791 2226 }
1792 2227
1793 2228
1794 2229 // call below, since it affects the metric by which we sort the heap
1795 2230 // regions.
1796 2231 if (G1ScrubRemSets) {
1797 2232 double rs_scrub_start = os::elapsedTime();
1798 2233 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1799 2234 if (G1CollectedHeap::use_parallel_gc_threads()) {
1800 2235 int n_workers = g1h->workers()->total_workers();
1801 2236 g1h->set_par_threads(n_workers);
1802 2237 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1803 2238 g1h->set_par_threads(0);
1804 2239
1805 2240 assert(g1h->check_heap_region_claim_values(
1806 2241 HeapRegion::ScrubRemSetClaimValue),
1807 2242 "sanity check");
1808 2243 } else {
1809 2244 g1_par_scrub_rs_task.work(0);
1810 2245 }
1811 2246
1812 2247 double rs_scrub_end = os::elapsedTime();
1813 2248 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1814 2249 _total_rs_scrub_time += this_rs_scrub_time;
1815 2250 }
1816 2251
1817 2252 // this will also free any regions totally full of garbage objects,
1818 2253 // and sort the regions.
1819 2254 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1820 2255 g1_par_note_end_task.freed_bytes(),
1821 2256 g1_par_note_end_task.max_live_bytes());
1822 2257
1823 2258 // Statistics.
1824 2259 double end = os::elapsedTime();
1825 2260 _cleanup_times.add((end - start) * 1000.0);
1826 2261
1827 2262 // G1CollectedHeap::heap()->print();
1828 2263 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1829 2264 // G1CollectedHeap::heap()->get_gc_time_stamp());
1830 2265
1831 2266 if (PrintGC || PrintGCDetails) {
1832 2267 g1h->print_size_transition(gclog_or_tty,
1833 2268 start_used_bytes,
1834 2269 g1h->used(),
1835 2270 g1h->capacity());
1836 2271 }
1837 2272
1838 2273 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1839 2274 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1840 2275
1841 2276 // Clean up will have freed any regions completely full of garbage.
1842 2277 // Update the soft reference policy with the new heap occupancy.
1843 2278 Universe::update_heap_info_at_gc();
1844 2279
1845 2280 // We need to make this be a "collection" so any collection pause that
1846 2281 // races with it goes around and waits for completeCleanup to finish.
1847 2282 g1h->increment_total_collections();
1848 2283
1849 2284 if (VerifyDuringGC) {
1850 2285 HandleMark hm; // handle scope
1851 2286 gclog_or_tty->print(" VerifyDuringGC:(after)");
1852 2287 Universe::heap()->prepare_for_verify();
1853 2288 Universe::verify(/* allow dirty */ true,
1854 2289 /* silent */ false,
1855 2290 /* option */ VerifyOption_G1UsePrevMarking);
1856 2291 }
1857 2292
1858 2293 g1h->verify_region_sets_optional();
1859 2294 }
1860 2295
1861 2296 void ConcurrentMark::completeCleanup() {
1862 2297 if (has_aborted()) return;
1863 2298
1864 2299 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1865 2300
1866 2301 _cleanup_list.verify_optional();
1867 2302 FreeRegionList tmp_free_list("Tmp Free List");
1868 2303
1869 2304 if (G1ConcRegionFreeingVerbose) {
1870 2305 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1871 2306 "cleanup list has "SIZE_FORMAT" entries",
1872 2307 _cleanup_list.length());
1873 2308 }
1874 2309
1875 2310 // No one else should be accessing the _cleanup_list at this point,
1876 2311 // so it's not necessary to take any locks.
1877 2312 while (!_cleanup_list.is_empty()) {
1878 2313 HeapRegion* hr = _cleanup_list.remove_head();
1879 2314 assert(hr != NULL, "the list was not empty");
1880 2315 hr->par_clear();
1881 2316 tmp_free_list.add_as_tail(hr);
1882 2317
1883 2318 // Instead of adding one region at a time to the secondary_free_list,
1884 2319 // we accumulate them in the local list and move them a few at a
1885 2320 // time. This also cuts down on the number of notify_all() calls
1886 2321 // we do during this process. We'll also append the local list when
1887 2322 // _cleanup_list is empty (which means we just removed the last
1888 2323 // region from the _cleanup_list).
1889 2324 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1890 2325 _cleanup_list.is_empty()) {
1891 2326 if (G1ConcRegionFreeingVerbose) {
1892 2327 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1893 2328 "appending "SIZE_FORMAT" entries to the "
1894 2329 "secondary_free_list, clean list still has "
1895 2330 SIZE_FORMAT" entries",
1896 2331 tmp_free_list.length(),
1897 2332 _cleanup_list.length());
1898 2333 }
1899 2334
1900 2335 {
1901 2336 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1902 2337 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1903 2338 SecondaryFreeList_lock->notify_all();
1904 2339 }
1905 2340
1906 2341 if (G1StressConcRegionFreeing) {
1907 2342 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1908 2343 os::sleep(Thread::current(), (jlong) 1, false);
1909 2344 }
1910 2345 }
1911 2346 }
1912 2347 }
1913 2348 assert(tmp_free_list.is_empty(), "post-condition");
1914 2349 }
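
completeCleanup amortizes synchronization by accumulating regions on a local list and splicing them into the shared secondary free list a chunk at a time, which also limits the number of notify_all() calls. The batching idiom in a standalone sketch (plain C++11; the chunk size stands in for G1SecondaryFreeListAppendLength):

    #include <condition_variable>
    #include <cstddef>
    #include <list>
    #include <mutex>

    static std::mutex              lock;
    static std::condition_variable cv;
    static std::list<int>          shared_free_list;

    // Moves 'src' into the shared list in batches of 'chunk', taking the
    // lock (and notifying waiters) once per batch instead of once per item.
    static void drain_in_batches(std::list<int>& src, size_t chunk) {
      std::list<int> local;
      while (!src.empty()) {
        local.splice(local.end(), src, src.begin());
        if (local.size() % chunk == 0 || src.empty()) {
          std::lock_guard<std::mutex> g(lock);
          shared_free_list.splice(shared_free_list.end(), local);
          cv.notify_all();
        }
      }
    }

    int main() {
      std::list<int> cleanup;
      for (int i = 0; i < 7; ++i) cleanup.push_back(i);
      drain_in_batches(cleanup, 3);  // two full batches plus a remainder
      return (shared_free_list.size() == 7) ? 0 : 1;
    }
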
1915 2350
1916 2351 // Support closures for reference processing in G1
1917 2352
1918 2353 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1919 2354 HeapWord* addr = (HeapWord*)obj;
1920 2355 return addr != NULL &&
1921 2356 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1922 2357 }
1923 2358
1924 2359 class G1CMKeepAliveClosure: public OopClosure {
1925 2360 G1CollectedHeap* _g1;
1926 2361 ConcurrentMark* _cm;
1927 2362 CMBitMap* _bitMap;
1928 2363 public:
1929 2364 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1930 2365 CMBitMap* bitMap) :
1931 2366 _g1(g1), _cm(cm),
1932 - _bitMap(bitMap) {}
2367 + _bitMap(bitMap)
2368 + {
2369 + assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2370 + }
1933 2371
1934 2372 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1935 2373 virtual void do_oop( oop* p) { do_oop_work(p); }
1936 2374
1937 2375 template <class T> void do_oop_work(T* p) {
1938 2376 oop obj = oopDesc::load_decode_heap_oop(p);
1939 2377 HeapWord* addr = (HeapWord*)obj;
1940 2378
1941 2379 if (_cm->verbose_high()) {
1942 2380 gclog_or_tty->print_cr("\t[0] we're looking at location "
1943 2381 "*"PTR_FORMAT" = "PTR_FORMAT,
1944 2382 p, (void*) obj);
1945 2383 }
1946 2384
1947 2385 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
1948 2386 _bitMap->mark(addr);
2387 + // Update the task-specific count data for the object.
2388 + _cm->add_to_count_data_for(obj, 0 /* worker_i */);
2389 +
1949 2390 _cm->mark_stack_push(obj);
1950 2391 }
1951 2392 }
1952 2393 };
1953 2394
1954 2395 class G1CMDrainMarkingStackClosure: public VoidClosure {
1955 2396 CMMarkStack* _markStack;
1956 2397 CMBitMap* _bitMap;
1957 2398 G1CMKeepAliveClosure* _oopClosure;
1958 2399 public:
1959 2400 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1960 2401 G1CMKeepAliveClosure* oopClosure) :
1961 2402 _bitMap(bitMap),
1962 2403 _markStack(markStack),
1963 2404 _oopClosure(oopClosure)
1964 2405 {}
1965 2406
1966 2407 void do_void() {
1967 2408 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1968 2409 }
1969 2410 };
1970 2411
1971 2412 // 'Keep Alive' closure used by parallel reference processing.
1972 2413 // An instance of this closure is used in the parallel reference processing
1973 2414 // code rather than an instance of G1CMKeepAliveClosure. We could have used
1974 2415 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
1975 2416 // placed on to discovered ref lists once so we can mark and push with no
1976 2417 // need to check whether the object has already been marked. Using the
1977 2418 // G1CMKeepAliveClosure would mean, however, having all the worker threads
1978 2419 // operating on the global mark stack. This means that an individual
1979 2420 // worker would be doing lock-free pushes while it processes its own
1980 2421 // discovered ref list followed by drain call. If the discovered ref lists
1981 2422 // are unbalanced then this could cause interference with the other
1982 2423 // workers. Using a CMTask (and its embedded local data structures)
1983 2424 // avoids that potential interference.
1984 2425 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
1985 2426 ConcurrentMark* _cm;
1986 2427 CMTask* _task;
1987 2428 CMBitMap* _bitMap;
1988 2429 int _ref_counter_limit;
1989 2430 int _ref_counter;
1990 2431 public:
1991 2432 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
1992 2433 CMTask* task,
1993 2434 CMBitMap* bitMap) :
1994 2435 _cm(cm), _task(task), _bitMap(bitMap),
1995 2436 _ref_counter_limit(G1RefProcDrainInterval)
1996 2437 {
1997 2438 assert(_ref_counter_limit > 0, "sanity");
1998 2439 _ref_counter = _ref_counter_limit;
1999 2440 }
2000 2441
2001 2442 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2002 2443 virtual void do_oop( oop* p) { do_oop_work(p); }
2003 2444
2004 2445 template <class T> void do_oop_work(T* p) {
2005 2446 if (!_cm->has_overflown()) {
2006 2447 oop obj = oopDesc::load_decode_heap_oop(p);
2007 2448 if (_cm->verbose_high()) {
2008 2449 gclog_or_tty->print_cr("\t[%d] we're looking at location "
2009 2450 "*"PTR_FORMAT" = "PTR_FORMAT,
2010 2451 _task->task_id(), p, (void*) obj);
2011 2452 }
2012 2453
2013 2454 _task->deal_with_reference(obj);
2014 2455 _ref_counter--;
2015 2456
2016 2457 if (_ref_counter == 0) {
2017 2458 // We have dealt with _ref_counter_limit references, pushing them and objects
2018 2459 // reachable from them on to the local stack (and possibly the global stack).
2019 2460 // Call do_marking_step() to process these entries. We call the routine in a
2020 2461 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2021 2462 // with the entries that we've pushed as a result of the deal_with_reference
2022 2463 // calls above) or we overflow.
2023 2464 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2024 2465 // while there may still be some work to do. (See the comment at the
2025 2466 // beginning of CMTask::do_marking_step() for those conditions - one of which
2026 2467 // is reaching the specified time target.) It is only when
2027 2468 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2028 2469 // that the marking has completed.
2029 2470 do {
2030 2471 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2031 2472 _task->do_marking_step(mark_step_duration_ms,
2032 2473 false /* do_stealing */,
2033 2474 false /* do_termination */);
2034 2475 } while (_task->has_aborted() && !_cm->has_overflown());
2035 2476 _ref_counter = _ref_counter_limit;
2036 2477 }
2037 2478 } else {
2038 2479 if (_cm->verbose_high()) {
2039 2480 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2040 2481 }
2041 2482 }
2042 2483 }
2043 2484 };
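
The bounded-step retry idiom used in do_oop_work above -- keep re-invoking a time-budgeted marking step until it completes without aborting, unless the global stack has overflowed -- in a standalone sketch (plain C++; Mark and Task are simplified stand-ins for ConcurrentMark and CMTask):

    struct Mark {
      bool overflown;
      bool has_overflown() const { return overflown; }
    };

    struct Task {
      int  work_left;
      bool aborted;
      // A budgeted step: does a bounded amount of work, then reports an
      // abort if more remains (mirroring how do_marking_step can return
      // with has_aborted() set while work is still pending).
      void do_step() {
        int budget = 3;
        while (work_left > 0 && budget-- > 0) {
          work_left--;
        }
        aborted = (work_left > 0);
      }
      bool has_aborted() const { return aborted; }
    };

    // Re-invoke the step until it finishes cleanly, or give up once the
    // global mark stack has overflowed.
    static void run_to_completion(Task& task, const Mark& cm) {
      do {
        task.do_step();
      } while (task.has_aborted() && !cm.has_overflown());
    }

    int main() {
      Mark cm   = { false };
      Task task = { 10, false };
      run_to_completion(task, cm);
      return (task.work_left == 0) ? 0 : 1;
    }
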
2044 2485
2045 2486 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2046 2487 ConcurrentMark* _cm;
2047 2488 CMTask* _task;
2048 2489 public:
2049 2490 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2050 2491 _cm(cm), _task(task)
2051 2492 {}
2052 2493
2053 2494 void do_void() {
2054 2495 do {
2055 2496 if (_cm->verbose_high()) {
2056 2497 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2057 2498 _task->task_id());
2058 2499 }
2059 2500
2060 2501 // We call CMTask::do_marking_step() to completely drain the local and
2061 2502 // global marking stacks. The routine is called in a loop, which we'll
2062 2503 // exit if there's nothing more to do (i.e. we've completely drained the
2063 2504 // entries that were pushed as a result of applying the
2064 2505 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2065 2506 // lists above) or we overflow the global marking stack.
2066 2507 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2067 2508 // while there may still be some work to do. (See the comment at the
2068 2509 // beginning of CMTask::do_marking_step() for those conditions - one of which
2069 2510 // is reaching the specified time target.) It is only when
2070 2511 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2071 2512 // that the marking has completed.
2072 2513
2073 2514 _task->do_marking_step(1000000000.0 /* something very large */,
2074 2515 true /* do_stealing */,
2075 2516 true /* do_termination */);
2076 2517 } while (_task->has_aborted() && !_cm->has_overflown());
2077 2518 }
2078 2519 };
2079 2520
2080 2521 // Implementation of AbstractRefProcTaskExecutor for parallel
2081 2522 // reference processing at the end of G1 concurrent marking
2082 2523
2083 2524 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2084 2525 private:
2085 2526 G1CollectedHeap* _g1h;
2086 2527 ConcurrentMark* _cm;
2087 2528 CMBitMap* _bitmap;
2088 2529 WorkGang* _workers;
2089 2530 int _active_workers;
2090 2531
2091 2532 public:
2092 2533 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2093 2534 ConcurrentMark* cm,
2094 2535 CMBitMap* bitmap,
2095 2536 WorkGang* workers,
2096 2537 int n_workers) :
2097 2538 _g1h(g1h), _cm(cm), _bitmap(bitmap),
2098 2539 _workers(workers), _active_workers(n_workers)
2099 2540 { }
2100 2541
2101 2542 // Executes the given task using concurrent marking worker threads.
2102 2543 virtual void execute(ProcessTask& task);
2103 2544 virtual void execute(EnqueueTask& task);
2104 2545 };
2105 2546
2106 2547 class G1CMRefProcTaskProxy: public AbstractGangTask {
2107 2548 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2108 2549 ProcessTask& _proc_task;
2109 2550 G1CollectedHeap* _g1h;
2110 2551 ConcurrentMark* _cm;
2111 2552 CMBitMap* _bitmap;
2112 2553
2113 2554 public:
2114 2555 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2115 2556 G1CollectedHeap* g1h,
2116 2557 ConcurrentMark* cm,
2117 2558 CMBitMap* bitmap) :
2118 2559 AbstractGangTask("Process reference objects in parallel"),
2119 2560 _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
2120 2561 {}
2121 2562
2122 2563 virtual void work(int i) {
2123 2564 CMTask* marking_task = _cm->task(i);
2124 2565 G1CMIsAliveClosure g1_is_alive(_g1h);
2125 2566 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
2126 2567 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2127 2568
2128 2569 _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2129 2570 }
2130 2571 };
2131 2572
2132 2573 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2133 2574 assert(_workers != NULL, "Need parallel worker threads.");
2134 2575
2135 2576 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2136 2577
2137 2578 // We need to reset the phase for each task execution so that
2138 2579 // the termination protocol of CMTask::do_marking_step works.
2139 2580 _cm->set_phase(_active_workers, false /* concurrent */);
2140 2581 _g1h->set_par_threads(_active_workers);
2141 2582 _workers->run_task(&proc_task_proxy);
2142 2583 _g1h->set_par_threads(0);
2143 2584 }
2144 2585
2145 2586 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2146 2587 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2147 2588 EnqueueTask& _enq_task;
2148 2589
2149 2590 public:
2150 2591 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2151 2592 AbstractGangTask("Enqueue reference objects in parallel"),
2152 2593 _enq_task(enq_task)
2153 2594 { }
2154 2595
2155 2596 virtual void work(int i) {
2156 2597 _enq_task.work(i);
2157 2598 }
2158 2599 };
2159 2600
2160 2601 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2161 2602 assert(_workers != NULL, "Need parallel worker threads.");
2162 2603
2163 2604 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2164 2605
2165 2606 _g1h->set_par_threads(_active_workers);
2166 2607 _workers->run_task(&enq_task_proxy);
2167 2608 _g1h->set_par_threads(0);
2168 2609 }
2169 2610
2170 2611 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2171 2612 ResourceMark rm;
2172 2613 HandleMark hm;
2173 2614
2174 2615 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2175 2616
2176 2617 // Is alive closure.
2177 2618 G1CMIsAliveClosure g1_is_alive(g1h);
2178 2619
2179 2620 // Inner scope to exclude the cleaning of the string and symbol
2180 2621 // tables from the displayed time.
2181 2622 {
2182 2623 bool verbose = PrintGC && PrintGCDetails;
2183 2624 if (verbose) {
2184 2625 gclog_or_tty->put(' ');
2185 2626 }
2186 2627 TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);
2187 2628
2188 2629 ReferenceProcessor* rp = g1h->ref_processor_cm();
2189 2630
2190 2631 // See the comment in G1CollectedHeap::ref_processing_init()
2191 2632 // about how reference processing currently works in G1.
2192 2633
2193 2634 // Process weak references.
2194 2635 rp->setup_policy(clear_all_soft_refs);
2195 2636 assert(_markStack.isEmpty(), "mark stack should be empty");
2196 2637
2197 2638 G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2198 2639 G1CMDrainMarkingStackClosure
2199 2640 g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2200 2641
2201 2642 // We use the work gang from the G1CollectedHeap and we utilize all
2202 2643 // the worker threads.
2203 2644 int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
2204 2645 active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2205 2646
2206 2647 G1CMRefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2207 2648 g1h->workers(), active_workers);
2208 2649
2209 2650 if (rp->processing_is_mt()) {
2210 2651 // Set the degree of MT here. If the discovery is done MT, there
2211 2652 // may have been a different number of threads doing the discovery
2212 2653 // and a different number of discovered lists may have Ref objects.
2213 2654 // That is OK as long as the Reference lists are balanced (see
2214 2655 // balance_all_queues() and balance_queues()).
2215 2656 rp->set_active_mt_degree(active_workers);
2216 2657
2217 2658 rp->process_discovered_references(&g1_is_alive,
2218 2659 &g1_keep_alive,
2219 2660 &g1_drain_mark_stack,
2220 2661 &par_task_executor);
2221 2662
2222 2663 // The work routines of the parallel keep_alive and drain_marking_stack
2223 2664 // will set the has_overflown flag if we overflow the global marking
2224 2665 // stack.
2225 2666 } else {
2226 2667 rp->process_discovered_references(&g1_is_alive,
2227 2668 &g1_keep_alive,
2228 2669 &g1_drain_mark_stack,
2229 2670 NULL);
2230 2671 }
2231 2672
2232 2673 assert(_markStack.overflow() || _markStack.isEmpty(),
2233 2674 "mark stack should be empty (unless it overflowed)");
2234 2675 if (_markStack.overflow()) {
2235 2676 // Should have been done already when we tried to push an
2236 2677 // entry on to the global mark stack. But let's do it again.
2237 2678 set_has_overflown();
2238 2679 }
2239 2680
2240 2681 if (rp->processing_is_mt()) {
2241 2682 assert(rp->num_q() == active_workers, "why not");
2242 2683 rp->enqueue_discovered_references(&par_task_executor);
2243 2684 } else {
2244 2685 rp->enqueue_discovered_references();
2245 2686 }
2246 2687
2247 2688 rp->verify_no_references_recorded();
2248 2689 assert(!rp->discovery_enabled(), "Post condition");
2249 2690 }
2250 2691
2251 2692 // Now clean up stale oops in StringTable
2252 2693 StringTable::unlink(&g1_is_alive);
2253 2694 // Clean up unreferenced symbols in symbol table.
2254 2695 SymbolTable::unlink();
2255 2696 }
2256 2697
2257 2698 void ConcurrentMark::swapMarkBitMaps() {
2258 2699 CMBitMapRO* temp = _prevMarkBitMap;
2259 2700 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2260 2701 _nextMarkBitMap = (CMBitMap*) temp;
2261 2702 }
2262 2703
2263 2704 class CMRemarkTask: public AbstractGangTask {
2264 2705 private:
2265 2706 ConcurrentMark *_cm;
2266 2707
2267 2708 public:
2268 2709 void work(int worker_i) {
2269 2710 // Since all available tasks are actually started, we should
2270 2711 // only proceed if we're supposed to be activated.
2271 2712 if ((size_t)worker_i < _cm->active_tasks()) {
2272 2713 CMTask* task = _cm->task(worker_i);
2273 2714 task->record_start_time();
2274 2715 do {
2275 2716 task->do_marking_step(1000000000.0 /* something very large */,
2276 2717 true /* do_stealing */,
2277 2718 true /* do_termination */);
2278 2719 } while (task->has_aborted() && !_cm->has_overflown());
2279 2720 // If we overflow, then we do not want to restart. We instead
2280 2721 // want to abort remark and do concurrent marking again.
2281 2722 task->record_end_time();
2282 2723 }
2283 2724 }
2284 2725
2285 2726 CMRemarkTask(ConcurrentMark* cm) :
2286 2727 AbstractGangTask("Par Remark"), _cm(cm) { }
2287 2728 };
2288 2729
2289 2730 void ConcurrentMark::checkpointRootsFinalWork() {
2290 2731 ResourceMark rm;
2291 2732 HandleMark hm;
2292 2733 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2293 2734
2294 2735 g1h->ensure_parsability(false);
2295 2736
2296 2737 if (G1CollectedHeap::use_parallel_gc_threads()) {
2297 2738 G1CollectedHeap::StrongRootsScope srs(g1h);
2298 2739 // this is remark, so we'll use up all available threads
2299 2740 int active_workers = ParallelGCThreads;
2300 2741 set_phase(active_workers, false /* concurrent */);
2301 2742
2302 2743 CMRemarkTask remarkTask(this);
2303 2744 // We will start all available threads, even if we decide that the
2304 2745 // active_workers will be fewer. The extra ones will just bail out
2305 2746 // immediately.
2306 2747 int n_workers = g1h->workers()->total_workers();
2307 2748 g1h->set_par_threads(n_workers);
2308 2749 g1h->workers()->run_task(&remarkTask);
2309 2750 g1h->set_par_threads(0);
2310 2751 } else {
2311 2752 G1CollectedHeap::StrongRootsScope srs(g1h);
2312 2753 // this is remark, so we'll use up all available threads
2313 2754 int active_workers = 1;
2314 2755 set_phase(active_workers, false /* concurrent */);
2315 2756
2316 2757 CMRemarkTask remarkTask(this);
2317 2758 // We will start all available threads, even if we decide that the
2318 2759 // active_workers will be fewer. The extra ones will just bail out
2319 2760 // immediately.
2320 2761 remarkTask.work(0);
2321 2762 }
2322 2763 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2323 2764 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2324 2765
2325 2766 print_stats();
2326 2767
2327 2768 #if VERIFY_OBJS_PROCESSED
2328 2769 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2329 2770 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2330 2771 _scan_obj_cl.objs_processed,
2331 2772 ThreadLocalObjQueue::objs_enqueued);
2332 2773 guarantee(_scan_obj_cl.objs_processed ==
2333 2774 ThreadLocalObjQueue::objs_enqueued,
2334 2775 "Different number of objs processed and enqueued.");
2335 2776 }
2336 2777 #endif
2337 2778 }
2338 2779
2339 2780 #ifndef PRODUCT
2340 2781
2341 2782 class PrintReachableOopClosure: public OopClosure {
2342 2783 private:
2343 2784 G1CollectedHeap* _g1h;
2344 2785 outputStream* _out;
2345 2786 VerifyOption _vo;
2346 2787 bool _all;
2347 2788
2348 2789 public:
2349 2790 PrintReachableOopClosure(outputStream* out,
2350 2791 VerifyOption vo,
2351 2792 bool all) :
2352 2793 _g1h(G1CollectedHeap::heap()),
2353 2794 _out(out), _vo(vo), _all(all) { }
2354 2795
2355 2796 void do_oop(narrowOop* p) { do_oop_work(p); }
2356 2797 void do_oop( oop* p) { do_oop_work(p); }
2357 2798
2358 2799 template <class T> void do_oop_work(T* p) {
2359 2800 oop obj = oopDesc::load_decode_heap_oop(p);
2360 2801 const char* str = NULL;
2361 2802 const char* str2 = "";
2362 2803
2363 2804 if (obj == NULL) {
2364 2805 str = "";
2365 2806 } else if (!_g1h->is_in_g1_reserved(obj)) {
2366 2807 str = " O";
2367 2808 } else {
2368 2809 HeapRegion* hr = _g1h->heap_region_containing(obj);
2369 2810 guarantee(hr != NULL, "invariant");
2370 2811 bool over_tams = false;
2371 2812 bool marked = false;
2372 2813
2373 2814 switch (_vo) {
2374 2815 case VerifyOption_G1UsePrevMarking:
2375 2816 over_tams = hr->obj_allocated_since_prev_marking(obj);
2376 2817 marked = _g1h->isMarkedPrev(obj);
2377 2818 break;
2378 2819 case VerifyOption_G1UseNextMarking:
2379 2820 over_tams = hr->obj_allocated_since_next_marking(obj);
2380 2821 marked = _g1h->isMarkedNext(obj);
2381 2822 break;
2382 2823 case VerifyOption_G1UseMarkWord:
2383 2824 marked = obj->is_gc_marked();
2384 2825 break;
2385 2826 default:
2386 2827 ShouldNotReachHere();
2387 2828 }
2388 2829
2389 2830 if (over_tams) {
2390 2831 str = " >";
2391 2832 if (marked) {
2392 2833 str2 = " AND MARKED";
2393 2834 }
2394 2835 } else if (marked) {
2395 2836 str = " M";
2396 2837 } else {
2397 2838 str = " NOT";
2398 2839 }
2399 2840 }
2400 2841
2401 2842 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2402 2843 p, (void*) obj, str, str2);
2403 2844 }
2404 2845 };
2405 2846
2406 2847 class PrintReachableObjectClosure : public ObjectClosure {
2407 2848 private:
2408 2849 G1CollectedHeap* _g1h;
2409 2850 outputStream* _out;
2410 2851 VerifyOption _vo;
2411 2852 bool _all;
2412 2853 HeapRegion* _hr;
2413 2854
2414 2855 public:
2415 2856 PrintReachableObjectClosure(outputStream* out,
2416 2857 VerifyOption vo,
2417 2858 bool all,
2418 2859 HeapRegion* hr) :
2419 2860 _g1h(G1CollectedHeap::heap()),
2420 2861 _out(out), _vo(vo), _all(all), _hr(hr) { }
2421 2862
2422 2863 void do_object(oop o) {
2423 2864 bool over_tams = false;
2424 2865 bool marked = false;
2425 2866
2426 2867 switch (_vo) {
2427 2868 case VerifyOption_G1UsePrevMarking:
2428 2869 over_tams = _hr->obj_allocated_since_prev_marking(o);
2429 2870 marked = _g1h->isMarkedPrev(o);
2430 2871 break;
2431 2872 case VerifyOption_G1UseNextMarking:
2432 2873 over_tams = _hr->obj_allocated_since_next_marking(o);
2433 2874 marked = _g1h->isMarkedNext(o);
2434 2875 break;
2435 2876 case VerifyOption_G1UseMarkWord:
2436 2877 marked = o->is_gc_marked();
2437 2878 break;
2438 2879 default:
2439 2880 ShouldNotReachHere();
2440 2881 }
2441 2882 bool print_it = _all || over_tams || marked;
2442 2883
2443 2884 if (print_it) {
2444 2885 _out->print_cr(" "PTR_FORMAT"%s",
2445 2886 o, (over_tams) ? " >" : (marked) ? " M" : "");
2446 2887 PrintReachableOopClosure oopCl(_out, _vo, _all);
2447 2888 o->oop_iterate(&oopCl);
2448 2889 }
2449 2890 }
2450 2891 };
2451 2892
2452 2893 class PrintReachableRegionClosure : public HeapRegionClosure {
2453 2894 private:
2454 2895 outputStream* _out;
2455 2896 VerifyOption _vo;
2456 2897 bool _all;
2457 2898
2458 2899 public:
2459 2900 bool doHeapRegion(HeapRegion* hr) {
2460 2901 HeapWord* b = hr->bottom();
2461 2902 HeapWord* e = hr->end();
2462 2903 HeapWord* t = hr->top();
2463 2904 HeapWord* p = NULL;
2464 2905
2465 2906 switch (_vo) {
2466 2907 case VerifyOption_G1UsePrevMarking:
2467 2908 p = hr->prev_top_at_mark_start();
2468 2909 break;
2469 2910 case VerifyOption_G1UseNextMarking:
2470 2911 p = hr->next_top_at_mark_start();
2471 2912 break;
2472 2913 case VerifyOption_G1UseMarkWord:
2473 2914 // When we are verifying marking using the mark word
2474 2915 // TAMS has no relevance.
2475 2916 assert(p == NULL, "post-condition");
2476 2917 break;
2477 2918 default:
2478 2919 ShouldNotReachHere();
2479 2920 }
2480 2921 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2481 2922 "TAMS: "PTR_FORMAT, b, e, t, p);
2482 2923 _out->cr();
2483 2924
2484 2925 HeapWord* from = b;
2485 2926 HeapWord* to = t;
2486 2927
2487 2928 if (to > from) {
2488 2929 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2489 2930 _out->cr();
2490 2931 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2491 2932 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2492 2933 _out->cr();
2493 2934 }
2494 2935
2495 2936 return false;
2496 2937 }
2497 2938
2498 2939 PrintReachableRegionClosure(outputStream* out,
2499 2940 VerifyOption vo,
2500 2941 bool all) :
2501 2942 _out(out), _vo(vo), _all(all) { }
2502 2943 };
2503 2944
2504 2945 static const char* verify_option_to_tams(VerifyOption vo) {
2505 2946 switch (vo) {
2506 2947 case VerifyOption_G1UsePrevMarking:
2507 2948 return "PTAMS";
2508 2949 case VerifyOption_G1UseNextMarking:
2509 2950 return "NTAMS";
2510 2951 default:
2511 2952 return "NONE";
2512 2953 }
2513 2954 }
2514 2955
2515 2956 void ConcurrentMark::print_reachable(const char* str,
2516 2957 VerifyOption vo,
2517 2958 bool all) {
2518 2959 gclog_or_tty->cr();
2519 2960 gclog_or_tty->print_cr("== Doing heap dump... ");
2520 2961
2521 2962 if (G1PrintReachableBaseFile == NULL) {
2522 2963 gclog_or_tty->print_cr(" #### error: no base file defined");
2523 2964 return;
2524 2965 }
2525 2966
2526 2967 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2527 2968 (JVM_MAXPATHLEN - 1)) {
2528 2969 gclog_or_tty->print_cr(" #### error: file name too long");
2529 2970 return;
2530 2971 }
2531 2972
2532 2973 char file_name[JVM_MAXPATHLEN];
2533 2974 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2534 2975 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2535 2976
2536 2977 fileStream fout(file_name);
2537 2978 if (!fout.is_open()) {
2538 2979 gclog_or_tty->print_cr(" #### error: could not open file");
2539 2980 return;
2540 2981 }
2541 2982
2542 2983 outputStream* out = &fout;
2543 2984 out->print_cr("-- USING %s", verify_option_to_tams(vo));
2544 2985 out->cr();
2545 2986
2546 2987 out->print_cr("--- ITERATING OVER REGIONS");
2547 2988 out->cr();
2548 2989 PrintReachableRegionClosure rcl(out, vo, all);
2549 2990 _g1h->heap_region_iterate(&rcl);
2550 2991 out->cr();
2551 2992
2552 2993 gclog_or_tty->print_cr(" done");
2553 2994 gclog_or_tty->flush();
2554 2995 }
2555 2996
2556 2997 #endif // PRODUCT
2557 2998
2558 2999 // This note is for drainAllSATBBuffers and the code in between.
2559 3000 // In the future we could reuse a task to do this work during an
2560 3001 // evacuation pause (since now tasks are not active and can be claimed
2561 3002 // during an evacuation pause). This was a late change to the code and
2562 3003 // is currently not being taken advantage of.
2563 3004
2564 3005 class CMGlobalObjectClosure : public ObjectClosure {
2565 3006 private:
2566 3007 ConcurrentMark* _cm;
2567 3008
2568 3009 public:
2569 3010 void do_object(oop obj) {
2570 3011 _cm->deal_with_reference(obj);
2571 3012 }
2572 3013
2573 3014 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2574 3015 };
2575 3016
2576 3017 void ConcurrentMark::deal_with_reference(oop obj) {
2577 3018 if (verbose_high()) {
2578 3019 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2579 3020 (void*) obj);
2580 3021 }
2581 3022
2582 3023 HeapWord* objAddr = (HeapWord*) obj;
2583 3024 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2584 3025 if (_g1h->is_in_g1_reserved(objAddr)) {
2585 3026 assert(obj != NULL, "null check is implicit");
2586 3027 if (!_nextMarkBitMap->isMarked(objAddr)) {
2587 3028 // Only get the containing region if the object is not marked on the
2588 3029 // bitmap (otherwise, it's a waste of time since we won't do
2589 3030 // anything with it).
2590 3031 HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2591 3032 if (!hr->obj_allocated_since_next_marking(obj)) {
2592 3033 if (verbose_high()) {
2593 3034 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2594 3035 "marked", (void*) obj);
2595 3036 }
2596 3037
2597 3038 // we need to mark it first
2598 3039 if (_nextMarkBitMap->parMark(objAddr)) {
3040 + // Update the task specific count data for obj
3041 + add_to_count_data_for(obj, hr, 0 /* worker_i */);
3042 +
2599 3043 // No OrderAccess::store_load() is needed. It is implicit in the
2600 3044 // CAS done in parMark(objAddr) above
2601 3045 HeapWord* finger = _finger;
2602 3046 if (objAddr < finger) {
2603 3047 if (verbose_high()) {
2604 3048 gclog_or_tty->print_cr("[global] below the global finger "
2605 3049 "("PTR_FORMAT"), pushing it", finger);
2606 3050 }
2607 3051 if (!mark_stack_push(obj)) {
2608 3052 if (verbose_low()) {
2609 3053 gclog_or_tty->print_cr("[global] global stack overflow during "
2610 3054 "deal_with_reference");
2611 3055 }
2612 3056 }
2613 3057 }
2614 3058 }
2615 3059 }
2616 3060 }
2617 3061 }
2618 3062 }
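
The marking step above relies on the ordering guarantee of the compare-and-swap inside parMark(): the CAS both claims the mark bit and publishes it, so no separate OrderAccess::store_load() barrier is needed. A minimal sketch of that pattern with C++11 atomics (a single atomic word stands in for the marking bitmap; a hypothetical simplification, not HotSpot's BitMap):

    #include <atomic>
    #include <cstdint>

    // Returns true iff this caller transitioned the bit from 0 to 1.
    inline bool par_mark(std::atomic<uint64_t>& word, unsigned bit) {
      const uint64_t mask = uint64_t(1) << bit;
      uint64_t old = word.load(std::memory_order_relaxed);
      do {
        if (old & mask) {
          return false;  // already marked by another thread
        }
      } while (!word.compare_exchange_weak(old, old | mask));
      // The CAS defaults to sequentially consistent ordering, which is
      // the implicit fence the comment above refers to.
      return true;
    }
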
2619 3063
2620 3064 void ConcurrentMark::drainAllSATBBuffers() {
2621 3065 CMGlobalObjectClosure oc(this);
2622 3066 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2623 3067 satb_mq_set.set_closure(&oc);
2624 3068
2625 3069 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2626 3070 if (verbose_medium()) {
2627 3071 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2628 3072 }
2629 3073 }
2630 3074
2631 3075 // no need to check whether we should do this, as this is only
2632 3076 // called during an evacuation pause
2633 3077 satb_mq_set.iterate_closure_all_threads();
2634 3078
2635 3079 satb_mq_set.set_closure(NULL);
2636 3080 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2637 3081 }
2638 3082
2639 3083 void ConcurrentMark::markPrev(oop p) {
2640 3084 // Note we are overriding the read-only view of the prev map here, via
2641 3085 // the cast.
2642 3086 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2643 3087 }
2644 3088
2645 3089 void ConcurrentMark::clear(oop p) {
2646 3090 assert(p != NULL && p->is_oop(), "expected an oop");
2647 3091 HeapWord* addr = (HeapWord*)p;
2648 3092 assert(addr >= _nextMarkBitMap->startWord() &&
2649 3093 addr < _nextMarkBitMap->endWord(), "in a region");
2650 3094
2651 3095 _nextMarkBitMap->clear(addr);
2652 3096 }
2653 3097
2654 3098 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2655 3099 // Note we are overriding the read-only view of the prev map here, via
2656 3100 // the cast.
2657 3101 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2658 3102 _nextMarkBitMap->clearRange(mr);
2659 3103 }
2660 3104
2661 3105 HeapRegion*
2662 3106 ConcurrentMark::claim_region(int task_num) {
2663 3107 // "checkpoint" the finger
2664 3108 HeapWord* finger = _finger;
2665 3109
2666 3110 // _heap_end will not change underneath our feet; it only changes at
2667 3111 // yield points.
2668 3112 while (finger < _heap_end) {
2669 3113 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2670 3114
2671 3115 // Note on how this code handles humongous regions. In the
2672 3116 // normal case the finger will reach the start of a "starts
2673 3117 // humongous" (SH) region. Its end will either be the end of the
2674 3118 // last "continues humongous" (CH) region in the sequence, or the
2675 3119 // standard end of the SH region (if the SH is the only region in
2676 3120 // the sequence). That way claim_region() will skip over the CH
2677 3121 // regions. However, there is a subtle race between a CM thread
2678 3122 // executing this method and a mutator thread doing a humongous
2679 3123 // object allocation. The two are not mutually exclusive as the CM
2680 3124 // thread does not need to hold the Heap_lock when it gets
2681 3125 // here. So there is a chance that claim_region() will come across
2682 3126 // a free region that's in the process of becoming a SH or a CH
2683 3127 // region. In the former case, it will either
2684 3128 // a) Miss the update to the region's end, in which case it will
2685 3129 // visit every subsequent CH region, will find their bitmaps
2686 3130 // empty, and do nothing, or
2687 3131 // b) Observe the update of the region's end (in which case
2688 3132 // it will skip the subsequent CH regions).
2689 3133 // If it comes across a region that suddenly becomes CH, the
2690 3134 // scenario will be similar to b). So, the race between
2691 3135 // claim_region() and a humongous object allocation might force us
2692 3136 // to do a bit of unnecessary work (due to some unnecessary bitmap
2693 3137 // iterations) but it should not introduce any correctness issues.
2694 3138 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2695 3139 HeapWord* bottom = curr_region->bottom();
2696 3140 HeapWord* end = curr_region->end();
2697 3141 HeapWord* limit = curr_region->next_top_at_mark_start();
2698 3142
2699 3143 if (verbose_low()) {
2700 3144 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2701 3145 "["PTR_FORMAT", "PTR_FORMAT"), "
2702 3146 "limit = "PTR_FORMAT,
2703 3147 task_num, curr_region, bottom, end, limit);
2704 3148 }
2705 3149
2706 3150 // Is the gap between reading the finger and doing the CAS too long?
2707 3151 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2708 3152 if (res == finger) {
2709 3153 // we succeeded
2710 3154
2711 3155 // notice that _finger == end cannot be guaranteed here since,
2712 3156 // someone else might have moved the finger even further
2713 3157 assert(_finger >= end, "the finger should have moved forward");
2714 3158
2715 3159 if (verbose_low()) {
2716 3160 gclog_or_tty->print_cr("[%d] we were successful with region = "
2717 3161 PTR_FORMAT, task_num, curr_region);
2718 3162 }
2719 3163
2720 3164 if (limit > bottom) {
2721 3165 if (verbose_low()) {
2722 3166 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2723 3167 "returning it ", task_num, curr_region);
2724 3168 }
2725 3169 return curr_region;
2726 3170 } else {
2727 3171 assert(limit == bottom,
2728 3172 "the region limit should be at bottom");
2729 3173 if (verbose_low()) {
2730 3174 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2731 3175 "returning NULL", task_num, curr_region);
2732 3176 }
2733 3177 // we return NULL and the caller should try calling
2734 3178 // claim_region() again.
2735 3179 return NULL;
2736 3180 }
2737 3181 } else {
2738 3182 assert(_finger > finger, "the finger should have moved forward");
2739 3183 if (verbose_low()) {
2740 3184 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2741 3185 "global finger = "PTR_FORMAT", "
2742 3186 "our finger = "PTR_FORMAT,
2743 3187 task_num, _finger, finger);
2744 3188 }
2745 3189
2746 3190 // read it again
2747 3191 finger = _finger;
2748 3192 }
2749 3193 }
2750 3194
2751 3195 return NULL;
2752 3196 }
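
Claiming a region is a single CAS on the global finger: read the finger, compute the end of the region it points into, and try to swing the finger to that end. Failure means another thread moved the finger first, so the claimer re-reads and retries. A compact sketch with std::atomic standing in for Atomic::cmpxchg_ptr (hypothetical simplification):

    #include <atomic>

    std::atomic<char*> g_finger;  // global finger; only moves forward

    // Attempts to claim [finger, region_end). On success the caller owns
    // the range; on failure 'finger' is refreshed to the current global
    // value so the caller can retry (or stop at the heap end).
    bool try_claim(char*& finger, char* region_end) {
      char* expected = finger;
      if (g_finger.compare_exchange_strong(expected, region_end)) {
        return true;        // we moved the finger; the range is ours
      }
      finger = expected;    // somebody else moved it; retry from here
      return false;
    }
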
2753 3197
2754 3198 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2755 3199 bool result = false;
2756 3200 for (int i = 0; i < (int)_max_task_num; ++i) {
2757 3201 CMTask* the_task = _tasks[i];
2758 3202 MemRegion mr = the_task->aborted_region();
2759 3203 if (mr.start() != NULL) {
2760 3204 assert(mr.end() != NULL, "invariant");
2761 3205 assert(mr.word_size() > 0, "invariant");
2762 3206 HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2763 3207 assert(hr != NULL, "invariant");
2764 3208 if (hr->in_collection_set()) {
2765 3209 // The region points into the collection set
2766 3210 the_task->set_aborted_region(MemRegion());
2767 3211 result = true;
2768 3212 }
2769 3213 }
2770 3214 }
2771 3215 return result;
2772 3216 }
2773 3217
2774 3218 bool ConcurrentMark::has_aborted_regions() {
2775 3219 for (int i = 0; i < (int)_max_task_num; ++i) {
2776 3220 CMTask* the_task = _tasks[i];
2777 3221 MemRegion mr = the_task->aborted_region();
2778 3222 if (mr.start() != NULL) {
2779 3223 assert(mr.end() != NULL, "invariant");
2780 3224 assert(mr.word_size() > 0, "invariant");
2781 3225 return true;
2782 3226 }
2783 3227 }
2784 3228 return false;
2785 3229 }
2786 3230
2787 3231 void ConcurrentMark::oops_do(OopClosure* cl) {
2788 3232 if (_markStack.size() > 0 && verbose_low()) {
2789 3233 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2790 3234 "size = %d", _markStack.size());
2791 3235 }
2792 3236 // we first iterate over the contents of the mark stack...
2793 3237 _markStack.oops_do(cl);
2794 3238
2795 3239 for (int i = 0; i < (int)_max_task_num; ++i) {
2796 3240 OopTaskQueue* queue = _task_queues->queue((int)i);
2797 3241
2798 3242 if (queue->size() > 0 && verbose_low()) {
2799 3243 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2800 3244 "size = %d", i, queue->size());
2801 3245 }
2802 3246
2803 3247 // ...then over the contents of all the task queues.
2804 3248 queue->oops_do(cl);
2805 3249 }
2806 3250
2807 3251 // Invalidate any entries in the region stack that point
2808 3252 // into the collection set.
2809 3253 if (_regionStack.invalidate_entries_into_cset()) {
2810 3254 // otherwise, any gray objects copied during the evacuation pause
2811 3255 // might not be visited.
2812 3256 assert(_should_gray_objects, "invariant");
2813 3257 }
2814 3258
2815 3259 // Invalidate any aborted regions, recorded in the individual CM
2816 3260 // tasks, that point into the collection set.
2817 3261 if (invalidate_aborted_regions_in_cset()) {
2818 3262 // otherwise, any gray objects copied during the evacuation pause
2819 3263 // might not be visited.
2820 3264 assert(_should_gray_objects, "invariant");
2821 3265 }
2822 3266
2823 3267 }
2824 3268
2825 3269 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2826 3270 _markStack.setEmpty();
2827 3271 _markStack.clear_overflow();
2828 3272 _regionStack.setEmpty();
2829 3273 _regionStack.clear_overflow();
2830 3274 if (clear_overflow) {
2831 3275 clear_has_overflown();
2832 3276 } else {
2833 3277 assert(has_overflown(), "pre-condition");
2834 3278 }
2835 3279 _finger = _heap_start;
2836 3280
2837 3281 for (int i = 0; i < (int)_max_task_num; ++i) {
2838 3282 OopTaskQueue* queue = _task_queues->queue(i);
2839 3283 queue->set_empty();
2840 3284 // Clear any partial regions from the CMTasks
2841 3285 _tasks[i]->clear_aborted_region();
2842 3286 }
2843 3287 }
2844 3288
3289 +// Clear the per-worker arrays used to store the per-region counting data
3290 +void ConcurrentMark::clear_all_count_data() {
3291 + assert(SafepointSynchronize::is_at_safepoint() ||
3292 + !Universe::is_fully_initialized(), "must be");
3293 +
3294 + int max_regions = _g1h->max_regions();
3295 +
3296 + assert(_max_task_num != 0, "uninitialized");
3297 + assert(_count_card_bitmaps != NULL, "uninitialized");
3298 + assert(_count_marked_bytes != NULL, "uninitialized");
3299 +
3300 + for (int i = 0; i < _max_task_num; i += 1) {
3301 + BitMap& task_card_bm = count_card_bitmap_for(i);
3302 + size_t* marked_bytes_array = count_marked_bytes_for(i);
3303 +
3304 + assert(task_card_bm.size() == _card_bm.size(), "size mismatch");
3305 + assert(marked_bytes_array != NULL, "uninitialized");
3306 +
3307 + for (int j = 0; j < max_regions; j++) {
3308 + marked_bytes_array[j] = 0;
3309 + }
3310 + task_card_bm.clear();
3311 + }
3312 +}
3313 +
3314 +// Adds the given region to the counting data structures
3315 +// for the given task id.
3316 +void ConcurrentMark::add_to_count_data_for(MemRegion mr,
3317 + HeapRegion* hr,
3318 + int worker_i) {
3319 + G1CollectedHeap* g1h = _g1h;
3320 + HeapWord* start = mr.start();
3321 + HeapWord* last = mr.last();
3322 + size_t index = hr->hrs_index();
3323 +
3324 + assert(!hr->continuesHumongous(), "should not be HC region");
3325 + assert(hr == g1h->heap_region_containing(start), "sanity");
3326 + assert(hr == g1h->heap_region_containing(mr.last()), "sanity");
3327 + assert(0 <= worker_i && worker_i < _max_task_num, "oob");
3328 +
3329 + BitMap& task_card_bm = count_card_bitmap_for(worker_i);
3330 + size_t* marked_bytes_array = count_marked_bytes_for(worker_i);
3331 +
3332 + // Below, the term "card num" means the result of shifting an address
3333 + // by the card shift -- address 0 corresponds to card number 0. One
3334 + // must subtract the card num of the bottom of the heap to obtain a
3335 + // card table index.
3336 +
3337 + intptr_t start_card_num =
3338 + intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
3339 + intptr_t last_card_num =
3340 + intptr_t(uintptr_t(last) >> CardTableModRefBS::card_shift);
3341 +
3342 + intptr_t bottom_card_num =
3343 + intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
3344 + CardTableModRefBS::card_shift);
3345 +
3346 + BitMap::idx_t start_idx = start_card_num - bottom_card_num;
3347 + BitMap::idx_t last_idx = last_card_num - bottom_card_num;
3348 +
3349 + // The card bitmap is task/worker specific => no need to use 'par' routines.
3350 + // Inclusive bit range [start_idx, last_idx]. set_range is exclusive
3351 + // so we have to also explicitly set the bit for last_idx.
3352 + // Passing last_idx+1 to set_range would work in most cases
3353 + // but could trip an OOB assertion.
3354 +
3355 + if ((last_idx - start_idx) > 0) {
3356 + task_card_bm.set_range(start_idx, last_idx);
3357 + }
3358 + task_card_bm.set_bit(last_idx);
3359 +
3360 + // Add to the task local marked bytes for this region.
3361 + marked_bytes_array[index] += mr.byte_size();
3362 +}
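
A worked instance of the card-num arithmetic above, using made-up addresses and assuming the usual 512-byte cards (a card shift of 9); a minimal, self-contained sketch rather than the HotSpot code:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const int card_shift = 9;                 // 512-byte cards (assumed)
      uintptr_t heap_bottom = 0x80000000u;      // made-up addresses
      uintptr_t start       = 0x80001200u;      // mr.start()
      uintptr_t last        = 0x800017f8u;      // mr.last()

      intptr_t bottom_card_num = heap_bottom >> card_shift;  // 0x400000
      intptr_t start_card_num  = start >> card_shift;        // 0x400009
      intptr_t last_card_num   = last >> card_shift;         // 0x40000b

      // Card-table indices are card nums rebased to the heap bottom.
      size_t start_idx = start_card_num - bottom_card_num;   // 9
      size_t last_idx  = last_card_num - bottom_card_num;    // 11

      // set_range(9, 11) covers [9, 11) -- a right-open interval -- so
      // set_bit(11) is what makes the range inclusive of last_idx.
      printf("start_idx = %zu, last_idx = %zu\n", start_idx, last_idx);
      return 0;
    }
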
3363 +
3364 +void ConcurrentMark::add_to_count_data_for(oop obj, HeapRegion* hr, int worker_i) {
3365 + MemRegion mr((HeapWord*)obj, obj->size());
3366 + add_to_count_data_for(mr, hr, worker_i);
3367 +}
3368 +
3369 +void ConcurrentMark::add_to_count_data_for(MemRegion mr, int worker_i) {
3370 + HeapRegion* hr = _g1h->heap_region_containing(mr.start());
3371 + add_to_count_data_for(mr, hr, worker_i);
3372 +}
3373 +
3374 +void ConcurrentMark::add_to_count_data_for(oop obj, int worker_i) {
3375 + MemRegion mr((HeapWord*)obj, obj->size());
3376 + add_to_count_data_for(mr, worker_i);
3377 +}
3378 +
3379 +// Updates the counting data with liveness info recorded for a
3380 +// region (typically a GCLab).
3381 +void ConcurrentMark::add_to_count_data_for_region(MemRegion lab_mr,
3382 + BitMap* lab_card_bm,
3383 + intptr_t lab_bottom_card_num,
3384 + size_t lab_marked_bytes,
3385 + int worker_i) {
3386 + HeapRegion* hr = _g1h->heap_region_containing(lab_mr.start());
3387 +
3388 + BitMap& task_card_bm = count_card_bitmap_for(worker_i);
3389 + size_t* marked_bytes_array = count_marked_bytes_for(worker_i);
3390 +
3391 + // Below, the term "card num" means the result of shifting an address
3392 + // by the card shift -- address 0 corresponds to card number 0. One
3393 + // must subtract the card num of the bottom of the heap to obtain a
3394 + // card table index.
3395 +
3396 + intptr_t heap_bottom_card_num =
3397 + intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
3398 + CardTableModRefBS::card_shift);
3399 +
3400 + assert(intptr_t(uintptr_t(lab_mr.start()) >> CardTableModRefBS::card_shift) == lab_bottom_card_num,
3401 + "sanity");
3402 +
3403 + // We have to map the indices of set bits in lab_card_bm, using
3404 + // lab_bottom_card_num, to indices the card bitmap for the given task.
3405 +
3406 + BitMap::idx_t end_idx = lab_card_bm->size();
3407 + BitMap::idx_t start_idx = lab_card_bm->get_next_one_offset(0, end_idx);
3408 + while (start_idx < end_idx) {
3409 + assert(lab_card_bm->at(start_idx), "should be set");
3410 +
3411 + intptr_t lab_card_num = lab_bottom_card_num + start_idx;
3412 + BitMap::idx_t card_bm_idx = lab_card_num - heap_bottom_card_num;
3413 +
3414 + task_card_bm.set_bit(card_bm_idx);
3415 +
3416 + // Get the offset of the next set bit
3417 + start_idx = lab_card_bm->get_next_one_offset(start_idx+1, end_idx);
3418 + }
3419 +
3420 + // Now add to the marked bytes
3421 + marked_bytes_array[hr->hrs_index()] += lab_marked_bytes;
3422 +}
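
The loop above walks the set bits of the LAB-local card bitmap and rebases each one from the LAB's bottom card to the heap's bottom card. The same rebasing over a single 64-bit word (a hypothetical stand-in for BitMap plus get_next_one_offset; __builtin_ctzll assumes GCC/Clang):

    #include <cstdint>

    // Transfers each set bit of 'lab_bm' into '*task_bm', rebasing its
    // index from lab-relative to heap-relative card numbering. Assumes
    // every rebased index fits in the single task word (illustration only).
    void remap_lab_bits(uint64_t lab_bm,
                        intptr_t lab_bottom_card_num,
                        intptr_t heap_bottom_card_num,
                        uint64_t* task_bm) {
      while (lab_bm != 0) {
        int bit = __builtin_ctzll(lab_bm);             // next set bit
        intptr_t lab_card_num = lab_bottom_card_num + bit;
        intptr_t task_idx = lab_card_num - heap_bottom_card_num;
        *task_bm |= (uint64_t(1) << task_idx);
        lab_bm &= lab_bm - 1;                          // clear that bit
      }
    }
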
3423 +
3424 +void ConcurrentMark::clear_count_data_for_heap_region(HeapRegion* hr) {
3425 + // Clears the count data for the given region from _all_ of
3426 + // the per-task counting data structures.
3427 +
3428 + MemRegion used_region = hr->used_region();
3429 + HeapWord* start = used_region.start();
3430 + HeapWord* last = used_region.last();
3431 + size_t hr_index = hr->hrs_index();
3432 +
3433 + intptr_t bottom_card_num =
3434 + intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
3435 + CardTableModRefBS::card_shift);
3436 +
3437 + intptr_t start_card_num =
3438 + intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
3439 + intptr_t last_card_num =
3440 + intptr_t(uintptr_t(last) >> CardTableModRefBS::card_shift);
3441 +
3442 + BitMap::idx_t start_idx = start_card_num - bottom_card_num;
3443 + BitMap::idx_t last_idx = last_card_num - bottom_card_num;
3444 +
3445 + size_t used_region_bytes = used_region.byte_size();
3446 + size_t marked_bytes = 0;
3447 +
3448 + for (int i = 0; i < _max_task_num; i += 1) {
3449 + BitMap& task_card_bm = count_card_bitmap_for(i);
3450 + size_t* marked_bytes_array = count_marked_bytes_for(i);
3451 +
3452 + marked_bytes += marked_bytes_array[hr_index];
3453 + // clear the amount of marked bytes in the task array for this
3454 + // region
3455 + marked_bytes_array[hr_index] = 0;
3456 +
3457 + // Clear the inclusive range [start_idx, last_idx] from the
3458 + // card bitmap. The clear_range routine is exclusive so we
3459 + // need to also explicitly clear the bit at last_idx.
3460 + // Passing last_idx+1 to the clear_range would work in
3461 + // most cases but could trip an OOB assertion.
3462 +
3463 + if ((last_idx - start_idx) > 0) {
3464 + task_card_bm.clear_range(start_idx, last_idx);
3465 + }
3466 + task_card_bm.clear_bit(last_idx);
3467 + }
3468 + // We could assert here that marked_bytes == used_region_bytes
3469 +}
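
Because liveness is accumulated per task, retiring a region's counts means visiting every task's data: the region's marked bytes are the sum of the per-task entries, zeroed as they are read. A minimal sketch of that sum-and-clear step (hypothetical names; in the code above it is fused with clearing the card bitmaps):

    #include <cstddef>

    // 'marked_bytes[i][r]' holds the bytes task i marked in region r,
    // mirroring the per-task arrays above.
    size_t sum_and_clear(size_t** marked_bytes, int num_tasks, size_t hr_index) {
      size_t total = 0;
      for (int i = 0; i < num_tasks; i++) {
        total += marked_bytes[i][hr_index];
        marked_bytes[i][hr_index] = 0;   // region data is being reset
      }
      return total;
    }
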
3470 +
2845 3471 void ConcurrentMark::print_stats() {
2846 3472 if (verbose_stats()) {
2847 3473 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2848 3474 for (size_t i = 0; i < _active_tasks; ++i) {
2849 3475 _tasks[i]->print_stats();
2850 3476 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2851 3477 }
2852 3478 }
2853 3479 }
2854 3480
2855 3481 class CSMarkOopClosure: public OopClosure {
2856 3482 friend class CSMarkBitMapClosure;
2857 3483
2858 3484 G1CollectedHeap* _g1h;
2859 3485 CMBitMap* _bm;
2860 3486 ConcurrentMark* _cm;
2861 3487 oop* _ms;
2862 3488 jint* _array_ind_stack;
2863 3489 int _ms_size;
2864 3490 int _ms_ind;
2865 3491 int _array_increment;
2866 3492
2867 3493 bool push(oop obj, int arr_ind = 0) {
2868 3494 if (_ms_ind == _ms_size) {
2869 3495 gclog_or_tty->print_cr("Mark stack is full.");
2870 3496 return false;
2871 3497 }
2872 3498 _ms[_ms_ind] = obj;
2873 3499 if (obj->is_objArray()) {
2874 3500 _array_ind_stack[_ms_ind] = arr_ind;
2875 3501 }
2876 3502 _ms_ind++;
2877 3503 return true;
2878 3504 }
2879 3505
2880 3506 oop pop() {
2881 3507 if (_ms_ind == 0) {
2882 3508 return NULL;
2883 3509 } else {
2884 3510 _ms_ind--;
2885 3511 return _ms[_ms_ind];
2886 3512 }
2887 3513 }
2888 3514
2889 3515 template <class T> bool drain() {
2890 3516 while (_ms_ind > 0) {
2891 3517 oop obj = pop();
2892 3518 assert(obj != NULL, "Since index was non-zero.");
2893 3519 if (obj->is_objArray()) {
2894 3520 jint arr_ind = _array_ind_stack[_ms_ind];
2895 3521 objArrayOop aobj = objArrayOop(obj);
2896 3522 jint len = aobj->length();
2897 3523 jint next_arr_ind = arr_ind + _array_increment;
2898 3524 if (next_arr_ind < len) {
2899 3525 push(obj, next_arr_ind);
2900 3526 }
2901 3527 // Now process this portion of this one.
2902 3528 int lim = MIN2(next_arr_ind, len);
2903 3529 for (int j = arr_ind; j < lim; j++) {
2904 3530 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2905 3531 }
2906 3532
2907 3533 } else {
2908 3534 obj->oop_iterate(this);
2909 3535 }
2910 3536 if (abort()) return false;
2911 3537 }
2912 3538 return true;
2913 3539 }
2914 3540
2915 3541 public:
2916 3542 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2917 3543 _g1h(G1CollectedHeap::heap()),
2918 3544 _cm(cm),
2919 3545 _bm(cm->nextMarkBitMap()),
2920 3546 _ms_size(ms_size), _ms_ind(0),
2921 3547 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2922 3548 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2923 3549 _array_increment(MAX2(ms_size/8, 16))
2924 3550 {}
2925 3551
2926 3552 ~CSMarkOopClosure() {
2927 3553 FREE_C_HEAP_ARRAY(oop, _ms);
2928 3554 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2929 3555 }
2930 3556
2931 3557 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2932 3558 virtual void do_oop( oop* p) { do_oop_work(p); }
2933 3559
2934 3560 template <class T> void do_oop_work(T* p) {
2935 3561 T heap_oop = oopDesc::load_heap_oop(p);
2936 3562 if (oopDesc::is_null(heap_oop)) return;
2937 3563 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2938 3564 if (obj->is_forwarded()) {
2939 3565 // If the object has already been forwarded, we have to make sure
2940 3566 // that it's marked. So follow the forwarding pointer. Note that
2941 3567 // this does the right thing for self-forwarding pointers in the
2942 3568 // evacuation failure case.
2943 3569 obj = obj->forwardee();
2944 3570 }
2945 3571 HeapRegion* hr = _g1h->heap_region_containing(obj);
2946 3572 if (hr != NULL) {
2947 3573 if (hr->in_collection_set()) {
2948 3574 if (_g1h->is_obj_ill(obj)) {
2949 3575 _bm->mark((HeapWord*)obj);
3576 + // Update the task specific count data for object
3577 + _cm->add_to_count_data_for(obj, hr, 0 /* worker_i */);
3578 +
2950 3579 if (!push(obj)) {
2951 3580 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2952 3581 set_abort();
2953 3582 }
2954 3583 }
2955 3584 } else {
2956 3585 // Outside the collection set; we need to gray it
2957 3586 _cm->deal_with_reference(obj);
2958 3587 }
2959 3588 }
2960 3589 }
2961 3590 };
2962 3591
2963 3592 class CSMarkBitMapClosure: public BitMapClosure {
2964 3593 G1CollectedHeap* _g1h;
2965 3594 CMBitMap* _bitMap;
2966 3595 ConcurrentMark* _cm;
2967 3596 CSMarkOopClosure _oop_cl;
2968 3597 public:
2969 3598 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2970 3599 _g1h(G1CollectedHeap::heap()),
2971 3600 _bitMap(cm->nextMarkBitMap()),
2972 3601 _oop_cl(cm, ms_size)
2973 3602 {}
2974 3603
2975 3604 ~CSMarkBitMapClosure() {}
2976 3605
2977 3606 bool do_bit(size_t offset) {
2978 3607 // convert offset into a HeapWord*
2979 3608 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2980 3609 assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
2981 3610 "address out of range");
2982 3611 assert(_bitMap->isMarked(addr), "tautology");
2983 3612 oop obj = oop(addr);
2984 3613 if (!obj->is_forwarded()) {
2985 3614 if (!_oop_cl.push(obj)) return false;
2986 3615 if (UseCompressedOops) {
2987 3616 if (!_oop_cl.drain<narrowOop>()) return false;
2988 3617 } else {
2989 3618 if (!_oop_cl.drain<oop>()) return false;
2990 3619 }
2991 3620 }
2992 3621 // Otherwise...
2993 3622 return true;
2994 3623 }
2995 3624 };
2996 3625
2997 3626
2998 3627 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2999 3628 CMBitMap* _bm;
3000 3629 CSMarkBitMapClosure _bit_cl;
3001 3630 enum SomePrivateConstants {
3002 3631 MSSize = 1000
3003 3632 };
3004 3633 bool _completed;
3005 3634 public:
3006 3635 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
3007 3636 _bm(cm->nextMarkBitMap()),
3008 3637 _bit_cl(cm, MSSize),
3009 3638 _completed(true)
3010 3639 {}
3011 3640
3012 3641 ~CompleteMarkingInCSHRClosure() {}
3013 3642
3014 3643 bool doHeapRegion(HeapRegion* r) {
3015 3644 if (!r->evacuation_failed()) {
3016 3645 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
3017 3646 if (!mr.is_empty()) {
3018 3647 if (!_bm->iterate(&_bit_cl, mr)) {
3019 3648 _completed = false;
3020 3649 return true;
3021 3650 }
3022 3651 }
3023 3652 }
3024 3653 return false;
3025 3654 }
3026 3655
3027 3656 bool completed() { return _completed; }
3028 3657 };
3029 3658
3030 3659 class ClearMarksInHRClosure: public HeapRegionClosure {
3660 + ConcurrentMark* _cm;
3031 3661 CMBitMap* _bm;
3032 3662 public:
3033 - ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
3663 + ClearMarksInHRClosure(ConcurrentMark* cm, CMBitMap* bm):
3664 + _cm(cm), _bm(bm)
3665 + { }
3034 3666
3035 3667 bool doHeapRegion(HeapRegion* r) {
3036 3668 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
3037 - MemRegion usedMR = r->used_region();
3038 3669 _bm->clearRange(r->used_region());
3670 + // Need to remove values from the count info
3671 + _cm->clear_count_data_for_heap_region(r);
3039 3672 }
3040 3673 return false;
3041 3674 }
3042 3675 };
3043 3676
3044 3677 void ConcurrentMark::complete_marking_in_collection_set() {
3045 3678 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3046 3679
3047 3680 if (!g1h->mark_in_progress()) {
3048 3681 g1h->g1_policy()->record_mark_closure_time(0.0);
3049 3682 return;
3050 3683 }
3051 3684
3052 3685 int i = 1;
3053 3686 double start = os::elapsedTime();
3054 3687 while (true) {
3055 3688 i++;
3056 3689 CompleteMarkingInCSHRClosure cmplt(this);
3057 3690 g1h->collection_set_iterate(&cmplt);
3058 3691 if (cmplt.completed()) break;
3059 3692 }
3060 3693 double end_time = os::elapsedTime();
3061 3694 double elapsed_time_ms = (end_time - start) * 1000.0;
3062 3695 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3063 3696
3064 - ClearMarksInHRClosure clr(nextMarkBitMap());
3697 + ClearMarksInHRClosure clr(this, nextMarkBitMap());
3065 3698 g1h->collection_set_iterate(&clr);
3066 3699 }
3067 3700
3068 3701 // The next two methods deal with the following optimisation. Some
3069 3702 // objects are gray by being marked and located above the finger. If
3070 3703 // they are copied, during an evacuation pause, below the finger then
3071 3704 // they need to be pushed on the stack. The observation is that, if
3072 3705 // there are no regions in the collection set located above the
3073 3706 // finger, then the above cannot happen, hence we do not need to
3074 3707 // explicitly gray any objects when copying them to below the
3075 3708 // finger. The global stack will be scanned to ensure that, if it
3076 3709 // points to objects being copied, it will update their
3077 3710 // location. There is a tricky situation with the gray objects in
3078 3711 // region stack that are being copied, however. See the comment in
3079 3712 // newCSet().
3080 3713
3081 3714 void ConcurrentMark::newCSet() {
3082 3715 if (!concurrent_marking_in_progress()) {
3083 3716 // nothing to do if marking is not in progress
3084 3717 return;
3085 3718 }
3086 3719
3087 3720 // find what the lowest finger is among the global and local fingers
3088 3721 _min_finger = _finger;
3089 3722 for (int i = 0; i < (int)_max_task_num; ++i) {
3090 3723 CMTask* task = _tasks[i];
3091 3724 HeapWord* task_finger = task->finger();
3092 3725 if (task_finger != NULL && task_finger < _min_finger) {
3093 3726 _min_finger = task_finger;
3094 3727 }
3095 3728 }
3096 3729
3097 3730 _should_gray_objects = false;
3098 3731
3099 3732 // This fixes a very subtle and frustrating bug. It might be the case
3100 3733 // that, during an evacuation pause, heap regions that contain
3101 3734 // objects that are gray (by being in regions contained in the
3102 3735 // region stack) are included in the collection set. Since such gray
3103 3736 // objects will be moved, and because it's not easy to redirect
3104 3737 // region stack entries to point to a new location (because objects
3105 3738 // in one region might be scattered to multiple regions after they
3106 3739 // are copied), one option is to ensure that all marked objects
3107 3740 // copied during a pause are pushed on the stack. Notice, however,
3108 3741 // that this problem can only happen when the region stack is not
3109 3742 // empty during an evacuation pause. So, we make the fix a bit less
3110 3743 // conservative and ensure that regions are pushed on the stack,
3111 3744 // irrespective of whether all collection set regions are below the
3112 3745 // finger, if the region stack is not empty. This is expected to be
3113 3746 // a rare case, so I don't think it's necessary to be smarter about it.
3114 3747 if (!region_stack_empty() || has_aborted_regions()) {
3115 3748 _should_gray_objects = true;
3116 3749 }
3117 3750 }
3118 3751
3119 3752 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3120 3753 if (!concurrent_marking_in_progress()) return;
3121 3754
3122 3755 HeapWord* region_end = hr->end();
3123 3756 if (region_end > _min_finger) {
3124 3757 _should_gray_objects = true;
3125 3758 }
3126 3759 }
3127 3760
3128 3761 // Resets the region fields of active CMTasks whose values point
3129 3762 // into the collection set.
3130 3763 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3131 3764 assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3132 3765 assert(parallel_marking_threads() <= _max_task_num, "sanity");
3133 3766
3134 3767 for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3135 3768 CMTask* task = _tasks[i];
3136 3769 HeapWord* task_finger = task->finger();
3137 3770 if (task_finger != NULL) {
3138 3771 assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3139 3772 HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3140 3773 if (finger_region->in_collection_set()) {
3141 3774 // The task's current region is in the collection set.
3142 3775 // This region will be evacuated in the current GC and
3143 3776 // the region fields in the task will be stale.
3144 3777 task->giveup_current_region();
3145 3778 }
3146 3779 }
3147 3780 }
3148 3781 }
3149 3782
3150 3783 // abandon current marking iteration due to a Full GC
3151 3784 void ConcurrentMark::abort() {
3152 3785 // Clear all marks to force marking thread to do nothing
3153 3786 _nextMarkBitMap->clearAll();
3154 3787 // Empty mark stack
3155 3788 clear_marking_state();
3156 3789 for (int i = 0; i < (int)_max_task_num; ++i) {
3157 3790 _tasks[i]->clear_region_fields();
3158 3791 }
3159 3792 _has_aborted = true;
3160 3793
3161 3794 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3162 3795 satb_mq_set.abandon_partial_marking();
3163 3796 // This can be called either during or outside marking, we'll read
3164 3797 // the expected_active value from the SATB queue set.
3165 3798 satb_mq_set.set_active_all_threads(
3166 3799 false, /* new active value */
3167 3800 satb_mq_set.is_active() /* expected_active */);
3168 3801 }
3169 3802
3170 3803 static void print_ms_time_info(const char* prefix, const char* name,
3171 3804 NumberSeq& ns) {
3172 3805 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3173 3806 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3174 3807 if (ns.num() > 0) {
3175 3808 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3176 3809 prefix, ns.sd(), ns.maximum());
3177 3810 }
3178 3811 }
3179 3812
3180 3813 void ConcurrentMark::print_summary_info() {
3181 3814 gclog_or_tty->print_cr(" Concurrent marking:");
3182 3815 print_ms_time_info(" ", "init marks", _init_times);
3183 3816 print_ms_time_info(" ", "remarks", _remark_times);
3184 3817 {
3185 3818 print_ms_time_info(" ", "final marks", _remark_mark_times);
3186 3819 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3187 3820
3188 3821 }
3189 3822 print_ms_time_info(" ", "cleanups", _cleanup_times);
3190 3823 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3191 3824 _total_counting_time,
3192 3825 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3193 3826 (double)_cleanup_times.num()
3194 3827 : 0.0));
3195 3828 if (G1ScrubRemSets) {
3196 3829 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3197 3830 _total_rs_scrub_time,
3198 3831 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3199 3832 (double)_cleanup_times.num()
3200 3833 : 0.0));
3201 3834 }
3202 3835 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3203 3836 (_init_times.sum() + _remark_times.sum() +
3204 3837 _cleanup_times.sum())/1000.0);
3205 3838 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3206 - "(%8.2f s marking, %8.2f s counting).",
3839 + "(%8.2f s marking).",
3207 3840 cmThread()->vtime_accum(),
3208 - cmThread()->vtime_mark_accum(),
3209 - cmThread()->vtime_count_accum());
3841 + cmThread()->vtime_mark_accum());
3210 3842 }
3211 3843
3212 3844 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3213 3845 _parallel_workers->print_worker_threads_on(st);
3214 3846 }
3215 3847
3216 3848 // Closures
3217 3849 // XXX: there seems to be a lot of code duplication here;
3218 3850 // should refactor and consolidate the shared code.
3219 3851
3220 3852 // This closure is used to mark refs into the CMS generation in
3221 3853 // the CMS bit map. Called at the first checkpoint.
3222 3854
3223 3855 // We take a break if someone is trying to stop the world.
3224 3856 bool ConcurrentMark::do_yield_check(int worker_i) {
3225 3857 if (should_yield()) {
3226 3858 if (worker_i == 0) {
3227 3859 _g1h->g1_policy()->record_concurrent_pause();
3228 3860 }
3229 3861 cmThread()->yield();
3230 3862 if (worker_i == 0) {
3231 3863 _g1h->g1_policy()->record_concurrent_pause_end();
3232 3864 }
3233 3865 return true;
3234 3866 } else {
3235 3867 return false;
3236 3868 }
3237 3869 }
3238 3870
3239 3871 bool ConcurrentMark::should_yield() {
3240 3872 return cmThread()->should_yield();
3241 3873 }
3242 3874
3243 3875 bool ConcurrentMark::containing_card_is_marked(void* p) {
3244 3876 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3245 3877 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3246 3878 }
3247 3879
3248 3880 bool ConcurrentMark::containing_cards_are_marked(void* start,
3249 3881 void* last) {
3250 3882 return containing_card_is_marked(start) &&
3251 3883 containing_card_is_marked(last);
3252 3884 }
3253 3885
3254 3886 #ifndef PRODUCT
3255 3887 // for debugging purposes
3256 3888 void ConcurrentMark::print_finger() {
3257 3889 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3258 3890 _heap_start, _heap_end, _finger);
3259 3891 for (int i = 0; i < (int) _max_task_num; ++i) {
3260 3892 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
3261 3893 }
3262 3894 gclog_or_tty->print_cr("");
3263 3895 }
3264 3896 #endif
3265 3897
3266 3898 void CMTask::scan_object(oop obj) {
3267 3899 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3268 3900
3269 3901 if (_cm->verbose_high()) {
3270 3902 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3271 3903 _task_id, (void*) obj);
3272 3904 }
3273 3905
3274 3906 size_t obj_size = obj->size();
3275 3907 _words_scanned += obj_size;
3276 3908
3277 3909 obj->oop_iterate(_cm_oop_closure);
3278 3910 statsOnly( ++_objs_scanned );
3279 3911 check_limits();
3280 3912 }
3281 3913
3282 3914 // Closure for iteration over bitmaps
3283 3915 class CMBitMapClosure : public BitMapClosure {
3284 3916 private:
3285 3917 // the bitmap that is being iterated over
3286 3918 CMBitMap* _nextMarkBitMap;
3287 3919 ConcurrentMark* _cm;
3288 3920 CMTask* _task;
3289 3921 // true if we're scanning a heap region claimed by the task (so that
3290 3922 // we move the finger along), false if we're not, i.e. currently when
3291 3923 // scanning a heap region popped from the region stack (so that we
3292 3924 // do not move the task finger along; it'd be a mistake if we did so).
3293 3925 bool _scanning_heap_region;
3294 3926
3295 3927 public:
3296 3928 CMBitMapClosure(CMTask *task,
3297 3929 ConcurrentMark* cm,
3298 3930 CMBitMap* nextMarkBitMap)
3299 3931 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3300 3932
3301 3933 void set_scanning_heap_region(bool scanning_heap_region) {
3302 3934 _scanning_heap_region = scanning_heap_region;
3303 3935 }
3304 3936
3305 3937 bool do_bit(size_t offset) {
3306 3938 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3307 3939 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3308 3940 assert( addr < _cm->finger(), "invariant");
3309 3941
3310 3942 if (_scanning_heap_region) {
3311 3943 statsOnly( _task->increase_objs_found_on_bitmap() );
3312 3944 assert(addr >= _task->finger(), "invariant");
3313 3945 // We move that task's local finger along.
3314 3946 _task->move_finger_to(addr);
3315 3947 } else {
3316 3948 // We move the task's region finger along.
3317 3949 _task->move_region_finger_to(addr);
3318 3950 }
3319 3951
3320 3952 _task->scan_object(oop(addr));
3321 3953 // we only partially drain the local queue and global stack
3322 3954 _task->drain_local_queue(true);
3323 3955 _task->drain_global_stack(true);
3324 3956
3325 3957 // if the has_aborted flag has been raised, we need to bail out of
3326 3958 // the iteration
3327 3959 return !_task->has_aborted();
3328 3960 }
3329 3961 };
3330 3962
3331 3963 // Closure for iterating over objects, currently only used for
3332 3964 // processing SATB buffers.
3333 3965 class CMObjectClosure : public ObjectClosure {
3334 3966 private:
3335 3967 CMTask* _task;
3336 3968
3337 3969 public:
3338 3970 void do_object(oop obj) {
3339 3971 _task->deal_with_reference(obj);
3340 3972 }
3341 3973
3342 3974 CMObjectClosure(CMTask* task) : _task(task) { }
3343 3975 };
3344 3976
3345 3977 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3346 3978 ConcurrentMark* cm,
3347 3979 CMTask* task)
3348 3980 : _g1h(g1h), _cm(cm), _task(task) {
3349 3981 assert(_ref_processor == NULL, "should be initialized to NULL");
3350 3982
3351 3983 if (G1UseConcMarkReferenceProcessing) {
3352 3984 _ref_processor = g1h->ref_processor_cm();
3353 3985 assert(_ref_processor != NULL, "should not be NULL");
3354 3986 }
3355 3987 }
3356 3988
3357 3989 void CMTask::setup_for_region(HeapRegion* hr) {
3358 3990 // Separated the asserts so that we know which one fires.
3359 3991 assert(hr != NULL,
3360 3992 "claim_region() should have filtered out continues humongous regions");
3361 3993 assert(!hr->continuesHumongous(),
3362 3994 "claim_region() should have filtered out continues humongous regions");
3363 3995
3364 3996 if (_cm->verbose_low()) {
3365 3997 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3366 3998 _task_id, hr);
3367 3999 }
3368 4000
3369 4001 _curr_region = hr;
3370 4002 _finger = hr->bottom();
3371 4003 update_region_limit();
3372 4004 }
3373 4005
3374 4006 void CMTask::update_region_limit() {
3375 4007 HeapRegion* hr = _curr_region;
3376 4008 HeapWord* bottom = hr->bottom();
3377 4009 HeapWord* limit = hr->next_top_at_mark_start();
3378 4010
3379 4011 if (limit == bottom) {
3380 4012 if (_cm->verbose_low()) {
3381 4013 gclog_or_tty->print_cr("[%d] found an empty region "
3382 4014 "["PTR_FORMAT", "PTR_FORMAT")",
3383 4015 _task_id, bottom, limit);
3384 4016 }
3385 4017 // The region was collected underneath our feet.
3386 4018 // We set the finger to bottom to ensure that the bitmap
3387 4019 // iteration that will follow this will not do anything.
3388 4020 // (this is not a condition that holds when we set the region up,
3389 4021 // as the region is not supposed to be empty in the first place)
3390 4022 _finger = bottom;
3391 4023 } else if (limit >= _region_limit) {
3392 4024 assert(limit >= _finger, "peace of mind");
3393 4025 } else {
3394 4026 assert(limit < _region_limit, "only way to get here");
3395 4027 // This can happen under some pretty unusual circumstances. An
3396 4028 // evacuation pause empties the region underneath our feet (NTAMS
3397 4029 // at bottom). We then do some allocation in the region (NTAMS
3398 4030 // stays at bottom), followed by the region being used as a GC
3399 4031 // alloc region (NTAMS will move to top() and the objects
3400 4032 // originally below it will be grayed). All objects now marked in
3401 4033 // the region are explicitly grayed, if below the global finger,
3402 4034 // and we do not need in fact to scan anything else. So, we simply
3403 4035 // set _finger to be limit to ensure that the bitmap iteration
3404 4036 // doesn't do anything.
3405 4037 _finger = limit;
3406 4038 }
3407 4039
3408 4040 _region_limit = limit;
3409 4041 }
3410 4042
3411 4043 void CMTask::giveup_current_region() {
3412 4044 assert(_curr_region != NULL, "invariant");
3413 4045 if (_cm->verbose_low()) {
3414 4046 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3415 4047 _task_id, _curr_region);
3416 4048 }
3417 4049 clear_region_fields();
3418 4050 }
3419 4051
3420 4052 void CMTask::clear_region_fields() {
3421 4053 // Values for these three fields that indicate that we're not
3422 4054 // holding on to a region.
3423 4055 _curr_region = NULL;
3424 4056 _finger = NULL;
3425 4057 _region_limit = NULL;
3426 4058
3427 4059 _region_finger = NULL;
3428 4060 }
3429 4061
3430 4062 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3431 4063 if (cm_oop_closure == NULL) {
3432 4064 assert(_cm_oop_closure != NULL, "invariant");
3433 4065 } else {
3434 4066 assert(_cm_oop_closure == NULL, "invariant");
3435 4067 }
3436 4068 _cm_oop_closure = cm_oop_closure;
3437 4069 }
3438 4070
3439 4071 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3440 4072 guarantee(nextMarkBitMap != NULL, "invariant");
3441 4073
3442 4074 if (_cm->verbose_low()) {
3443 4075 gclog_or_tty->print_cr("[%d] resetting", _task_id);
3444 4076 }
3445 4077
3446 4078 _nextMarkBitMap = nextMarkBitMap;
3447 4079 clear_region_fields();
3448 4080 assert(_aborted_region.is_empty(), "should have been cleared");
3449 4081
3450 4082 _calls = 0;
3451 4083 _elapsed_time_ms = 0.0;
3452 4084 _termination_time_ms = 0.0;
3453 4085 _termination_start_time_ms = 0.0;
3454 4086
3455 4087 #if _MARKING_STATS_
3456 4088 _local_pushes = 0;
3457 4089 _local_pops = 0;
3458 4090 _local_max_size = 0;
3459 4091 _objs_scanned = 0;
3460 4092 _global_pushes = 0;
3461 4093 _global_pops = 0;
3462 4094 _global_max_size = 0;
3463 4095 _global_transfers_to = 0;
3464 4096 _global_transfers_from = 0;
3465 4097 _region_stack_pops = 0;
3466 4098 _regions_claimed = 0;
3467 4099 _objs_found_on_bitmap = 0;
3468 4100 _satb_buffers_processed = 0;
3469 4101 _steal_attempts = 0;
3470 4102 _steals = 0;
3471 4103 _aborted = 0;
3472 4104 _aborted_overflow = 0;
3473 4105 _aborted_cm_aborted = 0;
3474 4106 _aborted_yield = 0;
3475 4107 _aborted_timed_out = 0;
3476 4108 _aborted_satb = 0;
3477 4109 _aborted_termination = 0;
3478 4110 #endif // _MARKING_STATS_
3479 4111 }
3480 4112
3481 4113 bool CMTask::should_exit_termination() {
3482 4114 regular_clock_call();
3483 4115 // This is called when we are in the termination protocol. We should
3484 4116 // quit if, for some reason, this task wants to abort or the global
3485 4117 // stack is not empty (this means that we can get work from it).
3486 4118 return !_cm->mark_stack_empty() || has_aborted();
3487 4119 }
3488 4120
3489 4121 void CMTask::reached_limit() {
3490 4122 assert(_words_scanned >= _words_scanned_limit ||
3491 4123 _refs_reached >= _refs_reached_limit ,
3492 4124 "shouldn't have been called otherwise");
3493 4125 regular_clock_call();
3494 4126 }
3495 4127
3496 4128 void CMTask::regular_clock_call() {
3497 4129 if (has_aborted()) return;
3498 4130
3499 4131 // First, we need to recalculate the words scanned and refs reached
3500 4132 // limits for the next clock call.
3501 4133 recalculate_limits();
3502 4134
3503 4135 // During the regular clock call we do the following
3504 4136
3505 4137 // (1) If an overflow has been flagged, then we abort.
3506 4138 if (_cm->has_overflown()) {
3507 4139 set_has_aborted();
3508 4140 return;
3509 4141 }
3510 4142
3511 4143 // If we are not concurrent (i.e. we're doing remark) we don't need
3512 4144 // to check anything else. The other steps are only needed during
3513 4145 // the concurrent marking phase.
3514 4146 if (!concurrent()) return;
3515 4147
3516 4148 // (2) If marking has been aborted for Full GC, then we also abort.
3517 4149 if (_cm->has_aborted()) {
3518 4150 set_has_aborted();
3519 4151 statsOnly( ++_aborted_cm_aborted );
3520 4152 return;
3521 4153 }
3522 4154
3523 4155 double curr_time_ms = os::elapsedVTime() * 1000.0;
3524 4156
3525 4157 // (3) If marking stats are enabled, then we update the step history.
3526 4158 #if _MARKING_STATS_
3527 4159 if (_words_scanned >= _words_scanned_limit) {
3528 4160 ++_clock_due_to_scanning;
3529 4161 }
3530 4162 if (_refs_reached >= _refs_reached_limit) {
3531 4163 ++_clock_due_to_marking;
3532 4164 }
3533 4165
3534 4166 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3535 4167 _interval_start_time_ms = curr_time_ms;
3536 4168 _all_clock_intervals_ms.add(last_interval_ms);
3537 4169
3538 4170 if (_cm->verbose_medium()) {
3539 4171 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3540 4172 "scanned = %d%s, refs reached = %d%s",
3541 4173 _task_id, last_interval_ms,
3542 4174 _words_scanned,
3543 4175 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3544 4176 _refs_reached,
3545 4177 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3546 4178 }
3547 4179 #endif // _MARKING_STATS_
3548 4180
3549 4181 // (4) We check whether we should yield. If we have to, then we abort.
3550 4182 if (_cm->should_yield()) {
3551 4183 // We should yield. To do this we abort the task. The caller is
3552 4184 // responsible for yielding.
3553 4185 set_has_aborted();
3554 4186 statsOnly( ++_aborted_yield );
3555 4187 return;
3556 4188 }
3557 4189
3558 4190 // (5) We check whether we've reached our time quota. If we have,
3559 4191 // then we abort.
3560 4192 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3561 4193 if (elapsed_time_ms > _time_target_ms) {
3562 4194 set_has_aborted();
3563 4195 _has_timed_out = true;
3564 4196 statsOnly( ++_aborted_timed_out );
3565 4197 return;
3566 4198 }
3567 4199
3568 4200 // (6) Finally, we check whether there are enough completed SATB
3569 4201 // buffers available for processing. If there are, we abort.
3570 4202 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3571 4203 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3572 4204 if (_cm->verbose_low()) {
3573 4205 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3574 4206 _task_id);
3575 4207 }
3576 4208 // we do need to process SATB buffers, we'll abort and restart
3577 4209 // the marking task to do so
3578 4210 set_has_aborted();
3579 4211 statsOnly( ++_aborted_satb );
3580 4212 return;
3581 4213 }
3582 4214 }
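
The clock protocol keeps the hot marking path cheap: counters are bumped unconditionally, and the expensive checks above run only when a counter crosses its limit, at which point the limits are recomputed for the next period. A minimal sketch of that shape (hypothetical names and period; the real checks are steps (1)-(6) above):

    #include <cstddef>

    struct MarkingClock {
      size_t words_scanned = 0;
      size_t words_limit   = 12 * 1024;     // assumed period, illustration
      bool   aborted       = false;

      bool should_yield() { return false; } // stand-in
      bool overflown()    { return false; } // stand-in

      // Hot path: one add and one compare per object scanned.
      void on_words_scanned(size_t n) {
        words_scanned += n;
        if (words_scanned >= words_limit) regular_clock_call();
      }

      // Cold path: recompute limits, then run the abort checks.
      void regular_clock_call() {
        words_limit = words_scanned + 12 * 1024;
        if (overflown() || should_yield()) aborted = true;
      }
    };
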
3583 4215
3584 4216 void CMTask::recalculate_limits() {
3585 4217 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3586 4218 _words_scanned_limit = _real_words_scanned_limit;
3587 4219
3588 4220 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3589 4221 _refs_reached_limit = _real_refs_reached_limit;
3590 4222 }
3591 4223
3592 4224 void CMTask::decrease_limits() {
3593 4225 // This is called when we believe that we're going to do an infrequent
3594 4226 // operation which will increase the per byte scanned cost (i.e. move
3595 4227 // entries to/from the global stack). It basically tries to decrease the
3596 4228 // scanning limit so that the clock is called earlier.
3597 4229
3598 4230 if (_cm->verbose_medium()) {
3599 4231 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3600 4232 }
3601 4233
3602 4234 _words_scanned_limit = _real_words_scanned_limit -
3603 4235 3 * words_scanned_period / 4;
3604 4236 _refs_reached_limit = _real_refs_reached_limit -
3605 4237 3 * refs_reached_period / 4;
3606 4238 }
3607 4239
3608 4240 void CMTask::move_entries_to_global_stack() {
3609 4241 // local array where we'll store the entries that will be popped
3610 4242 // from the local queue
3611 4243 oop buffer[global_stack_transfer_size];
3612 4244
3613 4245 int n = 0;
3614 4246 oop obj;
3615 4247 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3616 4248 buffer[n] = obj;
3617 4249 ++n;
3618 4250 }
3619 4251
3620 4252 if (n > 0) {
3621 4253 // we popped at least one entry from the local queue
3622 4254
3623 4255 statsOnly( ++_global_transfers_to; _local_pops += n );
3624 4256
3625 4257 if (!_cm->mark_stack_push(buffer, n)) {
3626 4258 if (_cm->verbose_low()) {
3627 4259 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3628 4260 _task_id);
3629 4261 }
3630 4262 set_has_aborted();
3631 4263 } else {
3632 4264 // the transfer was successful
3633 4265
3634 4266 if (_cm->verbose_medium()) {
3635 4267 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3636 4268 _task_id, n);
3637 4269 }
3638 4270 statsOnly( int tmp_size = _cm->mark_stack_size();
3639 4271 if (tmp_size > _global_max_size) {
3640 4272 _global_max_size = tmp_size;
3641 4273 }
3642 4274 _global_pushes += n );
3643 4275 }
3644 4276 }
3645 4277
3646 4278 // this operation was quite expensive, so decrease the limits
3647 4279 decrease_limits();
3648 4280 }
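// Note on the batch size: global_stack_transfer_size bounds both the
// on-stack 'buffer' above and the number of entries moved per
// interaction with the mutex-protected global stack, so a larger
// value trades stack space for less frequent synchronization.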
3649 4281
3650 4282 void CMTask::get_entries_from_global_stack() {
3651 4283 // local array where we'll store the entries that will be popped
3652 4284 // from the global stack.
3653 4285 oop buffer[global_stack_transfer_size];
3654 4286 int n;
3655 4287 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3656 4288 assert(n <= global_stack_transfer_size,
3657 4289 "we should not pop more than the given limit");
3658 4290 if (n > 0) {
3659 4291 // yes, we did actually pop at least one entry
3660 4292
3661 4293 statsOnly( ++_global_transfers_from; _global_pops += n );
3662 4294 if (_cm->verbose_medium()) {
3663 4295 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3664 4296 _task_id, n);
3665 4297 }
3666 4298 for (int i = 0; i < n; ++i) {
3667 4299 bool success = _task_queue->push(buffer[i]);
3668 4300 // We only call this when the local queue is empty or under a
3669 4301 // given target limit. So, we do not expect this push to fail.
3670 4302 assert(success, "invariant");
3671 4303 }
3672 4304
3673 4305 statsOnly( int tmp_size = _task_queue->size();
3674 4306 if (tmp_size > _local_max_size) {
3675 4307 _local_max_size = tmp_size;
3676 4308 }
3677 4309 _local_pushes += n );
3678 4310 }
3679 4311
3680 4312 // this operation was quite expensive, so decrease the limits
3681 4313 decrease_limits();
3682 4314 }
3683 4315
3684 4316 void CMTask::drain_local_queue(bool partially) {
3685 4317 if (has_aborted()) return;
3686 4318
3687 4319   // Decide what the target size is, depending on whether we're going to
3688 4320 // drain it partially (so that other tasks can steal if they run out
3689 4321 // of things to do) or totally (at the very end).
3690 4322 size_t target_size;
3691 4323 if (partially) {
3692 4324 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3693 4325 } else {
3694 4326 target_size = 0;
3695 4327 }
3696 4328
3697 4329 if (_task_queue->size() > target_size) {
3698 4330 if (_cm->verbose_high()) {
3699 4331 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3700 4332 _task_id, target_size);
3701 4333 }
3702 4334
3703 4335 oop obj;
3704 4336 bool ret = _task_queue->pop_local(obj);
3705 4337 while (ret) {
3706 4338 statsOnly( ++_local_pops );
3707 4339
3708 4340 if (_cm->verbose_high()) {
3709 4341 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3710 4342 (void*) obj);
3711 4343 }
3712 4344
3713 4345 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3714 4346 assert(!_g1h->is_on_master_free_list(
3715 4347 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3716 4348
3717 4349 scan_object(obj);
3718 4350
3719 4351 if (_task_queue->size() <= target_size || has_aborted()) {
3720 4352 ret = false;
3721 4353 } else {
3722 4354 ret = _task_queue->pop_local(obj);
3723 4355 }
3724 4356 }
3725 4357
3726 4358 if (_cm->verbose_high()) {
3727 4359 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3728 4360 _task_id, _task_queue->size());
3729 4361 }
3730 4362 }
3731 4363 }
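// For example (values illustrative, not necessarily the defaults):
// with _task_queue->max_elems() == 300 and GCDrainStackTargetSize ==
// 64, a partial drain pops entries until at most MIN2(100, 64) == 64
// remain, leaving work available for other tasks to steal; a total
// drain (target_size == 0) empties the queue completely.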
3732 4364
3733 4365 void CMTask::drain_global_stack(bool partially) {
3734 4366 if (has_aborted()) return;
3735 4367
3736 4368 // We have a policy to drain the local queue before we attempt to
3737 4369 // drain the global stack.
3738 4370 assert(partially || _task_queue->size() == 0, "invariant");
3739 4371
3740 4372   // Decide what the target size is, depending on whether we're going to
3741 4373 // drain it partially (so that other tasks can steal if they run out
3742 4374 // of things to do) or totally (at the very end). Notice that,
3743 4375 // because we move entries from the global stack in chunks or
3744 4376 // because another task might be doing the same, we might in fact
3745 4377   // drop below the target. But this is not a problem.
3746 4378 size_t target_size;
3747 4379 if (partially) {
3748 4380 target_size = _cm->partial_mark_stack_size_target();
3749 4381 } else {
3750 4382 target_size = 0;
3751 4383 }
3752 4384
3753 4385 if (_cm->mark_stack_size() > target_size) {
3754 4386 if (_cm->verbose_low()) {
3755 4387 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3756 4388 _task_id, target_size);
3757 4389 }
3758 4390
3759 4391 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3760 4392 get_entries_from_global_stack();
3761 4393 drain_local_queue(partially);
3762 4394 }
3763 4395
3764 4396 if (_cm->verbose_low()) {
3765 4397 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3766 4398 _task_id, _cm->mark_stack_size());
3767 4399 }
3768 4400 }
3769 4401 }
3770 4402
3771 4403 // The SATB queue code makes assumptions about whether the par or
3772 4404 // non-par versions of its methods are called. This is why some code is
3773 4405 // replicated. We should really get rid of the single-threaded version
3774 4406 // of the code to simplify things.
3775 4407 void CMTask::drain_satb_buffers() {
3776 4408 if (has_aborted()) return;
3777 4409
3778 4410 // We set this so that the regular clock knows that we're in the
3779 4411 // middle of draining buffers and doesn't set the abort flag when it
3780 4412 // notices that SATB buffers are available for draining. It'd be
3781 4413   // very counterproductive if it did that. :-)
3782 4414 _draining_satb_buffers = true;
3783 4415
3784 4416 CMObjectClosure oc(this);
3785 4417 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3786 4418 if (G1CollectedHeap::use_parallel_gc_threads()) {
3787 4419 satb_mq_set.set_par_closure(_task_id, &oc);
3788 4420 } else {
3789 4421 satb_mq_set.set_closure(&oc);
3790 4422 }
3791 4423
3792 4424 // This keeps claiming and applying the closure to completed buffers
3793 4425 // until we run out of buffers or we need to abort.
3794 4426 if (G1CollectedHeap::use_parallel_gc_threads()) {
3795 4427 while (!has_aborted() &&
3796 4428 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3797 4429 if (_cm->verbose_medium()) {
3798 4430 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3799 4431 }
3800 4432 statsOnly( ++_satb_buffers_processed );
3801 4433 regular_clock_call();
3802 4434 }
3803 4435 } else {
3804 4436 while (!has_aborted() &&
3805 4437 satb_mq_set.apply_closure_to_completed_buffer()) {
3806 4438 if (_cm->verbose_medium()) {
3807 4439 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3808 4440 }
3809 4441 statsOnly( ++_satb_buffers_processed );
3810 4442 regular_clock_call();
3811 4443 }
3812 4444 }
3813 4445
3814 4446 if (!concurrent() && !has_aborted()) {
3815 4447 // We should only do this during remark.
3816 4448 if (G1CollectedHeap::use_parallel_gc_threads()) {
3817 4449 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3818 4450 } else {
3819 4451 satb_mq_set.iterate_closure_all_threads();
3820 4452 }
3821 4453 }
3822 4454
3823 4455 _draining_satb_buffers = false;
3824 4456
3825 4457 assert(has_aborted() ||
3826 4458 concurrent() ||
3827 4459 satb_mq_set.completed_buffers_num() == 0, "invariant");
3828 4460
3829 4461 if (G1CollectedHeap::use_parallel_gc_threads()) {
3830 4462 satb_mq_set.set_par_closure(_task_id, NULL);
3831 4463 } else {
3832 4464 satb_mq_set.set_closure(NULL);
3833 4465 }
3834 4466
3835 4467   // again, this was a potentially expensive operation, so decrease the
3836 4468   // limits to get the regular clock call earlier
3837 4469 decrease_limits();
3838 4470 }
3839 4471
3840 4472 void CMTask::drain_region_stack(BitMapClosure* bc) {
3841 4473 if (has_aborted()) return;
3842 4474
3843 4475 assert(_region_finger == NULL,
3844 4476 "it should be NULL when we're not scanning a region");
3845 4477
3846 4478 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3847 4479 if (_cm->verbose_low()) {
3848 4480 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3849 4481 _task_id, _cm->region_stack_size());
3850 4482 }
3851 4483
3852 4484 MemRegion mr;
3853 4485
3854 4486 if (!_aborted_region.is_empty()) {
3855 4487 mr = _aborted_region;
3856 4488 _aborted_region = MemRegion();
3857 4489
3858 4490 if (_cm->verbose_low()) {
3859 4491 gclog_or_tty->print_cr("[%d] scanning aborted region "
3860 4492 "[ " PTR_FORMAT ", " PTR_FORMAT " )",
3861 4493 _task_id, mr.start(), mr.end());
3862 4494 }
3863 4495 } else {
3864 4496 mr = _cm->region_stack_pop_lock_free();
3865 4497 // it returns MemRegion() if the pop fails
3866 4498 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3867 4499 }
3868 4500
3869 4501 while (mr.start() != NULL) {
3870 4502 if (_cm->verbose_medium()) {
3871 4503 gclog_or_tty->print_cr("[%d] we are scanning region "
3872 4504 "["PTR_FORMAT", "PTR_FORMAT")",
3873 4505 _task_id, mr.start(), mr.end());
3874 4506 }
3875 4507
3876 4508 assert(mr.end() <= _cm->finger(),
3877 4509 "otherwise the region shouldn't be on the stack");
3878 4510 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3879 4511 if (_nextMarkBitMap->iterate(bc, mr)) {
3880 4512 assert(!has_aborted(),
3881 4513 "cannot abort the task without aborting the bitmap iteration");
3882 4514
3883 4515 // We finished iterating over the region without aborting.
3884 4516 regular_clock_call();
3885 4517 if (has_aborted()) {
3886 4518 mr = MemRegion();
3887 4519 } else {
3888 4520 mr = _cm->region_stack_pop_lock_free();
3889 4521 // it returns MemRegion() if the pop fails
3890 4522 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3891 4523 }
3892 4524 } else {
3893 4525 assert(has_aborted(), "currently the only way to do so");
3894 4526
3895 4527 // The only way to abort the bitmap iteration is to return
3896 4528 // false from the do_bit() method. However, inside the
3897 4529 // do_bit() method we move the _region_finger to point to the
3898 4530 // object currently being looked at. So, if we bail out, we
3899 4531 // have definitely set _region_finger to something non-null.
3900 4532 assert(_region_finger != NULL, "invariant");
3901 4533
3902 4534 // Make sure that any previously aborted region has been
3903 4535 // cleared.
3904 4536 assert(_aborted_region.is_empty(), "aborted region not cleared");
3905 4537
3906 4538 // The iteration was actually aborted. So now _region_finger
3907 4539 // points to the address of the object we last scanned. If we
3908 4540 // leave it there, when we restart this task, we will rescan
3909 4541 // the object. It is easy to avoid this. We move the finger by
3910 4542 // enough to point to the next possible object header (the
3911 4543 // bitmap knows by how much we need to move it as it knows its
3912 4544 // granularity).
3913 4545 MemRegion newRegion =
3914 4546 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3915 4547
3916 4548 if (!newRegion.is_empty()) {
3917 4549 if (_cm->verbose_low()) {
3918 4550           gclog_or_tty->print_cr("[%d] recording unscanned region "
3919 4551 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3920 4552 _task_id,
3921 4553 newRegion.start(), newRegion.end());
3922 4554 }
3923 4555 // Now record the part of the region we didn't scan to
3924 4556 // make sure this task scans it later.
3925 4557 _aborted_region = newRegion;
3926 4558 }
3927 4559 // break from while
3928 4560 mr = MemRegion();
3929 4561 }
3930 4562 _region_finger = NULL;
3931 4563 }
3932 4564
3933 4565 if (_cm->verbose_low()) {
3934 4566 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3935 4567 _task_id, _cm->region_stack_size());
3936 4568 }
3937 4569 }
3938 4570 }
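// To illustrate the aborted-region bookkeeping above (addresses
// purely illustrative): if iteration over [0x1000, 0x2000) is
// aborted while do_bit() is looking at the object at 0x1800, then
// _region_finger == 0x1800 and we record
// [nextWord(0x1800), 0x2000) as _aborted_region, so that on restart
// the already-scanned object at 0x1800 is not visited again.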
3939 4571
3940 4572 void CMTask::print_stats() {
3941 4573 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3942 4574 _task_id, _calls);
3943 4575 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3944 4576 _elapsed_time_ms, _termination_time_ms);
3945 4577 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3946 4578 _step_times_ms.num(), _step_times_ms.avg(),
3947 4579 _step_times_ms.sd());
3948 4580 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3949 4581 _step_times_ms.maximum(), _step_times_ms.sum());
3950 4582
3951 4583 #if _MARKING_STATS_
3952 4584 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3953 4585 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3954 4586 _all_clock_intervals_ms.sd());
3955 4587 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3956 4588 _all_clock_intervals_ms.maximum(),
3957 4589 _all_clock_intervals_ms.sum());
3958 4590 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3959 4591 _clock_due_to_scanning, _clock_due_to_marking);
3960 4592 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3961 4593 _objs_scanned, _objs_found_on_bitmap);
3962 4594 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3963 4595 _local_pushes, _local_pops, _local_max_size);
3964 4596 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3965 4597 _global_pushes, _global_pops, _global_max_size);
3966 4598 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3967 4599                          _global_transfers_to, _global_transfers_from);
3968 4600 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3969 4601 _regions_claimed, _region_stack_pops);
3970 4602 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3971 4603 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3972 4604 _steal_attempts, _steals);
3973 4605 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3974 4606 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3975 4607 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3976 4608 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3977 4609 _aborted_timed_out, _aborted_satb, _aborted_termination);
3978 4610 #endif // _MARKING_STATS_
3979 4611 }
3980 4612
3981 4613 /*****************************************************************************
3982 4614
3983 4615 The do_marking_step(time_target_ms) method is the building block
3984 4616 of the parallel marking framework. It can be called in parallel
3985 4617 with other invocations of do_marking_step() on different tasks
3986 4618 (but only one per task, obviously) and concurrently with the
3987 4619    mutator threads, or during remark; hence it eliminates the need
3988 4620 for two versions of the code. When called during remark, it will
3989 4621 pick up from where the task left off during the concurrent marking
3990 4622 phase. Interestingly, tasks are also claimable during evacuation
3991 4623    pauses, since do_marking_step() ensures that it aborts before
3992 4624 it needs to yield.
3993 4625
3994 4626    The data structures that it uses to do marking work are the
3995 4627 following:
3996 4628
3997 4629 (1) Marking Bitmap. If there are gray objects that appear only
3998 4630 on the bitmap (this happens either when dealing with an overflow
3999 4631 or when the initial marking phase has simply marked the roots
4000 4632 and didn't push them on the stack), then tasks claim heap
4001 4633 regions whose bitmap they then scan to find gray objects. A
4002 4634 global finger indicates where the end of the last claimed region
4003 4635 is. A local finger indicates how far into the region a task has
4004 4636 scanned. The two fingers are used to determine how to gray an
4005 4637 object (i.e. whether simply marking it is OK, as it will be
4006 4638       visited by a task in the future, or whether it also needs to be
4007 4639 pushed on a stack).
4008 4640
4009 4641       (2) Local Queue. The task's local queue, which the task can
4010 4642       access reasonably efficiently. Other tasks can steal from
4011 4643 it when they run out of work. Throughout the marking phase, a
4012 4644 task attempts to keep its local queue short but not totally
4013 4645 empty, so that entries are available for stealing by other
4014 4646       tasks. Only when there is no more work will a task totally
4015 4647       drain its local queue.
4016 4648
4017 4649 (3) Global Mark Stack. This handles local queue overflow. During
4018 4650 marking only sets of entries are moved between it and the local
4019 4651       queues, as access to it requires a mutex, and more fine-grained
4020 4652       interaction with it might cause contention. If it
4021 4653 overflows, then the marking phase should restart and iterate
4022 4654 over the bitmap to identify gray objects. Throughout the marking
4023 4655 phase, tasks attempt to keep the global mark stack at a small
4024 4656 length but not totally empty, so that entries are available for
4025 4657       popping by other tasks. Only when there is no more work will
4026 4658       tasks totally drain the global mark stack.
4027 4659
4028 4660 (4) Global Region Stack. Entries on it correspond to areas of
4029 4661 the bitmap that need to be scanned since they contain gray
4030 4662 objects. Pushes on the region stack only happen during
4031 4663 evacuation pauses and typically correspond to areas covered by
4032 4664       GC LABs. If it overflows, then the marking phase should restart
4033 4665 and iterate over the bitmap to identify gray objects. Tasks will
4034 4666 try to totally drain the region stack as soon as possible.
4035 4667
4036 4668 (5) SATB Buffer Queue. This is where completed SATB buffers are
4037 4669 made available. Buffers are regularly removed from this queue
4038 4670 and scanned for roots, so that the queue doesn't get too
4039 4671 long. During remark, all completed buffers are processed, as
4040 4672       well as the filled-in parts of any uncompleted buffers.
4041 4673
4042 4674 The do_marking_step() method tries to abort when the time target
4043 4675 has been reached. There are a few other cases when the
4044 4676 do_marking_step() method also aborts:
4045 4677
4046 4678 (1) When the marking phase has been aborted (after a Full GC).
4047 4679
4048 4680 (2) When a global overflow (either on the global stack or the
4049 4681 region stack) has been triggered. Before the task aborts, it
4050 4682 will actually sync up with the other tasks to ensure that all
4051 4683 the marking data structures (local queues, stacks, fingers etc.)
4052 4684 are re-initialised so that when do_marking_step() completes,
4053 4685 the marking phase can immediately restart.
4054 4686
4055 4687 (3) When enough completed SATB buffers are available. The
4056 4688 do_marking_step() method only tries to drain SATB buffers right
4057 4689 at the beginning. So, if enough buffers are available, the
4058 4690 marking step aborts and the SATB buffers are processed at
4059 4691 the beginning of the next invocation.
4060 4692
4061 4693       (4) To yield. When we have to yield, we abort and yield
4062 4694       right at the end of do_marking_step(). This saves us from a lot
4063 4695       of hassle as, by yielding, we might allow a Full GC. If this
4064 4696 happens then objects will be compacted underneath our feet, the
4065 4697 heap might shrink, etc. We save checking for this by just
4066 4698 aborting and doing the yield right at the end.
4067 4699
4068 4700 From the above it follows that the do_marking_step() method should
4069 4701 be called in a loop (or, otherwise, regularly) until it completes.
4070 4702
4071 4703 If a marking step completes without its has_aborted() flag being
4072 4704 true, it means it has completed the current marking phase (and
4073 4705 also all other marking tasks have done so and have all synced up).
4074 4706
4075 4707 A method called regular_clock_call() is invoked "regularly" (in
4076 4708    sub-ms intervals) throughout marking. It is this clock method that
4077 4709 checks all the abort conditions which were mentioned above and
4078 4710 decides when the task should abort. A work-based scheme is used to
4079 4711 trigger this clock method: when the number of object words the
4080 4712 marking phase has scanned or the number of references the marking
4081 4713    phase has visited reaches a given limit. Additional invocations of
4082 4714    the clock method have been planted in a few other strategic places
4083 4715 too. The initial reason for the clock method was to avoid calling
4084 4716 vtime too regularly, as it is quite expensive. So, once it was in
4085 4717 place, it was natural to piggy-back all the other conditions on it
4086 4718 too and not constantly check them throughout the code.
4087 4719
4088 4720 *****************************************************************************/
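// A sketch of the driver loop implied by the comment above. It is
// simplified and illustrative, not the exact caller code: the real
// drivers are the concurrent marking worker task and the remark-time
// parallel task, which also handle yielding between invocations.
//
//   do {
//     task->do_marking_step(G1ConcMarkStepDurationMillis,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//     // ... yield to pending safepoints between steps as needed ...
//   } while (task->has_aborted() && !cm->has_aborted());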
4089 4721
4090 4722 void CMTask::do_marking_step(double time_target_ms,
4091 4723 bool do_stealing,
4092 4724 bool do_termination) {
4093 4725 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4094 4726 assert(concurrent() == _cm->concurrent(), "they should be the same");
4095 4727
4096 4728 assert(concurrent() || _cm->region_stack_empty(),
4097 4729 "the region stack should have been cleared before remark");
4098 4730 assert(concurrent() || !_cm->has_aborted_regions(),
4099 4731 "aborted regions should have been cleared before remark");
4100 4732 assert(_region_finger == NULL,
4101 4733 "this should be non-null only when a region is being scanned");
4102 4734
4103 4735 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4104 4736 assert(_task_queues != NULL, "invariant");
4105 4737 assert(_task_queue != NULL, "invariant");
4106 4738 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4107 4739
4108 4740 assert(!_claimed,
4109 4741 "only one thread should claim this task at any one time");
4110 4742
4111 4743   // OK, this doesn't safeguard against all possible scenarios, as it is
4112 4744 // possible for two threads to set the _claimed flag at the same
4113 4745 // time. But it is only for debugging purposes anyway and it will
4114 4746 // catch most problems.
4115 4747 _claimed = true;
4116 4748
4117 4749 _start_time_ms = os::elapsedVTime() * 1000.0;
4118 4750 statsOnly( _interval_start_time_ms = _start_time_ms );
4119 4751
4120 4752 double diff_prediction_ms =
4121 4753 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4122 4754 _time_target_ms = time_target_ms - diff_prediction_ms;
4123 4755
4124 4756 // set up the variables that are used in the work-based scheme to
4125 4757 // call the regular clock method
4126 4758 _words_scanned = 0;
4127 4759 _refs_reached = 0;
4128 4760 recalculate_limits();
4129 4761
4130 4762 // clear all flags
4131 4763 clear_has_aborted();
4132 4764 _has_timed_out = false;
4133 4765 _draining_satb_buffers = false;
4134 4766
4135 4767 ++_calls;
4136 4768
4137 4769 if (_cm->verbose_low()) {
4138 4770 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4139 4771 "target = %1.2lfms >>>>>>>>>>",
4140 4772 _task_id, _calls, _time_target_ms);
4141 4773 }
4142 4774
4143 4775 // Set up the bitmap and oop closures. Anything that uses them is
4144 4776 // eventually called from this method, so it is OK to allocate these
4145 4777 // statically.
4146 4778 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4147 4779 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4148 4780 set_cm_oop_closure(&cm_oop_closure);
4149 4781
4150 4782 if (_cm->has_overflown()) {
4151 4783 // This can happen if the region stack or the mark stack overflows
4152 4784 // during a GC pause and this task, after a yield point,
4153 4785 // restarts. We have to abort as we need to get into the overflow
4154 4786 // protocol which happens right at the end of this task.
4155 4787 set_has_aborted();
4156 4788 }
4157 4789
4158 4790 // First drain any available SATB buffers. After this, we will not
4159 4791 // look at SATB buffers before the next invocation of this method.
4160 4792 // If enough completed SATB buffers are queued up, the regular clock
4161 4793 // will abort this task so that it restarts.
4162 4794 drain_satb_buffers();
4163 4795 // ...then partially drain the local queue and the global stack
4164 4796 drain_local_queue(true);
4165 4797 drain_global_stack(true);
4166 4798
4167 4799 // Then totally drain the region stack. We will not look at
4168 4800 // it again before the next invocation of this method. Entries on
4169 4801 // the region stack are only added during evacuation pauses, for
4170 4802 // which we have to yield. When we do, we abort the task anyway so
4171 4803 // it will look at the region stack again when it restarts.
4172 4804 bitmap_closure.set_scanning_heap_region(false);
4173 4805 drain_region_stack(&bitmap_closure);
4174 4806 // ...then partially drain the local queue and the global stack
4175 4807 drain_local_queue(true);
4176 4808 drain_global_stack(true);
4177 4809
4178 4810 do {
4179 4811 if (!has_aborted() && _curr_region != NULL) {
4180 4812 // This means that we're already holding on to a region.
4181 4813 assert(_finger != NULL, "if region is not NULL, then the finger "
4182 4814 "should not be NULL either");
4183 4815
4184 4816 // We might have restarted this task after an evacuation pause
4185 4817 // which might have evacuated the region we're holding on to
4186 4818 // underneath our feet. Let's read its limit again to make sure
4187 4819 // that we do not iterate over a region of the heap that
4188 4820 // contains garbage (update_region_limit() will also move
4189 4821 // _finger to the start of the region if it is found empty).
4190 4822 update_region_limit();
4191 4823 // We will start from _finger not from the start of the region,
4192 4824 // as we might be restarting this task after aborting half-way
4193 4825 // through scanning this region. In this case, _finger points to
4194 4826 // the address where we last found a marked object. If this is a
4195 4827 // fresh region, _finger points to start().
4196 4828 MemRegion mr = MemRegion(_finger, _region_limit);
4197 4829
4198 4830 if (_cm->verbose_low()) {
4199 4831 gclog_or_tty->print_cr("[%d] we're scanning part "
4200 4832 "["PTR_FORMAT", "PTR_FORMAT") "
4201 4833 "of region "PTR_FORMAT,
4202 4834 _task_id, _finger, _region_limit, _curr_region);
4203 4835 }
4204 4836
4205 4837 // Let's iterate over the bitmap of the part of the
4206 4838 // region that is left.
4207 4839 bitmap_closure.set_scanning_heap_region(true);
4208 4840 if (mr.is_empty() ||
4209 4841 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4210 4842 // We successfully completed iterating over the region. Now,
4211 4843 // let's give up the region.
4212 4844 giveup_current_region();
4213 4845 regular_clock_call();
4214 4846 } else {
4215 4847 assert(has_aborted(), "currently the only way to do so");
4216 4848 // The only way to abort the bitmap iteration is to return
4217 4849 // false from the do_bit() method. However, inside the
4218 4850 // do_bit() method we move the _finger to point to the
4219 4851 // object currently being looked at. So, if we bail out, we
4220 4852 // have definitely set _finger to something non-null.
4221 4853 assert(_finger != NULL, "invariant");
4222 4854
4223 4855 // Region iteration was actually aborted. So now _finger
4224 4856 // points to the address of the object we last scanned. If we
4225 4857 // leave it there, when we restart this task, we will rescan
4226 4858 // the object. It is easy to avoid this. We move the finger by
4227 4859 // enough to point to the next possible object header (the
4228 4860 // bitmap knows by how much we need to move it as it knows its
4229 4861 // granularity).
4230 4862 assert(_finger < _region_limit, "invariant");
4231 4863 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4232 4864 // Check if bitmap iteration was aborted while scanning the last object
4233 4865 if (new_finger >= _region_limit) {
4234 4866 giveup_current_region();
4235 4867 } else {
4236 4868 move_finger_to(new_finger);
4237 4869 }
4238 4870 }
4239 4871 }
4240 4872 // At this point we have either completed iterating over the
4241 4873 // region we were holding on to, or we have aborted.
4242 4874
4243 4875 // We then partially drain the local queue and the global stack.
4244 4876 // (Do we really need this?)
4245 4877 drain_local_queue(true);
4246 4878 drain_global_stack(true);
4247 4879
4248 4880 // Read the note on the claim_region() method on why it might
4249 4881 // return NULL with potentially more regions available for
4250 4882 // claiming and why we have to check out_of_regions() to determine
4251 4883 // whether we're done or not.
4252 4884 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4253 4885 // We are going to try to claim a new region. We should have
4254 4886 // given up on the previous one.
4255 4887 // Separated the asserts so that we know which one fires.
4256 4888 assert(_curr_region == NULL, "invariant");
4257 4889 assert(_finger == NULL, "invariant");
4258 4890 assert(_region_limit == NULL, "invariant");
4259 4891 if (_cm->verbose_low()) {
4260 4892 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4261 4893 }
4262 4894 HeapRegion* claimed_region = _cm->claim_region(_task_id);
4263 4895 if (claimed_region != NULL) {
4264 4896 // Yes, we managed to claim one
4265 4897 statsOnly( ++_regions_claimed );
4266 4898
4267 4899 if (_cm->verbose_low()) {
4268 4900 gclog_or_tty->print_cr("[%d] we successfully claimed "
4269 4901 "region "PTR_FORMAT,
4270 4902 _task_id, claimed_region);
4271 4903 }
4272 4904
4273 4905 setup_for_region(claimed_region);
4274 4906 assert(_curr_region == claimed_region, "invariant");
4275 4907 }
4276 4908 // It is important to call the regular clock here. It might take
4277 4909 // a while to claim a region if, for example, we hit a large
4278 4910 // block of empty regions. So we need to call the regular clock
4279 4911 // method once round the loop to make sure it's called
4280 4912 // frequently enough.
4281 4913 regular_clock_call();
4282 4914 }
4283 4915
4284 4916 if (!has_aborted() && _curr_region == NULL) {
4285 4917 assert(_cm->out_of_regions(),
4286 4918 "at this point we should be out of regions");
4287 4919 }
4288 4920   } while (_curr_region != NULL && !has_aborted());
4289 4921
4290 4922 if (!has_aborted()) {
4291 4923 // We cannot check whether the global stack is empty, since other
4292 4924 // tasks might be pushing objects to it concurrently. We also cannot
4293 4925 // check if the region stack is empty because if a thread is aborting
4294 4926 // it can push a partially done region back.
4295 4927 assert(_cm->out_of_regions(),
4296 4928 "at this point we should be out of regions");
4297 4929
4298 4930 if (_cm->verbose_low()) {
4299 4931 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4300 4932 }
4301 4933
4302 4934 // Try to reduce the number of available SATB buffers so that
4303 4935 // remark has less work to do.
4304 4936 drain_satb_buffers();
4305 4937 }
4306 4938
4307 4939 // Since we've done everything else, we can now totally drain the
4308 4940 // local queue and global stack.
4309 4941 drain_local_queue(false);
4310 4942 drain_global_stack(false);
4311 4943
4312 4944   // Attempt at work stealing from other tasks' queues.
4313 4945 if (do_stealing && !has_aborted()) {
4314 4946 // We have not aborted. This means that we have finished all that
4315 4947 // we could. Let's try to do some stealing...
4316 4948
4317 4949 // We cannot check whether the global stack is empty, since other
4318 4950 // tasks might be pushing objects to it concurrently. We also cannot
4319 4951 // check if the region stack is empty because if a thread is aborting
4320 4952 // it can push a partially done region back.
4321 4953 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4322 4954 "only way to reach here");
4323 4955
4324 4956 if (_cm->verbose_low()) {
4325 4957 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4326 4958 }
4327 4959
4328 4960 while (!has_aborted()) {
4329 4961 oop obj;
4330 4962 statsOnly( ++_steal_attempts );
4331 4963
4332 4964 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4333 4965 if (_cm->verbose_medium()) {
4334 4966 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4335 4967 _task_id, (void*) obj);
4336 4968 }
4337 4969
4338 4970 statsOnly( ++_steals );
4339 4971
4340 4972 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4341 4973 "any stolen object should be marked");
4342 4974 scan_object(obj);
4343 4975
4344 4976 // And since we're towards the end, let's totally drain the
4345 4977 // local queue and global stack.
4346 4978 drain_local_queue(false);
4347 4979 drain_global_stack(false);
4348 4980 } else {
4349 4981 break;
4350 4982 }
4351 4983 }
4352 4984 }
4353 4985
4354 4986 // If we are about to wrap up and go into termination, check if we
4355 4987 // should raise the overflow flag.
4356 4988 if (do_termination && !has_aborted()) {
4357 4989 if (_cm->force_overflow()->should_force()) {
4358 4990 _cm->set_has_overflown();
4359 4991 regular_clock_call();
4360 4992 }
4361 4993 }
4362 4994
4363 4995 // We still haven't aborted. Now, let's try to get into the
4364 4996 // termination protocol.
4365 4997 if (do_termination && !has_aborted()) {
4366 4998 // We cannot check whether the global stack is empty, since other
4367 4999 // tasks might be concurrently pushing objects on it. We also cannot
4368 5000 // check if the region stack is empty because if a thread is aborting
4369 5001 // it can push a partially done region back.
4370 5002 // Separated the asserts so that we know which one fires.
4371 5003 assert(_cm->out_of_regions(), "only way to reach here");
4372 5004 assert(_task_queue->size() == 0, "only way to reach here");
4373 5005
4374 5006 if (_cm->verbose_low()) {
4375 5007 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4376 5008 }
4377 5009
4378 5010 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4379 5011 // The CMTask class also extends the TerminatorTerminator class,
4380 5012 // hence its should_exit_termination() method will also decide
4381 5013 // whether to exit the termination protocol or not.
4382 5014 bool finished = _cm->terminator()->offer_termination(this);
4383 5015 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4384 5016 _termination_time_ms +=
4385 5017 termination_end_time_ms - _termination_start_time_ms;
4386 5018
4387 5019 if (finished) {
4388 5020 // We're all done.
4389 5021
4390 5022 if (_task_id == 0) {
4391 5023 // let's allow task 0 to do this
4392 5024 if (concurrent()) {
4393 5025 assert(_cm->concurrent_marking_in_progress(), "invariant");
4394 5026 // we need to set this to false before the next
4395 5027 // safepoint. This way we ensure that the marking phase
4396 5028 // doesn't observe any more heap expansions.
4397 5029 _cm->clear_concurrent_marking_in_progress();
4398 5030 }
4399 5031 }
4400 5032
4401 5033 // We can now guarantee that the global stack is empty, since
4402 5034 // all other tasks have finished. We separated the guarantees so
4403 5035 // that, if a condition is false, we can immediately find out
4404 5036 // which one.
4405 5037 guarantee(_cm->out_of_regions(), "only way to reach here");
4406 5038 guarantee(_aborted_region.is_empty(), "only way to reach here");
4407 5039 guarantee(_cm->region_stack_empty(), "only way to reach here");
4408 5040 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4409 5041 guarantee(_task_queue->size() == 0, "only way to reach here");
4410 5042 guarantee(!_cm->has_overflown(), "only way to reach here");
4411 5043 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4412 5044 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4413 5045
4414 5046 if (_cm->verbose_low()) {
4415 5047 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4416 5048 }
4417 5049 } else {
4418 5050       // Apparently there's more work to do. Let's abort this task. The
4419 5051       // caller will restart it and we can hopefully find more things to do.
4420 5052
4421 5053 if (_cm->verbose_low()) {
4422 5054 gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4423 5055 _task_id);
4424 5056 }
4425 5057
4426 5058 set_has_aborted();
4427 5059 statsOnly( ++_aborted_termination );
4428 5060 }
4429 5061 }
4430 5062
4431 5063 // Mainly for debugging purposes to make sure that a pointer to the
4432 5064 // closure which was statically allocated in this frame doesn't
4433 5065 // escape it by accident.
4434 5066 set_cm_oop_closure(NULL);
4435 5067 double end_time_ms = os::elapsedVTime() * 1000.0;
4436 5068 double elapsed_time_ms = end_time_ms - _start_time_ms;
4437 5069 // Update the step history.
4438 5070 _step_times_ms.add(elapsed_time_ms);
4439 5071
4440 5072 if (has_aborted()) {
4441 5073 // The task was aborted for some reason.
4442 5074
4443 5075 statsOnly( ++_aborted );
4444 5076
4445 5077 if (_has_timed_out) {
4446 5078 double diff_ms = elapsed_time_ms - _time_target_ms;
4447 5079 // Keep statistics of how well we did with respect to hitting
4448 5080 // our target only if we actually timed out (if we aborted for
4449 5081 // other reasons, then the results might get skewed).
4450 5082 _marking_step_diffs_ms.add(diff_ms);
4451 5083 }
4452 5084
4453 5085 if (_cm->has_overflown()) {
4454 5086 // This is the interesting one. We aborted because a global
4455 5087 // overflow was raised. This means we have to restart the
4456 5088 // marking phase and start iterating over regions. However, in
4457 5089 // order to do this we have to make sure that all tasks stop
4458 5090 // what they are doing and re-initialise in a safe manner. We
4459 5091 // will achieve this with the use of two barrier sync points.
4460 5092
4461 5093 if (_cm->verbose_low()) {
4462 5094 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4463 5095 }
4464 5096
4465 5097 _cm->enter_first_sync_barrier(_task_id);
4466 5098 // When we exit this sync barrier we know that all tasks have
4467 5099 // stopped doing marking work. So, it's now safe to
4468 5100 // re-initialise our data structures. At the end of this method,
4469 5101 // task 0 will clear the global data structures.
4470 5102
4471 5103 statsOnly( ++_aborted_overflow );
4472 5104
4473 5105 // We clear the local state of this task...
4474 5106 clear_region_fields();
4475 5107
4476 5108 // ...and enter the second barrier.
4477 5109 _cm->enter_second_sync_barrier(_task_id);
4478 5110       // At this point everything has been re-initialised and we're
4479 5111 // ready to restart.
4480 5112 }
4481 5113
4482 5114 if (_cm->verbose_low()) {
4483 5115 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4484 5116 "elapsed = %1.2lfms <<<<<<<<<<",
4485 5117 _task_id, _time_target_ms, elapsed_time_ms);
4486 5118 if (_cm->has_aborted()) {
4487 5119 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4488 5120 _task_id);
4489 5121 }
4490 5122 }
4491 5123 } else {
4492 5124 if (_cm->verbose_low()) {
4493 5125 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4494 5126 "elapsed = %1.2lfms <<<<<<<<<<",
4495 5127 _task_id, _time_target_ms, elapsed_time_ms);
4496 5128 }
4497 5129 }
4498 5130
4499 5131 _claimed = false;
4500 5132 }
4501 5133
4502 5134 CMTask::CMTask(int task_id,
4503 5135 ConcurrentMark* cm,
4504 5136 CMTaskQueue* task_queue,
4505 5137 CMTaskQueueSet* task_queues)
4506 5138 : _g1h(G1CollectedHeap::heap()),
4507 5139 _task_id(task_id), _cm(cm),
4508 5140 _claimed(false),
4509 5141 _nextMarkBitMap(NULL), _hash_seed(17),
4510 5142 _task_queue(task_queue),
4511 5143 _task_queues(task_queues),
4512 5144 _cm_oop_closure(NULL),
4513 5145 _aborted_region(MemRegion()) {
4514 5146 guarantee(task_queue != NULL, "invariant");
4515 5147 guarantee(task_queues != NULL, "invariant");
4516 5148
4517 5149 statsOnly( _clock_due_to_scanning = 0;
4518 5150 _clock_due_to_marking = 0 );
4519 5151
4520 5152 _marking_step_diffs_ms.add(0.5);
4521 5153 }
4522 5154
4523 5155 // These are formatting macros that are used below to ensure
4524 5156 // consistent formatting. The *_H_* versions are used to format the
4525 5157 // header for a particular value and they should be kept consistent
4526 5158 // with the corresponding macro. Also note that most of the macros add
4527 5159 // the necessary white space (as a prefix) which makes them a bit
4528 5160 // easier to compose.
4529 5161
4530 5162 // All the output lines are prefixed with this string to be able to
4531 5163 // identify them easily in a large log file.
4532 5164 #define G1PPRL_LINE_PREFIX "###"
4533 5165
4534 5166 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4535 5167 #ifdef _LP64
4536 5168 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4537 5169 #else // _LP64
4538 5170 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4539 5171 #endif // _LP64
4540 5172
4541 5173 // For per-region info
4542 5174 #define G1PPRL_TYPE_FORMAT " %-4s"
4543 5175 #define G1PPRL_TYPE_H_FORMAT " %4s"
4544 5176 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4545 5177 #define G1PPRL_BYTE_H_FORMAT " %9s"
4546 5178 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4547 5179 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4548 5180
4549 5181 // For summary info
4550 5182 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4551 5183 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4552 5184 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4553 5185 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
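// For example, G1PPRL_SUM_MB_PERC_FORMAT("used") expands (via string
// literal concatenation) to " used: %1.2f MB / %1.2f %%", consuming
// two double arguments.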
4554 5186
4555 5187 G1PrintRegionLivenessInfoClosure::
4556 5188 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4557 5189 : _out(out),
4558 5190 _total_used_bytes(0), _total_capacity_bytes(0),
4559 5191 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4560 5192 _hum_used_bytes(0), _hum_capacity_bytes(0),
4561 5193 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4562 5194 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4563 5195 MemRegion g1_committed = g1h->g1_committed();
4564 5196 MemRegion g1_reserved = g1h->g1_reserved();
4565 5197 double now = os::elapsedTime();
4566 5198
4567 5199 // Print the header of the output.
4568 5200 _out->cr();
4569 5201 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4570 5202 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4571 5203 G1PPRL_SUM_ADDR_FORMAT("committed")
4572 5204 G1PPRL_SUM_ADDR_FORMAT("reserved")
4573 5205 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4574 5206 g1_committed.start(), g1_committed.end(),
4575 5207 g1_reserved.start(), g1_reserved.end(),
4576 5208 (size_t)HeapRegion::GrainBytes);
4577 5209 _out->print_cr(G1PPRL_LINE_PREFIX);
4578 5210 _out->print_cr(G1PPRL_LINE_PREFIX
4579 5211 G1PPRL_TYPE_H_FORMAT
4580 5212 G1PPRL_ADDR_BASE_H_FORMAT
4581 5213 G1PPRL_BYTE_H_FORMAT
4582 5214 G1PPRL_BYTE_H_FORMAT
4583 5215 G1PPRL_BYTE_H_FORMAT
4584 5216 G1PPRL_DOUBLE_H_FORMAT,
4585 5217 "type", "address-range",
4586 5218 "used", "prev-live", "next-live", "gc-eff");
4587 5219 _out->print_cr(G1PPRL_LINE_PREFIX
4588 5220 G1PPRL_TYPE_H_FORMAT
4589 5221 G1PPRL_ADDR_BASE_H_FORMAT
4590 5222 G1PPRL_BYTE_H_FORMAT
4591 5223 G1PPRL_BYTE_H_FORMAT
4592 5224 G1PPRL_BYTE_H_FORMAT
4593 5225 G1PPRL_DOUBLE_H_FORMAT,
4594 5226 "", "",
4595 5227 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4596 5228 }
4597 5229
4598 5230 // It takes as a parameter a reference to one of the _hum_* fields,
4599 5231 // deduces the corresponding value for a region in a humongous region
4600 5232 // series (either the region size, or what's left if the _hum_* field
4601 5233 // is < the region size), and updates the _hum_* field accordingly.
4602 5234 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4603 5235 size_t bytes = 0;
4604 5236 // The > 0 check is to deal with the prev and next live bytes which
4605 5237 // could be 0.
4606 5238 if (*hum_bytes > 0) {
4607 5239 bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
4608 5240 *hum_bytes -= bytes;
4609 5241 }
4610 5242 return bytes;
4611 5243 }
4612 5244
4613 5245 // It deduces the values for a region in a humongous region series
4614 5246 // from the _hum_* fields and updates those accordingly. It assumes
4615 5247 // that that _hum_* fields have already been set up from the "starts
4616 5248 // humongous" region and we visit the regions in address order.
4617 5249 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4618 5250 size_t* capacity_bytes,
4619 5251 size_t* prev_live_bytes,
4620 5252 size_t* next_live_bytes) {
4621 5253 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4622 5254 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4623 5255 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4624 5256 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4625 5257 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4626 5258 }
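// Worked example (sizes illustrative): a humongous object of 2.5 MB
// with HeapRegion::GrainBytes == 1 MB spans three regions. The
// "starts humongous" region seeds _hum_used_bytes with 2.5 MB, and
// the three successive doHeapRegion() calls then report 1 MB, 1 MB
// and 0.5 MB of used space respectively, after which _hum_used_bytes
// is back to 0.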
4627 5259
4628 5260 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4629 5261 const char* type = "";
4630 5262 HeapWord* bottom = r->bottom();
4631 5263 HeapWord* end = r->end();
4632 5264 size_t capacity_bytes = r->capacity();
4633 5265 size_t used_bytes = r->used();
4634 5266 size_t prev_live_bytes = r->live_bytes();
4635 5267 size_t next_live_bytes = r->next_live_bytes();
4636 5268 double gc_eff = r->gc_efficiency();
4637 5269 if (r->used() == 0) {
4638 5270 type = "FREE";
4639 5271 } else if (r->is_survivor()) {
4640 5272 type = "SURV";
4641 5273 } else if (r->is_young()) {
4642 5274 type = "EDEN";
4643 5275 } else if (r->startsHumongous()) {
4644 5276 type = "HUMS";
4645 5277
4646 5278 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4647 5279 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4648 5280 "they should have been zeroed after the last time we used them");
4649 5281 // Set up the _hum_* fields.
4650 5282 _hum_capacity_bytes = capacity_bytes;
4651 5283 _hum_used_bytes = used_bytes;
4652 5284 _hum_prev_live_bytes = prev_live_bytes;
4653 5285 _hum_next_live_bytes = next_live_bytes;
4654 5286 get_hum_bytes(&used_bytes, &capacity_bytes,
4655 5287 &prev_live_bytes, &next_live_bytes);
4656 5288 end = bottom + HeapRegion::GrainWords;
4657 5289 } else if (r->continuesHumongous()) {
4658 5290 type = "HUMC";
4659 5291 get_hum_bytes(&used_bytes, &capacity_bytes,
4660 5292 &prev_live_bytes, &next_live_bytes);
4661 5293 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4662 5294 } else {
4663 5295 type = "OLD";
4664 5296 }
4665 5297
4666 5298 _total_used_bytes += used_bytes;
4667 5299 _total_capacity_bytes += capacity_bytes;
4668 5300 _total_prev_live_bytes += prev_live_bytes;
4669 5301 _total_next_live_bytes += next_live_bytes;
4670 5302
4671 5303 // Print a line for this particular region.
4672 5304 _out->print_cr(G1PPRL_LINE_PREFIX
4673 5305 G1PPRL_TYPE_FORMAT
4674 5306 G1PPRL_ADDR_BASE_FORMAT
4675 5307 G1PPRL_BYTE_FORMAT
4676 5308 G1PPRL_BYTE_FORMAT
4677 5309 G1PPRL_BYTE_FORMAT
4678 5310 G1PPRL_DOUBLE_FORMAT,
4679 5311 type, bottom, end,
4680 5312 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4681 5313
4682 5314 return false;
4683 5315 }
4684 5316
4685 5317 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4686 5318 // Print the footer of the output.
4687 5319 _out->print_cr(G1PPRL_LINE_PREFIX);
4688 5320 _out->print_cr(G1PPRL_LINE_PREFIX
4689 5321 " SUMMARY"
4690 5322 G1PPRL_SUM_MB_FORMAT("capacity")
4691 5323 G1PPRL_SUM_MB_PERC_FORMAT("used")
4692 5324 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4693 5325 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4694 5326 bytes_to_mb(_total_capacity_bytes),
4695 5327 bytes_to_mb(_total_used_bytes),
4696 5328 perc(_total_used_bytes, _total_capacity_bytes),
4697 5329 bytes_to_mb(_total_prev_live_bytes),
4698 5330 perc(_total_prev_live_bytes, _total_capacity_bytes),
4699 5331 bytes_to_mb(_total_next_live_bytes),
4700 5332 perc(_total_next_live_bytes, _total_capacity_bytes));
4701 5333 _out->cr();
4702 5334 }
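// Taken together, the header, per-region and footer lines above
// produce output roughly of the following shape (all values,
// addresses and the phase name are hypothetical):
//
//   ### PHASE Post-Marking @ 123.456
//   ### HEAP committed: 0x...-0x... reserved: 0x...-0x... region-size: 1048576
//   ###
//   ### type address-range used prev-live next-live gc-eff
//   ###                  (bytes)   (bytes)   (bytes) (bytes/ms)
//   ### OLD 0x...-0x...  1048576    524288    524288      123.4
//   ###
//   ### SUMMARY capacity: 256.00 MB used: 128.00 MB / 50.00 % ...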