rev 3708 : 8000244: G1: Ergonomically set MarkStackSize and use virtual space for global marking stack
Summary: Set the value of MarkStackSize to a value based on the number of parallel marking threads with a reasonable minimum. Expand the marking stack if we have to restart marking due to an overflow up to a reasonable maximum. Allocate the underlying space for the marking stack from virtual memory.
Reviewed-by: jmasa
rev 3709 : imported patch reuse-old-marking-stack
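
In outline, the ergonomic sizing budgets one task queue's worth of entries per parallel marking thread and clamps the result to [1, MarkStackSizeMax]. A standalone sketch of that arithmetic (the constants below are illustrative stand-ins for the real HotSpot flags and TASKQUEUE_SIZE; the clamp itself mirrors the ConcurrentMark constructor change in this patch):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    int main() {
      // Hypothetical values; in HotSpot these are globals/flags.
      const uint64_t TASKQUEUE_SIZE           = 128 * 1024;
      const uint64_t MarkStackSize            = 32 * 1024;       // default flag value
      const uint64_t MarkStackSizeMax         = 4 * 1024 * 1024;
      const uint64_t parallel_marking_threads = 4;

      // Same MIN2(max, MAX2(flag, threads * queue)) clamp as the patch.
      uint64_t mark_stack_size =
          std::min(MarkStackSizeMax,
                   std::max(MarkStackSize, parallel_marking_threads * TASKQUEUE_SIZE));
      std::cout << "ergonomic MarkStackSize = " << mark_stack_size << " entries\n";
      return 0;
    }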
--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
32 32 #include "gc_implementation/g1/g1Log.hpp"
33 33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
34 34 #include "gc_implementation/g1/g1RemSet.hpp"
35 35 #include "gc_implementation/g1/heapRegion.inline.hpp"
36 36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
37 37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
38 38 #include "gc_implementation/shared/vmGCOperations.hpp"
39 39 #include "memory/genOopClosures.inline.hpp"
40 40 #include "memory/referencePolicy.hpp"
41 41 #include "memory/resourceArea.hpp"
42 42 #include "oops/oop.inline.hpp"
43 43 #include "runtime/handles.inline.hpp"
44 44 #include "runtime/java.hpp"
45 45 #include "services/memTracker.hpp"
46 46
47 47 // Concurrent marking bit map wrapper
48 48
49 -CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
50 - _bm((uintptr_t*)NULL,0),
49 +CMBitMapRO::CMBitMapRO(int shifter) :
50 + _bm(),
51 51 _shifter(shifter) {
52 - _bmStartWord = (HeapWord*)(rs.base());
53 - _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
54 - ReservedSpace brs(ReservedSpace::allocation_align_size_up(
55 - (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
56 -
57 - MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
58 -
59 - guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
60 - // For now we'll just commit all of the bit map up fromt.
61 - // Later on we'll try to be more parsimonious with swap.
62 - guarantee(_virtual_space.initialize(brs, brs.size()),
63 - "couldn't reseve backing store for concurrent marking bit map");
64 - assert(_virtual_space.committed_size() == brs.size(),
65 - "didn't reserve backing store for all of concurrent marking bit map?");
66 - _bm.set_map((uintptr_t*)_virtual_space.low());
67 - assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
68 - _bmWordSize, "inconsistency in bit map sizing");
69 - _bm.set_size(_bmWordSize >> _shifter);
52 + _bmStartWord = 0;
53 + _bmWordSize = 0;
70 54 }
71 55
72 56 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
73 57 HeapWord* limit) const {
74 58 // First we must round addr *up* to a possible object boundary.
75 59 addr = (HeapWord*)align_size_up((intptr_t)addr,
76 60 HeapWordSize << _shifter);
77 61 size_t addrOffset = heapWordToOffset(addr);
78 62 if (limit == NULL) {
79 63 limit = _bmStartWord + _bmWordSize;
80 64 }
81 65 size_t limitOffset = heapWordToOffset(limit);
82 66 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
83 67 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
84 68 assert(nextAddr >= addr, "get_next_one postcondition");
85 69 assert(nextAddr == limit || isMarked(nextAddr),
86 70 "get_next_one postcondition");
87 71 return nextAddr;
88 72 }
89 73
90 74 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
91 75 HeapWord* limit) const {
92 76 size_t addrOffset = heapWordToOffset(addr);
93 77 if (limit == NULL) {
94 78 limit = _bmStartWord + _bmWordSize;
95 79 }
96 80 size_t limitOffset = heapWordToOffset(limit);
97 81 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
98 82 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
99 83 assert(nextAddr >= addr, "get_next_one postcondition");
100 84 assert(nextAddr == limit || !isMarked(nextAddr),
101 85 "get_next_one postcondition");
102 86 return nextAddr;
103 87 }
104 88
105 89 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
106 90 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
107 91 return (int) (diff >> _shifter);
108 92 }
109 93
110 94 #ifndef PRODUCT
111 -bool CMBitMapRO::covers(ReservedSpace rs) const {
95 +bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
112 96 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
113 97 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
114 98 "size inconsistency");
115 - return _bmStartWord == (HeapWord*)(rs.base()) &&
116 - _bmWordSize == rs.size()>>LogHeapWordSize;
99 + return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
100 + _bmWordSize == heap_rs.size()>>LogHeapWordSize;
117 101 }
118 102 #endif
119 103
104 +bool CMBitMap::allocate(ReservedSpace heap_rs) {
105 + _bmStartWord = (HeapWord*)(heap_rs.base());
106 + _bmWordSize = heap_rs.size()/HeapWordSize; // heap_rs.size() is in bytes
107 + ReservedSpace brs(ReservedSpace::allocation_align_size_up(
108 + (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
109 + if (!brs.is_reserved()) {
110 + warning("ConcurrentMark marking bit map allocation failure");
111 + return false;
112 + }
113 + MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
114 + // For now we'll just commit all of the bit map up front.
115 + // Later on we'll try to be more parsimonious with swap.
116 + if (!_virtual_space.initialize(brs, brs.size())) {
117 + warning("ConcurrentMark marking bit map backing store failure");
118 + return false;
119 + }
120 + assert(_virtual_space.committed_size() == brs.size(),
121 + "didn't reserve backing store for all of concurrent marking bit map?");
122 + _bm.set_map((uintptr_t*)_virtual_space.low());
123 + assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
124 + _bmWordSize, "inconsistency in bit map sizing");
125 + _bm.set_size(_bmWordSize >> _shifter);
126 + return true;
127 +}
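
With shifter == MinObjAlignment - 1, the bit map above keeps one bit per possible object start, so the backing store is roughly heap-words >> LogBitsPerByte bytes, plus one byte for rounding. A worked example under assumed 64-bit values (HeapWordSize == 8, hence shifter == 0, one bit per heap word):

    #include <cstddef>
    #include <cstdio>

    int main() {
      const size_t heap_bytes     = 1024UL * 1024 * 1024; // 1 GB heap, illustrative
      const size_t HeapWordSize   = 8;                    // 64-bit VM
      const int    shifter        = 0;                    // MinObjAlignment - 1
      const int    LogBitsPerByte = 3;

      size_t bm_word_size = heap_bytes / HeapWordSize;    // heap words covered
      size_t bitmap_bytes = (bm_word_size >> (shifter + LogBitsPerByte)) + 1;
      printf("bit map backing store: %zu bytes (~%zu MB)\n",
             bitmap_bytes, bitmap_bytes / (1024 * 1024));
      return 0;
    }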
128 +
120 129 void CMBitMap::clearAll() {
121 130 _bm.clear();
122 131 return;
123 132 }
124 133
125 134 void CMBitMap::markRange(MemRegion mr) {
126 135 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
127 136 assert(!mr.is_empty(), "unexpected empty region");
128 137 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
129 138 ((HeapWord *) mr.end())),
130 139 "markRange memory region end is not card aligned");
131 140 // convert address range into offset range
132 141 _bm.at_put_range(heapWordToOffset(mr.start()),
133 142 heapWordToOffset(mr.end()), true);
134 143 }
135 144
136 145 void CMBitMap::clearRange(MemRegion mr) {
137 146 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
138 147 assert(!mr.is_empty(), "unexpected empty region");
139 148 // convert address range into offset range
140 149 _bm.at_put_range(heapWordToOffset(mr.start()),
141 150 heapWordToOffset(mr.end()), false);
142 151 }
143 152
144 153 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
145 154 HeapWord* end_addr) {
146 155 HeapWord* start = getNextMarkedWordAddress(addr);
147 156 start = MIN2(start, end_addr);
148 157 HeapWord* end = getNextUnmarkedWordAddress(start);
149 158 end = MIN2(end, end_addr);
150 159 assert(start <= end, "Consistency check");
151 160 MemRegion mr(start, end);
152 161 if (!mr.is_empty()) {
153 162 clearRange(mr);
154 163 }
155 164 return mr;
156 165 }
157 166
158 167 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
159 168 _base(NULL), _cm(cm)
160 169 #ifdef ASSERT
161 170 , _drain_in_progress(false)
162 171 , _drain_in_progress_yields(false)
163 172 #endif
164 173 {}
165 174
166 -void CMMarkStack::allocate(size_t size) {
167 - _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
168 - if (_base == NULL) {
169 - vm_exit_during_initialization("Failed to allocate CM region mark stack");
175 +bool CMMarkStack::allocate(size_t capacity) {
176 + // allocate a stack of the requisite depth
177 + ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
178 + if (!rs.is_reserved()) {
179 + warning("ConcurrentMark MarkStack allocation failure");
180 + return false;
170 181 }
171 - _index = 0;
172 - _capacity = (jint) size;
182 + MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
183 + if (!_virtual_space.initialize(rs, rs.size())) {
184 + warning("ConcurrentMark MarkStack backing store failure");
185 + // Release the virtual memory reserved for the marking stack
186 + rs.release();
187 + return false;
188 + }
189 + assert(_virtual_space.committed_size() == rs.size(),
190 + "Didn't reserve backing store for all of ConcurrentMark stack?");
191 + _rs = rs;
192 + _base = (oop*) _virtual_space.low();
193 + setEmpty();
194 + _capacity = (jint) capacity;
173 195 _saved_index = -1;
174 196 NOT_PRODUCT(_max_depth = 0);
197 + return true;
198 +}
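
The switch from NEW_C_HEAP_ARRAY to a ReservedSpace-backed stack means an allocation failure now surfaces as a warning plus a false return rather than vm_exit_during_initialization, and the memory comes from reserved-and-committed virtual space. A rough POSIX analogue of that reserve-and-commit step (the real code uses HotSpot's ReservedSpace/VirtualSpace, not mmap directly):

    #include <sys/mman.h>
    #include <stdio.h>

    int main() {
      size_t capacity = 32 * 1024;               // entries; illustrative
      size_t bytes    = capacity * sizeof(void*);
      void* base = mmap(NULL, bytes, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (base == MAP_FAILED) {
        perror("mmap");                          // failure is non-fatal:
        return 1;                                // caller warns and bails out
      }
      munmap(base, bytes);                       // released in the destructor
      return 0;
    }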
199 +
200 +void CMMarkStack::expand() {
 201 +  // Called during remark if we've overflowed the marking stack during marking.
 202 +  assert(isEmpty(), "stack should have been emptied while handling overflow");
203 + assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
204 + // Clear expansion flag
205 + _should_expand = false;
206 + if (_capacity == (jint) MarkStackSizeMax) {
207 + if (PrintGCDetails && Verbose) {
208 + gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
209 + }
210 + return;
211 + }
212 + // Double capacity if possible
213 + jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
214 + // Do not give up existing stack until we have managed to
215 + // get the double capacity that we desired.
216 + ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
217 + sizeof(oop)));
218 + if (!rs.is_reserved()) {
219 + if (PrintGCDetails && Verbose) {
220 + // Failed to double capacity, continue;
221 + gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
222 + SIZE_FORMAT "K to " SIZE_FORMAT "K",
223 + _capacity / K, new_capacity / K);
224 + }
225 + return;
226 + }
227 +
228 + // Clear the backing store fields associated with the space for the
 229 +  // old marking stack. Note this doesn't actually release the space.
230 + _virtual_space.release();
231 +
232 + // Reinitialize virtual space for the expanded stack.
233 + if (!_virtual_space.initialize(rs, rs.size())) {
 234 +    // We failed to commit the space for the expanded marking stack.
235 + // Release the expanded reserved space...
236 + rs.release();
237 + // ... and reinitialize with the previous un-expanded space.
238 + if (_virtual_space.initialize(_rs, _rs.size())) {
239 + if (PrintGCDetails && Verbose) {
240 + gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
241 + SIZE_FORMAT "K to " SIZE_FORMAT "K",
242 + _capacity / K, new_capacity / K);
243 + }
244 + } else {
245 + // The previous backing store space should have been already
246 + // committed but we failed to initialize the virtual space
247 + // for some reason.
248 + fatal("Error re-initializing marking stack with old capacity");
249 + }
250 + } else {
251 + // We successfully committed the space for the expanded marking stack.
252 + if (PrintGCDetails && Verbose) {
253 + gclog_or_tty->print(" Successfully expanded marking stack capacity from "
254 + SIZE_FORMAT "K to " SIZE_FORMAT "K",
255 + _capacity / K, new_capacity / K);
256 + }
257 + // Release the previous (unexpanded) space.
258 + _rs.release();
259 + // Record the new (expanded) space.
260 + _rs = rs;
261 + // Record the new capacity
262 + _capacity = new_capacity;
263 + }
264 + assert(_virtual_space.committed_size() == _rs.size(),
265 + "Didn't reserve backing store for all of ConcurrentMark stack?");
266 + _base = (oop*)(_virtual_space.low());
267 + _index = 0;
268 +}
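
The key ordering in expand() is that the doubled reservation is obtained before the old one is given up, so a failed expansion leaves the original stack usable. A simplified sketch of that policy (plain malloc/free standing in for ReservedSpace/VirtualSpace, and without the re-initialize fallback the real code performs):

    #include <stdlib.h>

    // Returns true if the stack now has a larger capacity; on failure the
    // old backing store is untouched and marking continues with it.
    static bool expand_stack(void** base, size_t* capacity, size_t max_capacity) {
      if (*capacity == max_capacity) {
        return false;                        // already at the permitted maximum
      }
      size_t new_capacity = *capacity * 2;   // double if possible
      if (new_capacity > max_capacity) {
        new_capacity = max_capacity;
      }
      void* new_base = malloc(new_capacity); // stand-in for the new ReservedSpace
      if (new_base == NULL) {
        return false;                        // keep the old, un-expanded stack
      }
      free(*base);                           // real code releases old virtual space
      *base = new_base;
      *capacity = new_capacity;
      return true;
    }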
269 +
270 +void CMMarkStack::set_should_expand() {
 271 +  // If we're resetting the marking state because of a
 272 +  // marking stack overflow, record that we should, if
273 + // possible, expand the stack.
274 + _should_expand = _cm->has_overflown();
175 275 }
176 276
177 277 CMMarkStack::~CMMarkStack() {
178 278 if (_base != NULL) {
179 - FREE_C_HEAP_ARRAY(oop, _base, mtGC);
279 + _base = NULL;
280 + _virtual_space.release();
180 281 }
181 282 }
182 283
183 284 void CMMarkStack::par_push(oop ptr) {
184 285 while (true) {
185 286 if (isFull()) {
186 287 _overflow = true;
187 288 return;
188 289 }
189 290 // Otherwise...
190 291 jint index = _index;
191 292 jint next_index = index+1;
192 293 jint res = Atomic::cmpxchg(next_index, &_index, index);
193 294 if (res == index) {
194 295 _base[index] = ptr;
195 296 // Note that we don't maintain this atomically. We could, but it
196 297 // doesn't seem necessary.
197 298 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
198 299 return;
199 300 }
200 301 // Otherwise, we need to try again.
201 302 }
202 303 }
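
par_push() claims a slot by compare-and-swapping the shared index and only then stores into it; a lost race simply retries. The same shape with std::atomic in place of HotSpot's Atomic::cmpxchg (a sketch; the real stack also sets _overflow and tracks _max_depth):

    #include <atomic>
    #include <cstddef>

    template <typename T, size_t N>
    struct ParStack {
      T data[N];
      std::atomic<int> index{0};

      bool par_push(T v) {
        while (true) {
          int idx = index.load();
          if (idx >= (int) N) {
            return false;                       // full: caller records overflow
          }
          if (index.compare_exchange_weak(idx, idx + 1)) {
            data[idx] = v;                      // this thread owns slot idx
            return true;
          }
          // CAS lost the race; re-read the index and try again.
        }
      }
    };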
203 304
204 305 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
205 306 while (true) {
206 307 if (isFull()) {
207 308 _overflow = true;
208 309 return;
209 310 }
210 311 // Otherwise...
211 312 jint index = _index;
212 313 jint next_index = index + n;
213 314 if (next_index > _capacity) {
214 315 _overflow = true;
215 316 return;
216 317 }
217 318 jint res = Atomic::cmpxchg(next_index, &_index, index);
218 319 if (res == index) {
219 320 for (int i = 0; i < n; i++) {
220 - int ind = index + i;
321 + int ind = index + i;
221 322 assert(ind < _capacity, "By overflow test above.");
222 323 _base[ind] = ptr_arr[i];
223 324 }
224 325 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
225 326 return;
226 327 }
227 328 // Otherwise, we need to try again.
228 329 }
229 330 }
230 331
231 -
232 332 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
233 333 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
234 334 jint start = _index;
235 335 jint next_index = start + n;
236 336 if (next_index > _capacity) {
237 337 _overflow = true;
238 338 return;
239 339 }
240 340 // Otherwise.
241 341 _index = next_index;
242 342 for (int i = 0; i < n; i++) {
243 343 int ind = start + i;
244 344 assert(ind < _capacity, "By overflow test above.");
245 345 _base[ind] = ptr_arr[i];
246 346 }
347 + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
247 348 }
248 349
249 -
250 350 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
251 351 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
252 352 jint index = _index;
253 353 if (index == 0) {
254 354 *n = 0;
255 355 return false;
256 356 } else {
257 357 int k = MIN2(max, index);
258 - jint new_ind = index - k;
358 + jint new_ind = index - k;
259 359 for (int j = 0; j < k; j++) {
260 360 ptr_arr[j] = _base[new_ind + j];
261 361 }
262 362 _index = new_ind;
263 363 *n = k;
264 364 return true;
265 365 }
266 366 }
267 367
268 368 template<class OopClosureClass>
269 369 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
270 370 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
271 371 || SafepointSynchronize::is_at_safepoint(),
272 372 "Drain recursion must be yield-safe.");
273 373 bool res = true;
274 374 debug_only(_drain_in_progress = true);
275 375 debug_only(_drain_in_progress_yields = yield_after);
276 376 while (!isEmpty()) {
277 377 oop newOop = pop();
278 378 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
279 379 assert(newOop->is_oop(), "Expected an oop");
280 380 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
281 381 "only grey objects on this stack");
282 382 newOop->oop_iterate(cl);
283 383 if (yield_after && _cm->do_yield_check()) {
284 384 res = false;
285 385 break;
286 386 }
287 387 }
288 388 debug_only(_drain_in_progress = false);
289 389 return res;
290 390 }
291 391
292 392 void CMMarkStack::note_start_of_gc() {
293 393 assert(_saved_index == -1,
294 394 "note_start_of_gc()/end_of_gc() bracketed incorrectly");
295 395 _saved_index = _index;
296 396 }
297 397
298 398 void CMMarkStack::note_end_of_gc() {
299 399 // This is intentionally a guarantee, instead of an assert. If we
300 400 // accidentally add something to the mark stack during GC, it
 301  401   // will be a correctness issue so it's better if we crash. We'll
302 402 // only check this once per GC anyway, so it won't be a performance
303 403 // issue in any way.
304 404 guarantee(_saved_index == _index,
305 405 err_msg("saved index: %d index: %d", _saved_index, _index));
306 406 _saved_index = -1;
307 407 }
308 408
309 409 void CMMarkStack::oops_do(OopClosure* f) {
310 410 assert(_saved_index == _index,
311 411 err_msg("saved index: %d index: %d", _saved_index, _index));
312 412 for (int i = 0; i < _index; i += 1) {
313 413 f->do_oop(&_base[i]);
314 414 }
315 415 }
316 416
317 417 bool ConcurrentMark::not_yet_marked(oop obj) const {
318 418 return _g1h->is_obj_ill(obj);
319 419 }
320 420
321 421 CMRootRegions::CMRootRegions() :
322 422 _young_list(NULL), _cm(NULL), _scan_in_progress(false),
323 423 _should_abort(false), _next_survivor(NULL) { }
324 424
325 425 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
326 426 _young_list = g1h->young_list();
327 427 _cm = cm;
328 428 }
329 429
330 430 void CMRootRegions::prepare_for_scan() {
331 431 assert(!scan_in_progress(), "pre-condition");
332 432
333 433 // Currently, only survivors can be root regions.
334 434 assert(_next_survivor == NULL, "pre-condition");
335 435 _next_survivor = _young_list->first_survivor_region();
336 436 _scan_in_progress = (_next_survivor != NULL);
337 437 _should_abort = false;
338 438 }
339 439
340 440 HeapRegion* CMRootRegions::claim_next() {
341 441 if (_should_abort) {
342 442 // If someone has set the should_abort flag, we return NULL to
343 443 // force the caller to bail out of their loop.
344 444 return NULL;
345 445 }
346 446
347 447 // Currently, only survivors can be root regions.
348 448 HeapRegion* res = _next_survivor;
349 449 if (res != NULL) {
350 450 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
351 451 // Read it again in case it changed while we were waiting for the lock.
352 452 res = _next_survivor;
353 453 if (res != NULL) {
354 454 if (res == _young_list->last_survivor_region()) {
355 455 // We just claimed the last survivor so store NULL to indicate
356 456 // that we're done.
357 457 _next_survivor = NULL;
358 458 } else {
359 459 _next_survivor = res->get_next_young_region();
360 460 }
361 461 } else {
362 462 // Someone else claimed the last survivor while we were trying
363 463 // to take the lock so nothing else to do.
364 464 }
365 465 }
366 466 assert(res == NULL || res->is_survivor(), "post-condition");
367 467
368 468 return res;
369 469 }
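
claim_next() does a racy read first and only takes RootRegionScan_lock when there appears to be work, re-reading the cursor under the lock so two claimers never hand out the same survivor. The pattern in isolation (std::mutex standing in for the HotSpot lock, with a hypothetical Region type):

    #include <mutex>
    #include <cstddef>

    struct Region { Region* next; };

    Region* claim_next(Region** cursor, Region* last, std::mutex& lock) {
      Region* res = *cursor;              // unlocked peek; NULL means "done"
      if (res != NULL) {
        std::lock_guard<std::mutex> x(lock);
        res = *cursor;                    // re-read now that we hold the lock
        if (res != NULL) {
          *cursor = (res == last) ? NULL : res->next;
        }
      }
      return res;
    }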
370 470
371 471 void CMRootRegions::scan_finished() {
372 472 assert(scan_in_progress(), "pre-condition");
373 473
374 474 // Currently, only survivors can be root regions.
375 475 if (!_should_abort) {
376 476 assert(_next_survivor == NULL, "we should have claimed all survivors");
377 477 }
378 478 _next_survivor = NULL;
379 479
380 480 {
381 481 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
382 482 _scan_in_progress = false;
383 483 RootRegionScan_lock->notify_all();
384 484 }
385 485 }
386 486
387 487 bool CMRootRegions::wait_until_scan_finished() {
388 488 if (!scan_in_progress()) return false;
389 489
390 490 {
391 491 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
392 492 while (scan_in_progress()) {
393 493 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
394 494 }
395 495 }
396 496 return true;
397 497 }
398 498
399 499 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
400 500 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
401 501 #endif // _MSC_VER
402 502
403 503 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
404 504 return MAX2((n_par_threads + 2) / 4, 1U);
405 505 }
406 506
407 -ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
408 - _markBitMap1(rs, MinObjAlignment - 1),
409 - _markBitMap2(rs, MinObjAlignment - 1),
507 +ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
508 + _g1h(g1h),
509 + _markBitMap1(MinObjAlignment - 1),
510 + _markBitMap2(MinObjAlignment - 1),
410 511
411 512 _parallel_marking_threads(0),
412 513 _max_parallel_marking_threads(0),
413 514 _sleep_factor(0.0),
414 515 _marking_task_overhead(1.0),
415 516 _cleanup_sleep_factor(0.0),
416 517 _cleanup_task_overhead(1.0),
417 518 _cleanup_list("Cleanup List"),
418 - _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
419 - _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
420 - CardTableModRefBS::card_shift,
421 - false /* in_resource_area*/),
519 + _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
520 + _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
521 + CardTableModRefBS::card_shift,
522 + false /* in_resource_area*/),
422 523
423 524 _prevMarkBitMap(&_markBitMap1),
424 525 _nextMarkBitMap(&_markBitMap2),
425 526
426 527 _markStack(this),
427 528 // _finger set in set_non_marking_state
428 529
429 530 _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
430 531 // _active_tasks set in set_non_marking_state
431 532 // _tasks set inside the constructor
432 533 _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
433 534 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
434 535
435 536 _has_overflown(false),
436 537 _concurrent(false),
437 538 _has_aborted(false),
438 539 _restart_for_overflow(false),
439 540 _concurrent_marking_in_progress(false),
440 541
441 542 // _verbose_level set below
442 543
443 544 _init_times(),
444 545 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
445 546 _cleanup_times(),
446 547 _total_counting_time(0.0),
447 548 _total_rs_scrub_time(0.0),
448 549
449 550 _parallel_workers(NULL),
450 551
451 552 _count_card_bitmaps(NULL),
452 - _count_marked_bytes(NULL) {
553 + _count_marked_bytes(NULL),
554 + _completed_initialization(false) {
453 555 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
454 556 if (verbose_level < no_verbose) {
455 557 verbose_level = no_verbose;
456 558 }
457 559 if (verbose_level > high_verbose) {
458 560 verbose_level = high_verbose;
459 561 }
460 562 _verbose_level = verbose_level;
461 563
462 564 if (verbose_low()) {
463 565 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
464 566 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
465 567 }
466 568
467 - _markStack.allocate(MarkStackSize);
569 + if (!_markBitMap1.allocate(heap_rs)) {
570 + warning("Failed to allocate first CM bit map");
571 + return;
572 + }
573 + if (!_markBitMap2.allocate(heap_rs)) {
574 + warning("Failed to allocate second CM bit map");
575 + return;
576 + }
468 577
469 578 // Create & start a ConcurrentMark thread.
470 579 _cmThread = new ConcurrentMarkThread(this);
471 580 assert(cmThread() != NULL, "CM Thread should have been created");
472 581 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
473 582
474 - _g1h = G1CollectedHeap::heap();
475 583 assert(CGC_lock != NULL, "Where's the CGC_lock?");
476 - assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
477 - assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
584 + assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
585 + assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
478 586
479 587 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
480 588 satb_qs.set_buffer_size(G1SATBBufferSize);
481 589
482 590 _root_regions.init(_g1h, this);
483 591
484 - _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
485 - _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
486 -
487 - _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
488 - _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
489 -
490 - BitMap::idx_t card_bm_size = _card_bm.size();
491 -
492 - // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
493 - _active_tasks = _max_worker_id;
494 - for (uint i = 0; i < _max_worker_id; ++i) {
495 - CMTaskQueue* task_queue = new CMTaskQueue();
496 - task_queue->initialize();
497 - _task_queues->register_queue(i, task_queue);
498 -
499 - _count_card_bitmaps[i] = BitMap(card_bm_size, false);
500 - _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
501 -
502 - _tasks[i] = new CMTask(i, this,
503 - _count_marked_bytes[i],
504 - &_count_card_bitmaps[i],
505 - task_queue, _task_queues);
506 -
507 - _accum_task_vtime[i] = 0.0;
508 - }
509 -
510 - // Calculate the card number for the bottom of the heap. Used
511 - // in biasing indexes into the accounting card bitmaps.
512 - _heap_bottom_card_num =
513 - intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
514 - CardTableModRefBS::card_shift);
515 -
516 - // Clear all the liveness counting data
517 - clear_all_count_data();
518 -
519 592 if (ConcGCThreads > ParallelGCThreads) {
520 - vm_exit_during_initialization("Can't have more ConcGCThreads "
521 - "than ParallelGCThreads.");
593 + warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
594 + "than ParallelGCThreads (" UINT32_FORMAT ").",
595 + ConcGCThreads, ParallelGCThreads);
596 + return;
522 597 }
523 598 if (ParallelGCThreads == 0) {
524 599 // if we are not running with any parallel GC threads we will not
525 600 // spawn any marking threads either
526 601 _parallel_marking_threads = 0;
527 602 _max_parallel_marking_threads = 0;
528 603 _sleep_factor = 0.0;
529 604 _marking_task_overhead = 1.0;
530 605 } else {
531 606 if (ConcGCThreads > 0) {
532 607 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
533 608 // if both are set
534 609
535 610 _parallel_marking_threads = (uint) ConcGCThreads;
536 611 _max_parallel_marking_threads = _parallel_marking_threads;
537 612 _sleep_factor = 0.0;
538 613 _marking_task_overhead = 1.0;
539 614 } else if (G1MarkingOverheadPercent > 0) {
540 615 // we will calculate the number of parallel marking threads
541 616 // based on a target overhead with respect to the soft real-time
542 617 // goal
543 618
544 619 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
545 620 double overall_cm_overhead =
546 621 (double) MaxGCPauseMillis * marking_overhead /
547 622 (double) GCPauseIntervalMillis;
548 623 double cpu_ratio = 1.0 / (double) os::processor_count();
549 624 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
550 625 double marking_task_overhead =
551 626 overall_cm_overhead / marking_thread_num *
552 627 (double) os::processor_count();
553 628 double sleep_factor =
554 629 (1.0 - marking_task_overhead) / marking_task_overhead;
555 630
556 631 _parallel_marking_threads = (uint) marking_thread_num;
557 632 _max_parallel_marking_threads = _parallel_marking_threads;
558 633 _sleep_factor = sleep_factor;
559 634 _marking_task_overhead = marking_task_overhead;
560 635 } else {
561 636 _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
562 637 _max_parallel_marking_threads = _parallel_marking_threads;
563 638 _sleep_factor = 0.0;
564 639 _marking_task_overhead = 1.0;
565 640 }
566 641
567 642 if (parallel_marking_threads() > 1) {
568 643 _cleanup_task_overhead = 1.0;
569 644 } else {
570 645 _cleanup_task_overhead = marking_task_overhead();
571 646 }
572 647 _cleanup_sleep_factor =
573 648 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
574 649
575 650 #if 0
576 651 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
577 652 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
578 653 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
579 654 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
580 655 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
581 656 #endif
582 657
583 658 guarantee(parallel_marking_threads() > 0, "peace of mind");
584 659 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
585 660 _max_parallel_marking_threads, false, true);
586 661 if (_parallel_workers == NULL) {
587 662 vm_exit_during_initialization("Failed necessary allocation.");
588 663 } else {
589 664 _parallel_workers->initialize_workers();
590 665 }
591 666 }
592 667
668 + if (FLAG_IS_DEFAULT(MarkStackSize)) {
669 + uintx mark_stack_size =
670 + MIN2(MarkStackSizeMax,
671 + MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
672 + // Verify that the calculated value for MarkStackSize is in range.
673 + // It would be nice to use the private utility routine from Arguments.
674 + if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
675 + warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
676 + "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 677 +              mark_stack_size, (uintx) 1, MarkStackSizeMax);
678 + return;
679 + }
680 + FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
681 + } else {
682 + // Verify MarkStackSize is in range.
683 + if (FLAG_IS_CMDLINE(MarkStackSize)) {
684 + if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
685 + if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
686 + warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
687 + "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 688 +                MarkStackSize, (uintx) 1, MarkStackSizeMax);
689 + return;
690 + }
691 + } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
692 + if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
693 + warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
694 + " or for MarkStackSizeMax (" UINTX_FORMAT ")",
695 + MarkStackSize, MarkStackSizeMax);
696 + return;
697 + }
698 + }
699 + }
700 + }
701 +
702 + if (!_markStack.allocate(MarkStackSize)) {
703 + warning("Failed to allocate CM marking stack");
704 + return;
705 + }
706 +
707 + _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
708 + _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
709 +
710 + _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
711 + _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
712 +
713 + BitMap::idx_t card_bm_size = _card_bm.size();
714 +
715 + // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
716 + _active_tasks = _max_worker_id;
717 +
718 + size_t max_regions = (size_t) _g1h->max_regions();
719 + for (uint i = 0; i < _max_worker_id; ++i) {
720 + CMTaskQueue* task_queue = new CMTaskQueue();
721 + task_queue->initialize();
722 + _task_queues->register_queue(i, task_queue);
723 +
724 + _count_card_bitmaps[i] = BitMap(card_bm_size, false);
725 + _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
726 +
727 + _tasks[i] = new CMTask(i, this,
728 + _count_marked_bytes[i],
729 + &_count_card_bitmaps[i],
730 + task_queue, _task_queues);
731 +
732 + _accum_task_vtime[i] = 0.0;
733 + }
734 +
735 + // Calculate the card number for the bottom of the heap. Used
736 + // in biasing indexes into the accounting card bitmaps.
737 + _heap_bottom_card_num =
738 + intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
739 + CardTableModRefBS::card_shift);
740 +
741 + // Clear all the liveness counting data
742 + clear_all_count_data();
743 +
593 744 // so that the call below can read a sensible value
594 - _heap_start = (HeapWord*) rs.base();
745 + _heap_start = (HeapWord*) heap_rs.base();
595 746 set_non_marking_state();
747 + _completed_initialization = true;
596 748 }
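
For the G1MarkingOverheadPercent branch above, a worked example with hypothetical flag values (8 CPUs, a 10% overhead target, 200ms pauses every 1000ms) yields one marking thread running about 16% of the time and sleeping 5.25x as long as it works:

    #include <cmath>
    #include <cstdio>

    int main() {
      const double G1MarkingOverheadPercent = 10.0;   // hypothetical flag values
      const double MaxGCPauseMillis         = 200.0;
      const double GCPauseIntervalMillis    = 1000.0;
      const int    processor_count          = 8;

      double marking_overhead    = G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
          MaxGCPauseMillis * marking_overhead / GCPauseIntervalMillis;    // 0.02
      double cpu_ratio           = 1.0 / processor_count;                 // 0.125
      double marking_thread_num  = ceil(overall_cm_overhead / cpu_ratio); // 1
      double marking_task_overhead =
          overall_cm_overhead / marking_thread_num * processor_count;     // 0.16
      double sleep_factor = (1.0 - marking_task_overhead) / marking_task_overhead;

      printf("threads=%.0f task_overhead=%.2f sleep_factor=%.2f\n",
             marking_thread_num, marking_task_overhead, sleep_factor);
      return 0;
    }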
597 749
598 750 void ConcurrentMark::update_g1_committed(bool force) {
599 751 // If concurrent marking is not in progress, then we do not need to
600 752 // update _heap_end.
601 753 if (!concurrent_marking_in_progress() && !force) return;
602 754
603 755 MemRegion committed = _g1h->g1_committed();
604 756 assert(committed.start() == _heap_start, "start shouldn't change");
605 757 HeapWord* new_end = committed.end();
606 758 if (new_end > _heap_end) {
607 759 // The heap has been expanded.
608 760
609 761 _heap_end = new_end;
610 762 }
611 763 // Notice that the heap can also shrink. However, this only happens
612 764 // during a Full GC (at least currently) and the entire marking
613 765 // phase will bail out and the task will not be restarted. So, let's
614 766 // do nothing.
615 767 }
616 768
617 769 void ConcurrentMark::reset() {
618 770 // Starting values for these two. This should be called in a STW
 619  771   // phase. CM will be notified of any future g1_committed expansions;
 620  772   // these will happen at the end of evacuation pauses, when tasks are
621 773 // inactive.
622 774 MemRegion committed = _g1h->g1_committed();
623 775 _heap_start = committed.start();
624 776 _heap_end = committed.end();
625 777
626 778 // Separated the asserts so that we know which one fires.
627 779 assert(_heap_start != NULL, "heap bounds should look ok");
628 780 assert(_heap_end != NULL, "heap bounds should look ok");
629 781 assert(_heap_start < _heap_end, "heap bounds should look ok");
630 782
631 783 // reset all the marking data structures and any necessary flags
632 784 clear_marking_state();
633 785
634 786 if (verbose_low()) {
635 787 gclog_or_tty->print_cr("[global] resetting");
636 788 }
637 789
638 790 // We do reset all of them, since different phases will use
639 791 // different number of active threads. So, it's easiest to have all
640 792 // of them ready.
641 793 for (uint i = 0; i < _max_worker_id; ++i) {
642 794 _tasks[i]->reset(_nextMarkBitMap);
643 795 }
644 796
645 797 // we need this to make sure that the flag is on during the evac
646 798 // pause with initial mark piggy-backed
647 799 set_concurrent_marking_in_progress();
648 800 }
649 801
650 802 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
651 803 assert(active_tasks <= _max_worker_id, "we should not have more");
652 804
653 805 _active_tasks = active_tasks;
654 806 // Need to update the three data structures below according to the
655 807 // number of active threads for this phase.
656 808 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
657 809 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
658 810 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
659 811
660 812 _concurrent = concurrent;
661 813 // We propagate this to all tasks, not just the active ones.
662 814 for (uint i = 0; i < _max_worker_id; ++i)
663 815 _tasks[i]->set_concurrent(concurrent);
664 816
665 817 if (concurrent) {
666 818 set_concurrent_marking_in_progress();
667 819 } else {
668 820 // We currently assume that the concurrent flag has been set to
669 821 // false before we start remark. At this point we should also be
670 822 // in a STW phase.
671 823 assert(!concurrent_marking_in_progress(), "invariant");
672 824 assert(_finger == _heap_end, "only way to get here");
673 825 update_g1_committed(true);
674 826 }
675 827 }
676 828
677 829 void ConcurrentMark::set_non_marking_state() {
678 830 // We set the global marking state to some default values when we're
679 831 // not doing marking.
680 832 clear_marking_state();
681 833 _active_tasks = 0;
682 834 clear_concurrent_marking_in_progress();
683 835 }
684 836
685 837 ConcurrentMark::~ConcurrentMark() {
686 838 // The ConcurrentMark instance is never freed.
687 839 ShouldNotReachHere();
688 840 }
689 841
690 842 void ConcurrentMark::clearNextBitmap() {
691 843 G1CollectedHeap* g1h = G1CollectedHeap::heap();
692 844 G1CollectorPolicy* g1p = g1h->g1_policy();
693 845
694 846 // Make sure that the concurrent mark thread looks to still be in
695 847 // the current cycle.
696 848 guarantee(cmThread()->during_cycle(), "invariant");
697 849
698 850 // We are finishing up the current cycle by clearing the next
699 851 // marking bitmap and getting it ready for the next cycle. During
700 852 // this time no other cycle can start. So, let's make sure that this
701 853 // is the case.
702 854 guarantee(!g1h->mark_in_progress(), "invariant");
703 855
704 856 // clear the mark bitmap (no grey objects to start with).
705 857 // We need to do this in chunks and offer to yield in between
706 858 // each chunk.
707 859 HeapWord* start = _nextMarkBitMap->startWord();
708 860 HeapWord* end = _nextMarkBitMap->endWord();
709 861 HeapWord* cur = start;
710 862 size_t chunkSize = M;
711 863 while (cur < end) {
712 864 HeapWord* next = cur + chunkSize;
713 865 if (next > end) {
714 866 next = end;
715 867 }
716 868 MemRegion mr(cur,next);
717 869 _nextMarkBitMap->clearRange(mr);
718 870 cur = next;
719 871 do_yield_check();
720 872
721 873 // Repeat the asserts from above. We'll do them as asserts here to
722 874 // minimize their overhead on the product. However, we'll have
723 875 // them as guarantees at the beginning / end of the bitmap
724 876 // clearing to get some checking in the product.
725 877 assert(cmThread()->during_cycle(), "invariant");
726 878 assert(!g1h->mark_in_progress(), "invariant");
727 879 }
728 880
729 881 // Clear the liveness counting data
730 882 clear_all_count_data();
731 883
732 884 // Repeat the asserts from above.
733 885 guarantee(cmThread()->during_cycle(), "invariant");
734 886 guarantee(!g1h->mark_in_progress(), "invariant");
735 887 }
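
The loop above clears M heap words at a time and calls do_yield_check() between chunks, so the concurrent thread can be stopped for a safepoint mid-clear. The shape of that loop in isolation (clear_range and yield_check are hypothetical stand-ins):

    #include <cstddef>

    void clear_in_chunks(char* start, char* end, size_t chunk,
                         void (*clear_range)(char*, char*),
                         void (*yield_check)()) {
      for (char* cur = start; cur < end; ) {
        char* next = cur + chunk;
        if (next > end) {
          next = end;                // clamp the final partial chunk
        }
        clear_range(cur, next);
        cur = next;
        yield_check();               // lets a safepoint interrupt the clear
      }
    }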
736 888
737 889 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
738 890 public:
739 891 bool doHeapRegion(HeapRegion* r) {
740 892 if (!r->continuesHumongous()) {
741 893 r->note_start_of_marking();
742 894 }
743 895 return false;
744 896 }
745 897 };
746 898
747 899 void ConcurrentMark::checkpointRootsInitialPre() {
748 900 G1CollectedHeap* g1h = G1CollectedHeap::heap();
749 901 G1CollectorPolicy* g1p = g1h->g1_policy();
750 902
751 903 _has_aborted = false;
752 904
753 905 #ifndef PRODUCT
754 906 if (G1PrintReachableAtInitialMark) {
755 907 print_reachable("at-cycle-start",
756 908 VerifyOption_G1UsePrevMarking, true /* all */);
757 909 }
758 910 #endif
759 911
760 912 // Initialise marking structures. This has to be done in a STW phase.
761 913 reset();
762 914
763 915 // For each region note start of marking.
764 916 NoteStartOfMarkHRClosure startcl;
765 917 g1h->heap_region_iterate(&startcl);
766 918 }
767 919
768 920
769 921 void ConcurrentMark::checkpointRootsInitialPost() {
770 922 G1CollectedHeap* g1h = G1CollectedHeap::heap();
771 923
772 924 // If we force an overflow during remark, the remark operation will
773 925 // actually abort and we'll restart concurrent marking. If we always
 774  926   // force an overflow during remark we'll never actually complete the
 775  927   // marking phase. So, we initialize this here, at the start of the
 776  928   // cycle, so that the remaining overflow number will decrease at
777 929 // every remark and we'll eventually not need to cause one.
778 930 force_overflow_stw()->init();
779 931
780 932 // Start Concurrent Marking weak-reference discovery.
781 933 ReferenceProcessor* rp = g1h->ref_processor_cm();
782 934 // enable ("weak") refs discovery
783 935 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
784 936 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
785 937
786 938 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 787  939   // This is the start of the marking cycle; we expect all
788 940 // threads to have SATB queues with active set to false.
789 941 satb_mq_set.set_active_all_threads(true, /* new active value */
790 942 false /* expected_active */);
791 943
792 944 _root_regions.prepare_for_scan();
793 945
794 946 // update_g1_committed() will be called at the end of an evac pause
795 947 // when marking is on. So, it's also called at the end of the
796 948 // initial-mark pause to update the heap end, if the heap expands
797 949 // during it. No need to call it here.
798 950 }
799 951
800 952 /*
801 953 * Notice that in the next two methods, we actually leave the STS
802 954 * during the barrier sync and join it immediately afterwards. If we
803 955 * do not do this, the following deadlock can occur: one thread could
804 956 * be in the barrier sync code, waiting for the other thread to also
805 957 * sync up, whereas another one could be trying to yield, while also
806 958 * waiting for the other threads to sync up too.
807 959 *
808 960 * Note, however, that this code is also used during remark and in
809 961 * this case we should not attempt to leave / enter the STS, otherwise
 810  962  * we'll either hit an assert (debug / fastdebug) or deadlock
811 963 * (product). So we should only leave / enter the STS if we are
812 964 * operating concurrently.
813 965 *
814 966 * Because the thread that does the sync barrier has left the STS, it
 815  967  * is possible that it will be suspended for a Full GC, or that an
 816  968  * evacuation pause will occur. This is actually safe, since entering the sync
817 969 * barrier is one of the last things do_marking_step() does, and it
818 970 * doesn't manipulate any data structures afterwards.
819 971 */
820 972
821 973 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
822 974 if (verbose_low()) {
823 975 gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
824 976 }
825 977
826 978 if (concurrent()) {
827 979 ConcurrentGCThread::stsLeave();
828 980 }
829 981 _first_overflow_barrier_sync.enter();
830 982 if (concurrent()) {
831 983 ConcurrentGCThread::stsJoin();
832 984 }
833 985 // at this point everyone should have synced up and not be doing any
834 986 // more work
835 987
836 988 if (verbose_low()) {
837 989 gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
838 990 }
839 991
 840  992   // let the task associated with worker 0 do this
841 993 if (worker_id == 0) {
842 994 // task 0 is responsible for clearing the global data structures
843 995 // We should be here because of an overflow. During STW we should
844 996 // not clear the overflow flag since we rely on it being true when
 845  997     // we exit this method to abort the pause and restart concurrent
846 998 // marking.
847 999 clear_marking_state(concurrent() /* clear_overflow */);
848 1000 force_overflow()->update();
849 1001
850 1002 if (G1Log::fine()) {
851 1003 gclog_or_tty->date_stamp(PrintGCDateStamps);
852 1004 gclog_or_tty->stamp(PrintGCTimeStamps);
853 1005 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
854 1006 }
855 1007 }
856 1008
 857 1009   // after this, each task should reset its own data structures
 858 1010   // then go into the second barrier
859 1011 }
860 1012
861 1013 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
862 1014 if (verbose_low()) {
863 1015 gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
864 1016 }
865 1017
866 1018 if (concurrent()) {
867 1019 ConcurrentGCThread::stsLeave();
868 1020 }
869 1021 _second_overflow_barrier_sync.enter();
870 1022 if (concurrent()) {
871 1023 ConcurrentGCThread::stsJoin();
872 1024 }
873 1025 // at this point everything should be re-initialised and ready to go
874 1026
875 1027 if (verbose_low()) {
876 1028 gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
877 1029 }
878 1030 }
879 1031
880 1032 #ifndef PRODUCT
881 1033 void ForceOverflowSettings::init() {
882 1034 _num_remaining = G1ConcMarkForceOverflow;
883 1035 _force = false;
884 1036 update();
885 1037 }
886 1038
887 1039 void ForceOverflowSettings::update() {
888 1040 if (_num_remaining > 0) {
889 1041 _num_remaining -= 1;
890 1042 _force = true;
891 1043 } else {
892 1044 _force = false;
893 1045 }
894 1046 }
895 1047
896 1048 bool ForceOverflowSettings::should_force() {
897 1049 if (_force) {
898 1050 _force = false;
899 1051 return true;
900 1052 } else {
901 1053 return false;
902 1054 }
903 1055 }
904 1056 #endif // !PRODUCT
905 1057
906 1058 class CMConcurrentMarkingTask: public AbstractGangTask {
907 1059 private:
908 1060 ConcurrentMark* _cm;
909 1061 ConcurrentMarkThread* _cmt;
910 1062
911 1063 public:
912 1064 void work(uint worker_id) {
913 1065 assert(Thread::current()->is_ConcurrentGC_thread(),
914 1066 "this should only be done by a conc GC thread");
915 1067 ResourceMark rm;
916 1068
917 1069 double start_vtime = os::elapsedVTime();
918 1070
919 1071 ConcurrentGCThread::stsJoin();
920 1072
921 1073 assert(worker_id < _cm->active_tasks(), "invariant");
922 1074 CMTask* the_task = _cm->task(worker_id);
923 1075 the_task->record_start_time();
924 1076 if (!_cm->has_aborted()) {
925 1077 do {
926 1078 double start_vtime_sec = os::elapsedVTime();
927 1079 double start_time_sec = os::elapsedTime();
928 1080 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
929 1081
930 1082 the_task->do_marking_step(mark_step_duration_ms,
931 1083 true /* do_stealing */,
932 1084 true /* do_termination */);
933 1085
934 1086 double end_time_sec = os::elapsedTime();
935 1087 double end_vtime_sec = os::elapsedVTime();
936 1088 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
937 1089 double elapsed_time_sec = end_time_sec - start_time_sec;
938 1090 _cm->clear_has_overflown();
939 1091
940 1092 bool ret = _cm->do_yield_check(worker_id);
941 1093
942 1094 jlong sleep_time_ms;
943 1095 if (!_cm->has_aborted() && the_task->has_aborted()) {
944 1096 sleep_time_ms =
945 1097 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
946 1098 ConcurrentGCThread::stsLeave();
947 1099 os::sleep(Thread::current(), sleep_time_ms, false);
948 1100 ConcurrentGCThread::stsJoin();
949 1101 }
950 1102 double end_time2_sec = os::elapsedTime();
951 1103 double elapsed_time2_sec = end_time2_sec - start_time_sec;
952 1104
953 1105 #if 0
954 1106 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
955 1107 "overhead %1.4lf",
956 1108 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
957 1109 the_task->conc_overhead(os::elapsedTime()) * 8.0);
958 1110 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
959 1111 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
960 1112 #endif
961 1113 } while (!_cm->has_aborted() && the_task->has_aborted());
962 1114 }
963 1115 the_task->record_end_time();
964 1116 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
965 1117
966 1118 ConcurrentGCThread::stsLeave();
967 1119
968 1120 double end_vtime = os::elapsedVTime();
969 1121 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
970 1122 }
971 1123
972 1124 CMConcurrentMarkingTask(ConcurrentMark* cm,
973 1125 ConcurrentMarkThread* cmt) :
974 1126 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
975 1127
976 1128 ~CMConcurrentMarkingTask() { }
977 1129 };
978 1130
979 1131 // Calculates the number of active workers for a concurrent
980 1132 // phase.
981 1133 uint ConcurrentMark::calc_parallel_marking_threads() {
982 1134 if (G1CollectedHeap::use_parallel_gc_threads()) {
983 1135 uint n_conc_workers = 0;
984 1136 if (!UseDynamicNumberOfGCThreads ||
985 1137 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
986 1138 !ForceDynamicNumberOfGCThreads)) {
987 1139 n_conc_workers = max_parallel_marking_threads();
988 1140 } else {
989 1141 n_conc_workers =
990 1142 AdaptiveSizePolicy::calc_default_active_workers(
991 1143 max_parallel_marking_threads(),
992 1144 1, /* Minimum workers */
993 1145 parallel_marking_threads(),
994 1146 Threads::number_of_non_daemon_threads());
995 1147 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
996 1148 // that scaling has already gone into "_max_parallel_marking_threads".
997 1149 }
998 1150 assert(n_conc_workers > 0, "Always need at least 1");
999 1151 return n_conc_workers;
1000 1152 }
1001 1153 // If we are not running with any parallel GC threads we will not
1002 1154 // have spawned any marking threads either. Hence the number of
1003 1155 // concurrent workers should be 0.
1004 1156 return 0;
1005 1157 }
1006 1158
1007 1159 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1008 1160 // Currently, only survivors can be root regions.
1009 1161 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1010 1162 G1RootRegionScanClosure cl(_g1h, this, worker_id);
1011 1163
1012 1164 const uintx interval = PrefetchScanIntervalInBytes;
1013 1165 HeapWord* curr = hr->bottom();
1014 1166 const HeapWord* end = hr->top();
1015 1167 while (curr < end) {
1016 1168 Prefetch::read(curr, interval);
1017 1169 oop obj = oop(curr);
1018 1170 int size = obj->oop_iterate(&cl);
1019 1171 assert(size == obj->size(), "sanity");
1020 1172 curr += size;
1021 1173 }
1022 1174 }
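
scanRootRegion() walks the region linearly and prefetches PrefetchScanIntervalInBytes ahead of the cursor before touching each object. The same idea with GCC's __builtin_prefetch standing in for Prefetch::read (object_size() is a hypothetical stand-in for the oop_iterate call, which both visits the object and returns its size):

    #include <cstddef>

    // Hypothetical stand-in for obj->oop_iterate(&cl): visit obj, return size.
    static size_t object_size(char* obj) { (void) obj; return 16; }

    void scan(char* bottom, char* top, size_t interval) {
      for (char* curr = bottom; curr < top; ) {
        __builtin_prefetch(curr + interval, 0 /* read */);
        curr += object_size(curr);
      }
    }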
1023 1175
1024 1176 class CMRootRegionScanTask : public AbstractGangTask {
1025 1177 private:
1026 1178 ConcurrentMark* _cm;
1027 1179
1028 1180 public:
1029 1181 CMRootRegionScanTask(ConcurrentMark* cm) :
1030 1182 AbstractGangTask("Root Region Scan"), _cm(cm) { }
1031 1183
1032 1184 void work(uint worker_id) {
1033 1185 assert(Thread::current()->is_ConcurrentGC_thread(),
1034 1186 "this should only be done by a conc GC thread");
1035 1187
1036 1188 CMRootRegions* root_regions = _cm->root_regions();
1037 1189 HeapRegion* hr = root_regions->claim_next();
1038 1190 while (hr != NULL) {
1039 1191 _cm->scanRootRegion(hr, worker_id);
1040 1192 hr = root_regions->claim_next();
1041 1193 }
1042 1194 }
1043 1195 };
1044 1196
1045 1197 void ConcurrentMark::scanRootRegions() {
1046 1198 // scan_in_progress() will have been set to true only if there was
1047 1199 // at least one root region to scan. So, if it's false, we
1048 1200 // should not attempt to do any further work.
1049 1201 if (root_regions()->scan_in_progress()) {
1050 1202 _parallel_marking_threads = calc_parallel_marking_threads();
1051 1203 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1052 1204 "Maximum number of marking threads exceeded");
1053 1205 uint active_workers = MAX2(1U, parallel_marking_threads());
1054 1206
1055 1207 CMRootRegionScanTask task(this);
1056 1208 if (parallel_marking_threads() > 0) {
1057 1209 _parallel_workers->set_active_workers((int) active_workers);
1058 1210 _parallel_workers->run_task(&task);
1059 1211 } else {
1060 1212 task.work(0);
1061 1213 }
1062 1214
1063 1215 // It's possible that has_aborted() is true here without actually
1064 1216 // aborting the survivor scan earlier. This is OK as it's
1065 1217 // mainly used for sanity checking.
1066 1218 root_regions()->scan_finished();
1067 1219 }
1068 1220 }
1069 1221
1070 1222 void ConcurrentMark::markFromRoots() {
1071 1223 // we might be tempted to assert that:
1072 1224 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1073 1225 // "inconsistent argument?");
1074 1226 // However that wouldn't be right, because it's possible that
1075 1227 // a safepoint is indeed in progress as a younger generation
1076 1228 // stop-the-world GC happens even as we mark in this generation.
1077 1229
1078 1230 _restart_for_overflow = false;
1079 1231 force_overflow_conc()->init();
1080 1232
1081 1233 // _g1h has _n_par_threads
1082 1234 _parallel_marking_threads = calc_parallel_marking_threads();
1083 1235 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1084 1236 "Maximum number of marking threads exceeded");
1085 1237
1086 1238 uint active_workers = MAX2(1U, parallel_marking_threads());
1087 1239
1088 1240 // Parallel task terminator is set in "set_phase()"
1089 1241 set_phase(active_workers, true /* concurrent */);
1090 1242
1091 1243 CMConcurrentMarkingTask markingTask(this, cmThread());
1092 1244 if (parallel_marking_threads() > 0) {
1093 1245 _parallel_workers->set_active_workers((int)active_workers);
1094 1246     // Don't set _n_par_threads because it affects MT in process_strong_roots()
1095 1247     // and the decisions on that MT processing are made elsewhere.
1096 1248 assert(_parallel_workers->active_workers() > 0, "Should have been set");
1097 1249 _parallel_workers->run_task(&markingTask);
1098 1250 } else {
1099 1251 markingTask.work(0);
1100 1252 }
1101 1253 print_stats();
1102 1254 }
1103 1255
1104 1256 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1105 1257 // world is stopped at this checkpoint
1106 1258 assert(SafepointSynchronize::is_at_safepoint(),
1107 1259 "world should be stopped");
1108 1260
1109 1261 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1110 1262
1111 1263 // If a full collection has happened, we shouldn't do this.
1112 1264 if (has_aborted()) {
1113 1265 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1114 1266 return;
1115 1267 }
1116 1268
1117 1269 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1118 1270
1119 1271 if (VerifyDuringGC) {
1120 1272 HandleMark hm; // handle scope
1121 1273 gclog_or_tty->print(" VerifyDuringGC:(before)");
1122 1274 Universe::heap()->prepare_for_verify();
1123 1275 Universe::verify(/* silent */ false,
1124 1276 /* option */ VerifyOption_G1UsePrevMarking);
1125 1277 }
1126 1278
1127 1279 G1CollectorPolicy* g1p = g1h->g1_policy();
1128 1280 g1p->record_concurrent_mark_remark_start();
1129 1281
1130 1282 double start = os::elapsedTime();
1131 1283
1132 1284 checkpointRootsFinalWork();
1133 1285
1134 1286 double mark_work_end = os::elapsedTime();
1135 1287
1136 1288 weakRefsWork(clear_all_soft_refs);
1137 1289
1138 1290 if (has_overflown()) {
1139 1291 // Oops. We overflowed. Restart concurrent marking.
1140 1292 _restart_for_overflow = true;
1141 1293 // Clear the flag. We do not need it any more.
1142 1294 clear_has_overflown();
1143 1295 if (G1TraceMarkStackOverflow) {
1144 1296 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1145 1297 }
1146 1298 } else {
1147 1299 // Aggregate the per-task counting data that we have accumulated
1148 1300 // while marking.
1149 1301 aggregate_count_data();
1150 1302
1151 1303 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1152 1304 // We're done with marking.
1153 1305     // This is the end of the marking cycle; we expect all
1154 1306 // threads to have SATB queues with active set to true.
1155 1307 satb_mq_set.set_active_all_threads(false, /* new active value */
1156 1308 true /* expected_active */);
1157 1309
1158 1310 if (VerifyDuringGC) {
1159 1311 HandleMark hm; // handle scope
1160 1312 gclog_or_tty->print(" VerifyDuringGC:(after)");
1161 1313 Universe::heap()->prepare_for_verify();
1162 1314 Universe::verify(/* silent */ false,
1163 1315 /* option */ VerifyOption_G1UseNextMarking);
1164 1316 }
1165 1317 assert(!restart_for_overflow(), "sanity");
1166 1318 }
1167 1319
1320 + // Expand the marking stack, if we have to and if we can.
1321 + if (_markStack.should_expand()) {
1322 + _markStack.expand();
1323 + }
1324 +
1168 1325 // Reset the marking state if marking completed
1169 1326 if (!restart_for_overflow()) {
1170 1327 set_non_marking_state();
1171 1328 }
1172 1329
1173 1330 #if VERIFY_OBJS_PROCESSED
1174 1331 _scan_obj_cl.objs_processed = 0;
1175 1332 ThreadLocalObjQueue::objs_enqueued = 0;
1176 1333 #endif
1177 1334
1178 1335 // Statistics
1179 1336 double now = os::elapsedTime();
1180 1337 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1181 1338 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1182 1339 _remark_times.add((now - start) * 1000.0);
1183 1340
1184 1341 g1p->record_concurrent_mark_remark_end();
1185 1342 }
1186 1343
1187 1344 // Base class of the closures that finalize and verify the
1188 1345 // liveness counting data.
1189 1346 class CMCountDataClosureBase: public HeapRegionClosure {
1190 1347 protected:
1191 1348 G1CollectedHeap* _g1h;
1192 1349 ConcurrentMark* _cm;
1193 1350 CardTableModRefBS* _ct_bs;
1194 1351
1195 1352 BitMap* _region_bm;
1196 1353 BitMap* _card_bm;
1197 1354
1198 1355 // Takes a region that's not empty (i.e., it has at least one
1199 1356   // live object in it) and sets its corresponding bit on the region
1200 1357 // bitmap to 1. If the region is "starts humongous" it will also set
1201 1358 // to 1 the bits on the region bitmap that correspond to its
1202 1359 // associated "continues humongous" regions.
1203 1360 void set_bit_for_region(HeapRegion* hr) {
1204 1361 assert(!hr->continuesHumongous(), "should have filtered those out");
1205 1362
1206 1363 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1207 1364 if (!hr->startsHumongous()) {
1208 1365 // Normal (non-humongous) case: just set the bit.
1209 1366 _region_bm->par_at_put(index, true);
1210 1367 } else {
1211 1368 // Starts humongous case: calculate how many regions are part of
1212 1369 // this humongous region and then set the bit range.
1213 1370 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1214 1371 _region_bm->par_at_put_range(index, end_index, true);
1215 1372 }
1216 1373 }
1217 1374
1218 1375 public:
1219 1376 CMCountDataClosureBase(G1CollectedHeap* g1h,
1220 1377 BitMap* region_bm, BitMap* card_bm):
1221 1378 _g1h(g1h), _cm(g1h->concurrent_mark()),
1222 1379 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1223 1380 _region_bm(region_bm), _card_bm(card_bm) { }
1224 1381 };
1225 1382
1226 1383 // Closure that calculates the # live objects per region. Used
1227 1384 // for verification purposes during the cleanup pause.
1228 1385 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1229 1386 CMBitMapRO* _bm;
1230 1387 size_t _region_marked_bytes;
1231 1388
1232 1389 public:
1233 1390 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1234 1391 BitMap* region_bm, BitMap* card_bm) :
1235 1392 CMCountDataClosureBase(g1h, region_bm, card_bm),
1236 1393 _bm(bm), _region_marked_bytes(0) { }
1237 1394
1238 1395 bool doHeapRegion(HeapRegion* hr) {
1239 1396
1240 1397 if (hr->continuesHumongous()) {
1241 1398 // We will ignore these here and process them when their
1242 1399 // associated "starts humongous" region is processed (see
1243 1400 // set_bit_for_heap_region()). Note that we cannot rely on their
1244 1401       // associated "starts humongous" region to have its bit set to
1245 1402 // 1 since, due to the region chunking in the parallel region
1246 1403 // iteration, a "continues humongous" region might be visited
1247 1404 // before its associated "starts humongous".
1248 1405 return false;
1249 1406 }
1250 1407
1251 1408 HeapWord* ntams = hr->next_top_at_mark_start();
1252 1409 HeapWord* start = hr->bottom();
1253 1410
1254 1411 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1255 1412 err_msg("Preconditions not met - "
1256 1413 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1257 1414 start, ntams, hr->end()));
1258 1415
1259 1416 // Find the first marked object at or after "start".
1260 1417 start = _bm->getNextMarkedWordAddress(start, ntams);
1261 1418
1262 1419 size_t marked_bytes = 0;
1263 1420
1264 1421 while (start < ntams) {
1265 1422 oop obj = oop(start);
1266 1423 int obj_sz = obj->size();
1267 1424 HeapWord* obj_end = start + obj_sz;
1268 1425
1269 1426 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1270 1427 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1271 1428
1272 1429 // Note: if we're looking at the last region in heap - obj_end
1273 1430 // could be actually just beyond the end of the heap; end_idx
1274 1431 // will then correspond to a (non-existent) card that is also
1275 1432 // just beyond the heap.
1276 1433 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1277 1434 // end of object is not card aligned - increment to cover
1278 1435 // all the cards spanned by the object
1279 1436 end_idx += 1;
1280 1437 }
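      // Worked example, assuming the usual 512-byte cards: for an object
      // spanning [0x8000, 0x8300), the index for 0x8300 rounds down to the
      // card starting at 0x8200, so without the increment the half-open
      // range [start_idx, end_idx) would miss that last, partially
      // covered card.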
1281 1438
1282 1439 // Set the bits in the card BM for the cards spanned by this object.
1283 1440 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1284 1441
1285 1442 // Add the size of this object to the number of marked bytes.
1286 1443 marked_bytes += (size_t)obj_sz * HeapWordSize;
1287 1444
1288 1445 // Find the next marked object after this one.
1289 1446 start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1290 1447 }
1291 1448
1292 1449 // Mark the allocated-since-marking portion...
1293 1450 HeapWord* top = hr->top();
1294 1451 if (ntams < top) {
1295 1452 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1296 1453 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1297 1454
1298 1455 // Note: if we're looking at the last region in heap - top
1299 1456 // could be actually just beyond the end of the heap; end_idx
1300 1457 // will then correspond to a (non-existent) card that is also
1301 1458 // just beyond the heap.
1302 1459 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1303 1460         // top is not card aligned - increment to cover
1304 1461         // all the cards spanned by the range [ntams, top)
1305 1462 end_idx += 1;
1306 1463 }
1307 1464 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1308 1465
1309 1466 // This definitely means the region has live objects.
1310 1467 set_bit_for_region(hr);
1311 1468 }
1312 1469
1313 1470 // Update the live region bitmap.
1314 1471 if (marked_bytes > 0) {
1315 1472 set_bit_for_region(hr);
1316 1473 }
1317 1474
1318 1475 // Set the marked bytes for the current region so that
1319 1476     // it can be queried by a calling verification routine
1320 1477 _region_marked_bytes = marked_bytes;
1321 1478
1322 1479 return false;
1323 1480 }
1324 1481
1325 1482 size_t region_marked_bytes() const { return _region_marked_bytes; }
1326 1483 };
1327 1484
1328 1485 // Heap region closure used for verifying the counting data
1329 1486 // that was accumulated concurrently and aggregated during
1330 1487 // the remark pause. This closure is applied to the heap
1331 1488 // regions during the STW cleanup pause.
1332 1489
1333 1490 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1334 1491 G1CollectedHeap* _g1h;
1335 1492 ConcurrentMark* _cm;
1336 1493 CalcLiveObjectsClosure _calc_cl;
1337 1494 BitMap* _region_bm; // Region BM to be verified
1338 1495 BitMap* _card_bm; // Card BM to be verified
1339 1496 bool _verbose; // verbose output?
1340 1497
1341 1498 BitMap* _exp_region_bm; // Expected Region BM values
1342 1499 BitMap* _exp_card_bm; // Expected card BM values
1343 1500
1344 1501 int _failures;
1345 1502
1346 1503 public:
1347 1504 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1348 1505 BitMap* region_bm,
1349 1506 BitMap* card_bm,
1350 1507 BitMap* exp_region_bm,
1351 1508 BitMap* exp_card_bm,
1352 1509 bool verbose) :
1353 1510 _g1h(g1h), _cm(g1h->concurrent_mark()),
1354 1511 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1355 1512 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1356 1513 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1357 1514 _failures(0) { }
1358 1515
1359 1516 int failures() const { return _failures; }
1360 1517
1361 1518 bool doHeapRegion(HeapRegion* hr) {
1362 1519 if (hr->continuesHumongous()) {
1363 1520 // We will ignore these here and process them when their
1364 1521 // associated "starts humongous" region is processed (see
1365 1522 // set_bit_for_heap_region()). Note that we cannot rely on their
1366 1523       // associated "starts humongous" region to have its bit set to
1367 1524 // 1 since, due to the region chunking in the parallel region
1368 1525 // iteration, a "continues humongous" region might be visited
1369 1526 // before its associated "starts humongous".
1370 1527 return false;
1371 1528 }
1372 1529
1373 1530 int failures = 0;
1374 1531
1375 1532 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1376 1533 // this region and set the corresponding bits in the expected region
1377 1534 // and card bitmaps.
1378 1535 bool res = _calc_cl.doHeapRegion(hr);
1379 1536 assert(res == false, "should be continuing");
1380 1537
1381 1538 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1382 1539 Mutex::_no_safepoint_check_flag);
1383 1540
1384 1541 // Verify the marked bytes for this region.
1385 1542 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1386 1543 size_t act_marked_bytes = hr->next_marked_bytes();
1387 1544
1388 1545 // We're not OK if expected marked bytes > actual marked bytes. It means
1389 1546     // we have missed accounting for some objects during the actual marking.
1390 1547 if (exp_marked_bytes > act_marked_bytes) {
1391 1548 if (_verbose) {
1392 1549 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1393 1550 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1394 1551 hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1395 1552 }
1396 1553 failures += 1;
1397 1554 }
1398 1555
1399 1556 // Verify the bit, for this region, in the actual and expected
1400 1557 // (which was just calculated) region bit maps.
1401 1558 // We're not OK if the bit in the calculated expected region
1402 1559 // bitmap is set and the bit in the actual region bitmap is not.
1403 1560 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1404 1561
1405 1562 bool expected = _exp_region_bm->at(index);
1406 1563 bool actual = _region_bm->at(index);
1407 1564 if (expected && !actual) {
1408 1565 if (_verbose) {
1409 1566 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1410 1567 "expected: %s, actual: %s",
1411 1568 hr->hrs_index(),
1412 1569 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1413 1570 }
1414 1571 failures += 1;
1415 1572 }
1416 1573
1417 1574 // Verify that the card bit maps for the cards spanned by the current
1418 1575 // region match. We have an error if we have a set bit in the expected
1419 1576 // bit map and the corresponding bit in the actual bitmap is not set.
1420 1577
1421 1578 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1422 1579 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1423 1580
1424 1581 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1425 1582 expected = _exp_card_bm->at(i);
1426 1583 actual = _card_bm->at(i);
1427 1584
1428 1585 if (expected && !actual) {
1429 1586 if (_verbose) {
1430 1587 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1431 1588 "expected: %s, actual: %s",
1432 1589 hr->hrs_index(), i,
1433 1590 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1434 1591 }
1435 1592 failures += 1;
1436 1593 }
1437 1594 }
1438 1595
1439 1596 if (failures > 0 && _verbose) {
1440 1597 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1441 1598 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1442 1599 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1443 1600 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1444 1601 }
1445 1602
1446 1603 _failures += failures;
1447 1604
1448 1605 // We could stop iteration over the heap when we
1449 1606 // find the first violating region by returning true.
1450 1607 return false;
1451 1608 }
1452 1609 };
1453 1610
1454 1611
1455 1612 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1456 1613 protected:
1457 1614 G1CollectedHeap* _g1h;
1458 1615 ConcurrentMark* _cm;
1459 1616 BitMap* _actual_region_bm;
1460 1617 BitMap* _actual_card_bm;
1461 1618
1462 1619 uint _n_workers;
1463 1620
1464 1621 BitMap* _expected_region_bm;
1465 1622 BitMap* _expected_card_bm;
1466 1623
1467 1624 int _failures;
1468 1625 bool _verbose;
1469 1626
1470 1627 public:
1471 1628 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1472 1629 BitMap* region_bm, BitMap* card_bm,
1473 1630 BitMap* expected_region_bm, BitMap* expected_card_bm)
1474 1631 : AbstractGangTask("G1 verify final counting"),
1475 1632 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1476 1633 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1477 1634 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1478 1635 _failures(0), _verbose(false),
1479 1636 _n_workers(0) {
1480 1637 assert(VerifyDuringGC, "don't call this otherwise");
1481 1638
1482 1639 // Use the value already set as the number of active threads
1483 1640 // in the call to run_task().
1484 1641 if (G1CollectedHeap::use_parallel_gc_threads()) {
1485 1642 assert( _g1h->workers()->active_workers() > 0,
1486 1643 "Should have been previously set");
1487 1644 _n_workers = _g1h->workers()->active_workers();
1488 1645 } else {
1489 1646 _n_workers = 1;
1490 1647 }
1491 1648
1492 1649 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1493 1650 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1494 1651
1495 1652 _verbose = _cm->verbose_medium();
1496 1653 }
1497 1654
1498 1655 void work(uint worker_id) {
1499 1656 assert(worker_id < _n_workers, "invariant");
1500 1657
1501 1658 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1502 1659 _actual_region_bm, _actual_card_bm,
1503 1660 _expected_region_bm,
1504 1661 _expected_card_bm,
1505 1662 _verbose);
1506 1663
1507 1664 if (G1CollectedHeap::use_parallel_gc_threads()) {
1508 1665 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1509 1666 worker_id,
1510 1667 _n_workers,
1511 1668 HeapRegion::VerifyCountClaimValue);
1512 1669 } else {
1513 1670 _g1h->heap_region_iterate(&verify_cl);
1514 1671 }
1515 1672
1516 1673 Atomic::add(verify_cl.failures(), &_failures);
1517 1674 }
1518 1675
1519 1676 int failures() const { return _failures; }
1520 1677 };
1521 1678
1522 1679 // Closure that finalizes the liveness counting data.
1523 1680 // Used during the cleanup pause.
1524 1681 // Sets the bits corresponding to the interval [NTAMS, top]
1525 1682 // (which contains the implicitly live objects) in the
1526 1683 // card liveness bitmap. Also sets the bit for each region,
1527 1684 // containing live data, in the region liveness bitmap.
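// For example, an object allocated in a region after its NTAMS was recorded
// (i.e., during the current marking cycle) has no mark on the next bitmap,
// yet it must be treated as live; covering [NTAMS, top) in the card bitmap
// accounts for exactly those objects.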
1528 1685
1529 1686 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1530 1687 public:
1531 1688 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1532 1689 BitMap* region_bm,
1533 1690 BitMap* card_bm) :
1534 1691 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1535 1692
1536 1693 bool doHeapRegion(HeapRegion* hr) {
1537 1694
1538 1695 if (hr->continuesHumongous()) {
1539 1696 // We will ignore these here and process them when their
1540 1697 // associated "starts humongous" region is processed (see
1541 1698 // set_bit_for_heap_region()). Note that we cannot rely on their
1542 1699       // associated "starts humongous" region to have its bit set to
1543 1700 // 1 since, due to the region chunking in the parallel region
1544 1701 // iteration, a "continues humongous" region might be visited
1545 1702 // before its associated "starts humongous".
1546 1703 return false;
1547 1704 }
1548 1705
1549 1706 HeapWord* ntams = hr->next_top_at_mark_start();
1550 1707 HeapWord* top = hr->top();
1551 1708
1552 1709 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1553 1710
1554 1711 // Mark the allocated-since-marking portion...
1555 1712 if (ntams < top) {
1556 1713 // This definitely means the region has live objects.
1557 1714 set_bit_for_region(hr);
1558 1715
1559 1716 // Now set the bits in the card bitmap for [ntams, top)
1560 1717 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1561 1718 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1562 1719
1563 1720 // Note: if we're looking at the last region in heap - top
1564 1721 // could be actually just beyond the end of the heap; end_idx
1565 1722 // will then correspond to a (non-existent) card that is also
1566 1723 // just beyond the heap.
1567 1724 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1568 1725       // top is not card aligned - increment to cover
1569 1726       // all the cards spanned by the range [ntams, top)
1570 1727 end_idx += 1;
1571 1728 }
1572 1729
1573 1730 assert(end_idx <= _card_bm->size(),
1574 1731 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1575 1732 end_idx, _card_bm->size()));
1576 1733 assert(start_idx < _card_bm->size(),
1577 1734 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1578 1735 start_idx, _card_bm->size()));
1579 1736
1580 1737 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1581 1738 }
1582 1739
1583 1740 // Set the bit for the region if it contains live data
1584 1741 if (hr->next_marked_bytes() > 0) {
1585 1742 set_bit_for_region(hr);
1586 1743 }
1587 1744
1588 1745 return false;
1589 1746 }
1590 1747 };
1591 1748
1592 1749 class G1ParFinalCountTask: public AbstractGangTask {
1593 1750 protected:
1594 1751 G1CollectedHeap* _g1h;
1595 1752 ConcurrentMark* _cm;
1596 1753 BitMap* _actual_region_bm;
1597 1754 BitMap* _actual_card_bm;
1598 1755
1599 1756 uint _n_workers;
1600 1757
1601 1758 public:
1602 1759 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1603 1760 : AbstractGangTask("G1 final counting"),
1604 1761 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1605 1762 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1606 1763 _n_workers(0) {
1607 1764 // Use the value already set as the number of active threads
1608 1765 // in the call to run_task().
1609 1766 if (G1CollectedHeap::use_parallel_gc_threads()) {
1610 1767 assert( _g1h->workers()->active_workers() > 0,
1611 1768 "Should have been previously set");
1612 1769 _n_workers = _g1h->workers()->active_workers();
1613 1770 } else {
1614 1771 _n_workers = 1;
1615 1772 }
1616 1773 }
1617 1774
1618 1775 void work(uint worker_id) {
1619 1776 assert(worker_id < _n_workers, "invariant");
1620 1777
1621 1778 FinalCountDataUpdateClosure final_update_cl(_g1h,
1622 1779 _actual_region_bm,
1623 1780 _actual_card_bm);
1624 1781
1625 1782 if (G1CollectedHeap::use_parallel_gc_threads()) {
1626 1783 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1627 1784 worker_id,
1628 1785 _n_workers,
1629 1786 HeapRegion::FinalCountClaimValue);
1630 1787 } else {
1631 1788 _g1h->heap_region_iterate(&final_update_cl);
1632 1789 }
1633 1790 }
1634 1791 };
1635 1792
1636 1793 class G1ParNoteEndTask;
1637 1794
1638 1795 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1639 1796 G1CollectedHeap* _g1;
1640 1797 int _worker_num;
1641 1798 size_t _max_live_bytes;
1642 1799 uint _regions_claimed;
1643 1800 size_t _freed_bytes;
1644 1801 FreeRegionList* _local_cleanup_list;
1645 1802 OldRegionSet* _old_proxy_set;
1646 1803 HumongousRegionSet* _humongous_proxy_set;
1647 1804 HRRSCleanupTask* _hrrs_cleanup_task;
1648 1805 double _claimed_region_time;
1649 1806 double _max_region_time;
1650 1807
1651 1808 public:
1652 1809 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1653 1810 int worker_num,
1654 1811 FreeRegionList* local_cleanup_list,
1655 1812 OldRegionSet* old_proxy_set,
1656 1813 HumongousRegionSet* humongous_proxy_set,
1657 1814 HRRSCleanupTask* hrrs_cleanup_task) :
1658 1815 _g1(g1), _worker_num(worker_num),
1659 1816 _max_live_bytes(0), _regions_claimed(0),
1660 1817 _freed_bytes(0),
1661 1818 _claimed_region_time(0.0), _max_region_time(0.0),
1662 1819 _local_cleanup_list(local_cleanup_list),
1663 1820 _old_proxy_set(old_proxy_set),
1664 1821 _humongous_proxy_set(humongous_proxy_set),
1665 1822 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1666 1823
1667 1824 size_t freed_bytes() { return _freed_bytes; }
1668 1825
1669 1826 bool doHeapRegion(HeapRegion *hr) {
1670 1827 if (hr->continuesHumongous()) {
1671 1828 return false;
1672 1829 }
1673 1830 // We use a claim value of zero here because all regions
1674 1831 // were claimed with value 1 in the FinalCount task.
1675 1832 _g1->reset_gc_time_stamps(hr);
1676 1833 double start = os::elapsedTime();
1677 1834 _regions_claimed++;
1678 1835 hr->note_end_of_marking();
1679 1836 _max_live_bytes += hr->max_live_bytes();
1680 1837 _g1->free_region_if_empty(hr,
1681 1838 &_freed_bytes,
1682 1839 _local_cleanup_list,
1683 1840 _old_proxy_set,
1684 1841 _humongous_proxy_set,
1685 1842 _hrrs_cleanup_task,
1686 1843 true /* par */);
1687 1844 double region_time = (os::elapsedTime() - start);
1688 1845 _claimed_region_time += region_time;
1689 1846 if (region_time > _max_region_time) {
1690 1847 _max_region_time = region_time;
1691 1848 }
1692 1849 return false;
1693 1850 }
1694 1851
1695 1852 size_t max_live_bytes() { return _max_live_bytes; }
1696 1853 uint regions_claimed() { return _regions_claimed; }
1697 1854 double claimed_region_time_sec() { return _claimed_region_time; }
1698 1855 double max_region_time_sec() { return _max_region_time; }
1699 1856 };
1700 1857
1701 1858 class G1ParNoteEndTask: public AbstractGangTask {
1702 1859 friend class G1NoteEndOfConcMarkClosure;
1703 1860
1704 1861 protected:
1705 1862 G1CollectedHeap* _g1h;
1706 1863 size_t _max_live_bytes;
1707 1864 size_t _freed_bytes;
1708 1865 FreeRegionList* _cleanup_list;
1709 1866
1710 1867 public:
1711 1868 G1ParNoteEndTask(G1CollectedHeap* g1h,
1712 1869 FreeRegionList* cleanup_list) :
1713 1870 AbstractGangTask("G1 note end"), _g1h(g1h),
1714 1871 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1715 1872
1716 1873 void work(uint worker_id) {
1717 1874 double start = os::elapsedTime();
1718 1875 FreeRegionList local_cleanup_list("Local Cleanup List");
1719 1876 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1720 1877 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1721 1878 HRRSCleanupTask hrrs_cleanup_task;
1722 1879 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1723 1880 &old_proxy_set,
1724 1881 &humongous_proxy_set,
1725 1882 &hrrs_cleanup_task);
1726 1883 if (G1CollectedHeap::use_parallel_gc_threads()) {
1727 1884 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1728 1885 _g1h->workers()->active_workers(),
1729 1886 HeapRegion::NoteEndClaimValue);
1730 1887 } else {
1731 1888 _g1h->heap_region_iterate(&g1_note_end);
1732 1889 }
1733 1890 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1734 1891
1735 1892 // Now update the lists
1736 1893 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1737 1894 NULL /* free_list */,
1738 1895 &old_proxy_set,
1739 1896 &humongous_proxy_set,
1740 1897 true /* par */);
1741 1898 {
1742 1899 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1743 1900 _max_live_bytes += g1_note_end.max_live_bytes();
1744 1901 _freed_bytes += g1_note_end.freed_bytes();
1745 1902
1746 1903 // If we iterate over the global cleanup list at the end of
1747 1904 // cleanup to do this printing we will not guarantee to only
1748 1905 // generate output for the newly-reclaimed regions (the list
1749 1906 // might not be empty at the beginning of cleanup; we might
1750 1907 // still be working on its previous contents). So we do the
1751 1908 // printing here, before we append the new regions to the global
1752 1909 // cleanup list.
1753 1910
1754 1911 G1HRPrinter* hr_printer = _g1h->hr_printer();
1755 1912 if (hr_printer->is_active()) {
1756 1913 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1757 1914 while (iter.more_available()) {
1758 1915 HeapRegion* hr = iter.get_next();
1759 1916 hr_printer->cleanup(hr);
1760 1917 }
1761 1918 }
1762 1919
1763 1920 _cleanup_list->add_as_tail(&local_cleanup_list);
1764 1921 assert(local_cleanup_list.is_empty(), "post-condition");
1765 1922
1766 1923 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1767 1924 }
1768 1925 }
1769 1926 size_t max_live_bytes() { return _max_live_bytes; }
1770 1927 size_t freed_bytes() { return _freed_bytes; }
1771 1928 };
1772 1929
1773 1930 class G1ParScrubRemSetTask: public AbstractGangTask {
1774 1931 protected:
1775 1932 G1RemSet* _g1rs;
1776 1933 BitMap* _region_bm;
1777 1934 BitMap* _card_bm;
1778 1935 public:
1779 1936 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1780 1937 BitMap* region_bm, BitMap* card_bm) :
1781 1938 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1782 1939 _region_bm(region_bm), _card_bm(card_bm) { }
1783 1940
1784 1941 void work(uint worker_id) {
1785 1942 if (G1CollectedHeap::use_parallel_gc_threads()) {
1786 1943 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1787 1944 HeapRegion::ScrubRemSetClaimValue);
1788 1945 } else {
1789 1946 _g1rs->scrub(_region_bm, _card_bm);
1790 1947 }
1791 1948 }
1792 1949
1793 1950 };
1794 1951
1795 1952 void ConcurrentMark::cleanup() {
1796 1953 // world is stopped at this checkpoint
1797 1954 assert(SafepointSynchronize::is_at_safepoint(),
1798 1955 "world should be stopped");
1799 1956 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1800 1957
1801 1958 // If a full collection has happened, we shouldn't do this.
1802 1959 if (has_aborted()) {
1803 1960 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1804 1961 return;
1805 1962 }
1806 1963
1807 1964 HRSPhaseSetter x(HRSPhaseCleanup);
1808 1965 g1h->verify_region_sets_optional();
1809 1966
1810 1967 if (VerifyDuringGC) {
1811 1968 HandleMark hm; // handle scope
1812 1969 gclog_or_tty->print(" VerifyDuringGC:(before)");
1813 1970 Universe::heap()->prepare_for_verify();
1814 1971 Universe::verify(/* silent */ false,
1815 1972 /* option */ VerifyOption_G1UsePrevMarking);
1816 1973 }
1817 1974
1818 1975 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1819 1976 g1p->record_concurrent_mark_cleanup_start();
1820 1977
1821 1978 double start = os::elapsedTime();
1822 1979
1823 1980 HeapRegionRemSet::reset_for_cleanup_tasks();
1824 1981
1825 1982 uint n_workers;
1826 1983
1827 1984 // Do counting once more with the world stopped for good measure.
1828 1985 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1829 1986
1830 1987 if (G1CollectedHeap::use_parallel_gc_threads()) {
1831 1988 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1832 1989 "sanity check");
1833 1990
1834 1991 g1h->set_par_threads();
1835 1992 n_workers = g1h->n_par_threads();
1836 1993 assert(g1h->n_par_threads() == n_workers,
1837 1994 "Should not have been reset");
1838 1995 g1h->workers()->run_task(&g1_par_count_task);
1839 1996 // Done with the parallel phase so reset to 0.
1840 1997 g1h->set_par_threads(0);
1841 1998
1842 1999 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1843 2000 "sanity check");
1844 2001 } else {
1845 2002 n_workers = 1;
1846 2003 g1_par_count_task.work(0);
1847 2004 }
1848 2005
1849 2006 if (VerifyDuringGC) {
1850 2007 // Verify that the counting data accumulated during marking matches
1851 2008 // that calculated by walking the marking bitmap.
1852 2009
1853 2010 // Bitmaps to hold expected values
1854 2011 BitMap expected_region_bm(_region_bm.size(), false);
1855 2012 BitMap expected_card_bm(_card_bm.size(), false);
1856 2013
1857 2014 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1858 2015 &_region_bm,
1859 2016 &_card_bm,
1860 2017 &expected_region_bm,
1861 2018 &expected_card_bm);
1862 2019
1863 2020 if (G1CollectedHeap::use_parallel_gc_threads()) {
1864 2021 g1h->set_par_threads((int)n_workers);
1865 2022 g1h->workers()->run_task(&g1_par_verify_task);
1866 2023 // Done with the parallel phase so reset to 0.
1867 2024 g1h->set_par_threads(0);
1868 2025
1869 2026 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1870 2027 "sanity check");
1871 2028 } else {
1872 2029 g1_par_verify_task.work(0);
1873 2030 }
1874 2031
1875 2032 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1876 2033 }
1877 2034
1878 2035 size_t start_used_bytes = g1h->used();
1879 2036 g1h->set_marking_complete();
1880 2037
1881 2038 double count_end = os::elapsedTime();
1882 2039 double this_final_counting_time = (count_end - start);
1883 2040 _total_counting_time += this_final_counting_time;
1884 2041
1885 2042 if (G1PrintRegionLivenessInfo) {
1886 2043 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1887 2044 _g1h->heap_region_iterate(&cl);
1888 2045 }
1889 2046
1890 2047 // Install newly created mark bitMap as "prev".
1891 2048 swapMarkBitMaps();
1892 2049
1893 2050 g1h->reset_gc_time_stamp();
1894 2051
1895 2052 // Note end of marking in all heap regions.
1896 2053 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1897 2054 if (G1CollectedHeap::use_parallel_gc_threads()) {
1898 2055 g1h->set_par_threads((int)n_workers);
1899 2056 g1h->workers()->run_task(&g1_par_note_end_task);
1900 2057 g1h->set_par_threads(0);
1901 2058
1902 2059 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1903 2060 "sanity check");
1904 2061 } else {
1905 2062 g1_par_note_end_task.work(0);
1906 2063 }
1907 2064 g1h->check_gc_time_stamps();
1908 2065
1909 2066 if (!cleanup_list_is_empty()) {
1910 2067 // The cleanup list is not empty, so we'll have to process it
1911 2068 // concurrently. Notify anyone else that might be wanting free
1912 2069 // regions that there will be more free regions coming soon.
1913 2070 g1h->set_free_regions_coming();
1914 2071 }
1915 2072
1916 2073   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1917 2074   // call below, since rem set sizes affect the metric by which we sort the heap regions.
1918 2075 if (G1ScrubRemSets) {
1919 2076 double rs_scrub_start = os::elapsedTime();
1920 2077 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1921 2078 if (G1CollectedHeap::use_parallel_gc_threads()) {
1922 2079 g1h->set_par_threads((int)n_workers);
1923 2080 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1924 2081 g1h->set_par_threads(0);
1925 2082
1926 2083 assert(g1h->check_heap_region_claim_values(
1927 2084 HeapRegion::ScrubRemSetClaimValue),
1928 2085 "sanity check");
1929 2086 } else {
1930 2087 g1_par_scrub_rs_task.work(0);
1931 2088 }
1932 2089
1933 2090 double rs_scrub_end = os::elapsedTime();
1934 2091 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1935 2092 _total_rs_scrub_time += this_rs_scrub_time;
1936 2093 }
1937 2094
1938 2095 // this will also free any regions totally full of garbage objects,
1939 2096 // and sort the regions.
1940 2097 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1941 2098
1942 2099 // Statistics.
1943 2100 double end = os::elapsedTime();
1944 2101 _cleanup_times.add((end - start) * 1000.0);
1945 2102
1946 2103 if (G1Log::fine()) {
1947 2104 g1h->print_size_transition(gclog_or_tty,
1948 2105 start_used_bytes,
1949 2106 g1h->used(),
1950 2107 g1h->capacity());
1951 2108 }
1952 2109
1953 2110 // Clean up will have freed any regions completely full of garbage.
1954 2111 // Update the soft reference policy with the new heap occupancy.
1955 2112 Universe::update_heap_info_at_gc();
1956 2113
1957 2114 // We need to make this be a "collection" so any collection pause that
1958 2115 // races with it goes around and waits for completeCleanup to finish.
1959 2116 g1h->increment_total_collections();
1960 2117
1961 2118 // We reclaimed old regions so we should calculate the sizes to make
1962 2119 // sure we update the old gen/space data.
1963 2120 g1h->g1mm()->update_sizes();
1964 2121
1965 2122 if (VerifyDuringGC) {
1966 2123 HandleMark hm; // handle scope
1967 2124 gclog_or_tty->print(" VerifyDuringGC:(after)");
1968 2125 Universe::heap()->prepare_for_verify();
1969 2126 Universe::verify(/* silent */ false,
1970 2127 /* option */ VerifyOption_G1UsePrevMarking);
1971 2128 }
1972 2129
1973 2130 g1h->verify_region_sets_optional();
1974 2131 }
1975 2132
1976 2133 void ConcurrentMark::completeCleanup() {
1977 2134 if (has_aborted()) return;
1978 2135
1979 2136 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1980 2137
1981 2138 _cleanup_list.verify_optional();
1982 2139 FreeRegionList tmp_free_list("Tmp Free List");
1983 2140
1984 2141 if (G1ConcRegionFreeingVerbose) {
1985 2142 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1986 2143 "cleanup list has %u entries",
1987 2144 _cleanup_list.length());
1988 2145 }
1989 2146
1990 2147   // No one else should be accessing the _cleanup_list at this point,
1991 2148 // so it's not necessary to take any locks
1992 2149 while (!_cleanup_list.is_empty()) {
1993 2150 HeapRegion* hr = _cleanup_list.remove_head();
1994 2151 assert(hr != NULL, "the list was not empty");
1995 2152 hr->par_clear();
1996 2153 tmp_free_list.add_as_tail(hr);
1997 2154
1998 2155 // Instead of adding one region at a time to the secondary_free_list,
1999 2156 // we accumulate them in the local list and move them a few at a
2000 2157 // time. This also cuts down on the number of notify_all() calls
2001 2158 // we do during this process. We'll also append the local list when
2002 2159 // _cleanup_list is empty (which means we just removed the last
2003 2160 // region from the _cleanup_list).
2004 2161 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2005 2162 _cleanup_list.is_empty()) {
2006 2163 if (G1ConcRegionFreeingVerbose) {
2007 2164 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2008 2165 "appending %u entries to the secondary_free_list, "
2009 2166 "cleanup list still has %u entries",
2010 2167 tmp_free_list.length(),
2011 2168 _cleanup_list.length());
2012 2169 }
2013 2170
2014 2171 {
2015 2172 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2016 2173 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2017 2174 SecondaryFreeList_lock->notify_all();
2018 2175 }
2019 2176
2020 2177 if (G1StressConcRegionFreeing) {
2021 2178 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2022 2179 os::sleep(Thread::current(), (jlong) 1, false);
2023 2180 }
2024 2181 }
2025 2182 }
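      // As a rough illustration (batch length of 5 assumed here; the real
      // default is defined in g1_globals.hpp): freeing 100 regions would
      // take the lock and call notify_all() about 20 times instead of 100,
      // at the cost of regions becoming available slightly later.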
2026 2183 }
2027 2184 assert(tmp_free_list.is_empty(), "post-condition");
2028 2185 }
2029 2186
2030 2187 // Support closures for reference processing in G1
2031 2188
2032 2189 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2033 2190 HeapWord* addr = (HeapWord*)obj;
2034 2191 return addr != NULL &&
2035 2192 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2036 2193 }
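// In other words: a reference outside the G1-reserved space is always
// treated as live by this predicate, while an object inside it is live
// unless is_obj_ill() reports it dead with respect to the current marking.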
2037 2194
2038 2195 class G1CMKeepAliveClosure: public ExtendedOopClosure {
2039 2196 G1CollectedHeap* _g1;
2040 2197 ConcurrentMark* _cm;
2041 2198 public:
2042 2199 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2043 2200 _g1(g1), _cm(cm) {
2044 2201 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2045 2202 }
2046 2203
2047 2204 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2048 2205 virtual void do_oop( oop* p) { do_oop_work(p); }
2049 2206
2050 2207 template <class T> void do_oop_work(T* p) {
2051 2208 oop obj = oopDesc::load_decode_heap_oop(p);
2052 2209 HeapWord* addr = (HeapWord*)obj;
2053 2210
2054 2211 if (_cm->verbose_high()) {
2055 2212 gclog_or_tty->print_cr("\t[0] we're looking at location "
2056 2213 "*"PTR_FORMAT" = "PTR_FORMAT,
2057 2214 p, (void*) obj);
2058 2215 }
2059 2216
2060 2217 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2061 2218 _cm->mark_and_count(obj);
2062 2219 _cm->mark_stack_push(obj);
2063 2220 }
2064 2221 }
2065 2222 };
2066 2223
2067 2224 class G1CMDrainMarkingStackClosure: public VoidClosure {
2068 2225 ConcurrentMark* _cm;
2069 2226 CMMarkStack* _markStack;
2070 2227 G1CMKeepAliveClosure* _oopClosure;
2071 2228 public:
2072 2229 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2073 2230 G1CMKeepAliveClosure* oopClosure) :
2074 2231 _cm(cm),
2075 2232 _markStack(markStack),
2076 2233 _oopClosure(oopClosure) { }
2077 2234
2078 2235 void do_void() {
2079 2236 _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false);
2080 2237 }
2081 2238 };
2082 2239
2083 2240 // 'Keep Alive' closure used by parallel reference processing.
2084 2241 // An instance of this closure is used in the parallel reference processing
2085 2242 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2086 2243 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2087 2244 // placed onto discovered ref lists only once, so we can mark and push with no
2088 2245 // need to check whether the object has already been marked. Using the
2089 2246 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2090 2247 // operating on the global mark stack. This means that an individual
2091 2248 // worker would be doing lock-free pushes while it processes its own
2092 2249 // discovered ref list followed by drain call. If the discovered ref lists
2093 2250 // are unbalanced then this could cause interference with the other
2094 2251 // workers. Using a CMTask (and its embedded local data structures)
2095 2252 // avoids that potential interference.
2096 2253 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2097 2254 ConcurrentMark* _cm;
2098 2255 CMTask* _task;
2099 2256 int _ref_counter_limit;
2100 2257 int _ref_counter;
2101 2258 public:
2102 2259 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2103 2260 _cm(cm), _task(task),
2104 2261 _ref_counter_limit(G1RefProcDrainInterval) {
2105 2262 assert(_ref_counter_limit > 0, "sanity");
2106 2263 _ref_counter = _ref_counter_limit;
2107 2264 }
2108 2265
2109 2266 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2110 2267 virtual void do_oop( oop* p) { do_oop_work(p); }
2111 2268
2112 2269 template <class T> void do_oop_work(T* p) {
2113 2270 if (!_cm->has_overflown()) {
2114 2271 oop obj = oopDesc::load_decode_heap_oop(p);
2115 2272 if (_cm->verbose_high()) {
2116 2273 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2117 2274 "*"PTR_FORMAT" = "PTR_FORMAT,
2118 2275 _task->worker_id(), p, (void*) obj);
2119 2276 }
2120 2277
2121 2278 _task->deal_with_reference(obj);
2122 2279 _ref_counter--;
2123 2280
2124 2281 if (_ref_counter == 0) {
2125 2282 // We have dealt with _ref_counter_limit references, pushing them and objects
2126 2283 // reachable from them on to the local stack (and possibly the global stack).
2127 2284 // Call do_marking_step() to process these entries. We call the routine in a
2128 2285 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2129 2286 // with the entries that we've pushed as a result of the deal_with_reference
2130 2287 // calls above) or we overflow.
2131 2288 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2132 2289 // while there may still be some work to do. (See the comment at the
2133 2290 // beginning of CMTask::do_marking_step() for those conditions - one of which
2134 2291 // is reaching the specified time target.) It is only when
2135 2292 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2136 2293 // that the marking has completed.
2137 2294 do {
2138 2295 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2139 2296 _task->do_marking_step(mark_step_duration_ms,
2140 2297 false /* do_stealing */,
2141 2298 false /* do_termination */);
2142 2299 } while (_task->has_aborted() && !_cm->has_overflown());
2143 2300 _ref_counter = _ref_counter_limit;
2144 2301 }
2145 2302 } else {
2146 2303 if (_cm->verbose_high()) {
2147 2304 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2148 2305 }
2149 2306 }
2150 2307 }
2151 2308 };
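// A hedged usage illustration (the flag value is made up for the example;
// G1RefProcDrainInterval is the flag read into _ref_counter_limit above):
//
//   java -XX:+UseG1GC -XX:G1RefProcDrainInterval=1000 ...
//
// would make each worker call do_marking_step() after every 1000 references
// it keeps alive, bounding how much marking work can pile up between
// reference-processing steps.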
2152 2309
2153 2310 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2154 2311 ConcurrentMark* _cm;
2155 2312 CMTask* _task;
2156 2313 public:
2157 2314 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2158 2315 _cm(cm), _task(task) { }
2159 2316
2160 2317 void do_void() {
2161 2318 do {
2162 2319 if (_cm->verbose_high()) {
2163 2320         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step",
2164 2321 _task->worker_id());
2165 2322 }
2166 2323
2167 2324 // We call CMTask::do_marking_step() to completely drain the local and
2168 2325 // global marking stacks. The routine is called in a loop, which we'll
2169 2326     // exit if there's nothing more to do (i.e. we've completely drained the
2170 2327 // entries that were pushed as a result of applying the
2171 2328 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2172 2329 // lists above) or we overflow the global marking stack.
2173 2330 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2174 2331 // while there may still be some work to do. (See the comment at the
2175 2332 // beginning of CMTask::do_marking_step() for those conditions - one of which
2176 2333 // is reaching the specified time target.) It is only when
2177 2334 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2178 2335 // that the marking has completed.
2179 2336
2180 2337 _task->do_marking_step(1000000000.0 /* something very large */,
2181 2338 true /* do_stealing */,
2182 2339 true /* do_termination */);
2183 2340 } while (_task->has_aborted() && !_cm->has_overflown());
2184 2341 }
2185 2342 };
2186 2343
2187 2344 // Implementation of AbstractRefProcTaskExecutor for parallel
2188 2345 // reference processing at the end of G1 concurrent marking
2189 2346
2190 2347 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2191 2348 private:
2192 2349 G1CollectedHeap* _g1h;
2193 2350 ConcurrentMark* _cm;
2194 2351 WorkGang* _workers;
2195 2352 int _active_workers;
2196 2353
2197 2354 public:
2198 2355 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2199 2356 ConcurrentMark* cm,
2200 2357 WorkGang* workers,
2201 2358 int n_workers) :
2202 2359 _g1h(g1h), _cm(cm),
2203 2360 _workers(workers), _active_workers(n_workers) { }
2204 2361
2205 2362 // Executes the given task using concurrent marking worker threads.
2206 2363 virtual void execute(ProcessTask& task);
2207 2364 virtual void execute(EnqueueTask& task);
2208 2365 };
2209 2366
2210 2367 class G1CMRefProcTaskProxy: public AbstractGangTask {
2211 2368 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2212 2369 ProcessTask& _proc_task;
2213 2370 G1CollectedHeap* _g1h;
2214 2371 ConcurrentMark* _cm;
2215 2372
2216 2373 public:
2217 2374 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2218 2375 G1CollectedHeap* g1h,
2219 2376 ConcurrentMark* cm) :
2220 2377 AbstractGangTask("Process reference objects in parallel"),
2221 2378 _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2222 2379
2223 2380 virtual void work(uint worker_id) {
2224 2381 CMTask* marking_task = _cm->task(worker_id);
2225 2382 G1CMIsAliveClosure g1_is_alive(_g1h);
2226 2383 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2227 2384 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2228 2385
2229 2386 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2230 2387 }
2231 2388 };
2232 2389
2233 2390 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2234 2391 assert(_workers != NULL, "Need parallel worker threads.");
2235 2392
2236 2393 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2237 2394
2238 2395 // We need to reset the phase for each task execution so that
2239 2396 // the termination protocol of CMTask::do_marking_step works.
2240 2397 _cm->set_phase(_active_workers, false /* concurrent */);
2241 2398 _g1h->set_par_threads(_active_workers);
2242 2399 _workers->run_task(&proc_task_proxy);
2243 2400 _g1h->set_par_threads(0);
2244 2401 }
2245 2402
2246 2403 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2247 2404 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2248 2405 EnqueueTask& _enq_task;
2249 2406
2250 2407 public:
2251 2408 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2252 2409 AbstractGangTask("Enqueue reference objects in parallel"),
2253 2410 _enq_task(enq_task) { }
2254 2411
2255 2412 virtual void work(uint worker_id) {
2256 2413 _enq_task.work(worker_id);
2257 2414 }
2258 2415 };
2259 2416
2260 2417 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2261 2418 assert(_workers != NULL, "Need parallel worker threads.");
2262 2419
2263 2420 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2264 2421
2265 2422 _g1h->set_par_threads(_active_workers);
2266 2423 _workers->run_task(&enq_task_proxy);
2267 2424 _g1h->set_par_threads(0);
2268 2425 }
2269 2426
2270 2427 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2271 2428 ResourceMark rm;
2272 2429 HandleMark hm;
2273 2430
2274 2431 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2275 2432
2276 2433 // Is alive closure.
2277 2434 G1CMIsAliveClosure g1_is_alive(g1h);
2278 2435
2279 2436 // Inner scope to exclude the cleaning of the string and symbol
2280 2437 // tables from the displayed time.
2281 2438 {
2282 2439 if (G1Log::finer()) {
2283 2440 gclog_or_tty->put(' ');
2284 2441 }
2285 2442 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2286 2443
2287 2444 ReferenceProcessor* rp = g1h->ref_processor_cm();
2288 2445
2289 2446 // See the comment in G1CollectedHeap::ref_processing_init()
2290 2447 // about how reference processing currently works in G1.
2291 2448
2292 2449 // Process weak references.
2293 2450 rp->setup_policy(clear_all_soft_refs);
2294 2451 assert(_markStack.isEmpty(), "mark stack should be empty");
2295 2452
2296 2453 G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2297 2454 G1CMDrainMarkingStackClosure
2298 2455 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2299 2456
2300 2457 // We use the work gang from the G1CollectedHeap and we utilize all
2301 2458 // the worker threads.
2302 2459 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2303 2460 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2304 2461
2305 2462 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2306 2463 g1h->workers(), active_workers);
2307 2464
2308 2465 if (rp->processing_is_mt()) {
2309 2466 // Set the degree of MT here. If the discovery is done MT, there
2310 2467 // may have been a different number of threads doing the discovery
2311 2468 // and a different number of discovered lists may have Ref objects.
2312 2469 // That is OK as long as the Reference lists are balanced (see
2313 2470 // balance_all_queues() and balance_queues()).
2314 2471 rp->set_active_mt_degree(active_workers);
2315 2472
2316 2473 rp->process_discovered_references(&g1_is_alive,
2317 2474 &g1_keep_alive,
2318 2475 &g1_drain_mark_stack,
2319 2476 &par_task_executor);
2320 2477
2321 2478 // The work routines of the parallel keep_alive and drain_marking_stack
2322 2479 // will set the has_overflown flag if we overflow the global marking
2323 2480 // stack.
2324 2481 } else {
2325 2482 rp->process_discovered_references(&g1_is_alive,
2326 2483 &g1_keep_alive,
2327 2484 &g1_drain_mark_stack,
2328 2485 NULL);
2329 2486 }
2330 2487
2331 2488 assert(_markStack.overflow() || _markStack.isEmpty(),
2332 2489 "mark stack should be empty (unless it overflowed)");
2333 2490 if (_markStack.overflow()) {
2334 2491 // Should have been done already when we tried to push an
2335 2492 // entry on to the global mark stack. But let's do it again.
2336 2493 set_has_overflown();
2337 2494 }
2338 2495
2339 2496 if (rp->processing_is_mt()) {
2340 2497 assert(rp->num_q() == active_workers, "why not");
2341 2498 rp->enqueue_discovered_references(&par_task_executor);
2342 2499 } else {
2343 2500 rp->enqueue_discovered_references();
2344 2501 }
2345 2502
2346 2503 rp->verify_no_references_recorded();
2347 2504 assert(!rp->discovery_enabled(), "Post condition");
2348 2505 }
2349 2506
2350 2507 // Now clean up stale oops in StringTable
2351 2508 StringTable::unlink(&g1_is_alive);
2352 2509 // Clean up unreferenced symbols in symbol table.
2353 2510 SymbolTable::unlink();
2354 2511 }
2355 2512
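// Called during the cleanup pause: the bitmap we have just finished marking
// on ("next") becomes the "prev" bitmap for the following cycle, and the old
// "prev" bitmap will be cleared and reused as "next". The casts strip and
// re-add the read-only (RO) view of the prev bitmap.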
2356 2513 void ConcurrentMark::swapMarkBitMaps() {
2357 2514 CMBitMapRO* temp = _prevMarkBitMap;
2358 2515 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2359 2516 _nextMarkBitMap = (CMBitMap*) temp;
2360 2517 }
2361 2518
2362 2519 class CMRemarkTask: public AbstractGangTask {
2363 2520 private:
2364 2521 ConcurrentMark *_cm;
2365 2522
2366 2523 public:
2367 2524 void work(uint worker_id) {
2368 2525 // Since all available tasks are actually started, we should
2369 2526     // only proceed if we're supposed to be active.
2370 2527 if (worker_id < _cm->active_tasks()) {
2371 2528 CMTask* task = _cm->task(worker_id);
2372 2529 task->record_start_time();
2373 2530 do {
2374 2531 task->do_marking_step(1000000000.0 /* something very large */,
2375 2532 true /* do_stealing */,
2376 2533 true /* do_termination */);
2377 2534 } while (task->has_aborted() && !_cm->has_overflown());
2378 2535 // If we overflow, then we do not want to restart. We instead
2379 2536 // want to abort remark and do concurrent marking again.
2380 2537 task->record_end_time();
2381 2538 }
2382 2539 }
2383 2540
2384 2541 CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2385 2542 AbstractGangTask("Par Remark"), _cm(cm) {
2386 2543 _cm->terminator()->reset_for_reuse(active_workers);
2387 2544 }
2388 2545 };
2389 2546
2390 2547 void ConcurrentMark::checkpointRootsFinalWork() {
2391 2548 ResourceMark rm;
2392 2549 HandleMark hm;
2393 2550 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2394 2551
2395 2552 g1h->ensure_parsability(false);
2396 2553
2397 2554 if (G1CollectedHeap::use_parallel_gc_threads()) {
2398 2555 G1CollectedHeap::StrongRootsScope srs(g1h);
2399 2556 // this is remark, so we'll use up all active threads
2400 2557 uint active_workers = g1h->workers()->active_workers();
2401 2558 if (active_workers == 0) {
2402 2559 assert(active_workers > 0, "Should have been set earlier");
2403 2560 active_workers = (uint) ParallelGCThreads;
2404 2561 g1h->workers()->set_active_workers(active_workers);
2405 2562 }
2406 2563 set_phase(active_workers, false /* concurrent */);
2407 2564     // Leave _parallel_marking_threads at its
2408 2565 // value originally calculated in the ConcurrentMark
2409 2566 // constructor and pass values of the active workers
2410 2567 // through the gang in the task.
2411 2568
2412 2569 CMRemarkTask remarkTask(this, active_workers);
2413 2570 g1h->set_par_threads(active_workers);
2414 2571 g1h->workers()->run_task(&remarkTask);
2415 2572 g1h->set_par_threads(0);
2416 2573 } else {
2417 2574 G1CollectedHeap::StrongRootsScope srs(g1h);
2418 2575     // this is remark, but with no parallel workers we run it serially
2419 2576 uint active_workers = 1;
2420 2577 set_phase(active_workers, false /* concurrent */);
2421 2578
2422 2579 CMRemarkTask remarkTask(this, active_workers);
2423 2580 // We will start all available threads, even if we decide that the
2424 2581 // active_workers will be fewer. The extra ones will just bail out
2425 2582 // immediately.
2426 2583 remarkTask.work(0);
2427 2584 }
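  // By this point the remark tasks above should have drained every SATB
  // buffer, which is what the guarantee below checks.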
2428 2585 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2429 2586 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2430 2587
2431 2588 print_stats();
2432 2589
2433 2590 #if VERIFY_OBJS_PROCESSED
2434 2591 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2435 2592 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2436 2593 _scan_obj_cl.objs_processed,
2437 2594 ThreadLocalObjQueue::objs_enqueued);
2438 2595 guarantee(_scan_obj_cl.objs_processed ==
2439 2596 ThreadLocalObjQueue::objs_enqueued,
2440 2597 "Different number of objs processed and enqueued.");
2441 2598 }
2442 2599 #endif
2443 2600 }
2444 2601
2445 2602 #ifndef PRODUCT
2446 2603
2447 2604 class PrintReachableOopClosure: public OopClosure {
2448 2605 private:
2449 2606 G1CollectedHeap* _g1h;
2450 2607 outputStream* _out;
2451 2608 VerifyOption _vo;
2452 2609 bool _all;
2453 2610
2454 2611 public:
2455 2612 PrintReachableOopClosure(outputStream* out,
2456 2613 VerifyOption vo,
2457 2614 bool all) :
2458 2615 _g1h(G1CollectedHeap::heap()),
2459 2616 _out(out), _vo(vo), _all(all) { }
2460 2617
2461 2618 void do_oop(narrowOop* p) { do_oop_work(p); }
2462 2619 void do_oop( oop* p) { do_oop_work(p); }
2463 2620
2464 2621 template <class T> void do_oop_work(T* p) {
2465 2622 oop obj = oopDesc::load_decode_heap_oop(p);
2466 2623 const char* str = NULL;
2467 2624 const char* str2 = "";
2468 2625
2469 2626 if (obj == NULL) {
2470 2627 str = "";
2471 2628 } else if (!_g1h->is_in_g1_reserved(obj)) {
2472 2629 str = " O";
2473 2630 } else {
2474 2631 HeapRegion* hr = _g1h->heap_region_containing(obj);
2475 2632 guarantee(hr != NULL, "invariant");
2476 2633 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2477 2634 bool marked = _g1h->is_marked(obj, _vo);
2478 2635
2479 2636 if (over_tams) {
2480 2637 str = " >";
2481 2638 if (marked) {
2482 2639 str2 = " AND MARKED";
2483 2640 }
2484 2641 } else if (marked) {
2485 2642 str = " M";
2486 2643 } else {
2487 2644 str = " NOT";
2488 2645 }
2489 2646 }
2490 2647
2491 2648 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2492 2649 p, (void*) obj, str, str2);
2493 2650 }
2494 2651 };
2495 2652
2496 2653 class PrintReachableObjectClosure : public ObjectClosure {
2497 2654 private:
2498 2655 G1CollectedHeap* _g1h;
2499 2656 outputStream* _out;
2500 2657 VerifyOption _vo;
2501 2658 bool _all;
2502 2659 HeapRegion* _hr;
2503 2660
2504 2661 public:
2505 2662 PrintReachableObjectClosure(outputStream* out,
2506 2663 VerifyOption vo,
2507 2664 bool all,
2508 2665 HeapRegion* hr) :
2509 2666 _g1h(G1CollectedHeap::heap()),
2510 2667 _out(out), _vo(vo), _all(all), _hr(hr) { }
2511 2668
2512 2669 void do_object(oop o) {
2513 2670 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2514 2671 bool marked = _g1h->is_marked(o, _vo);
2515 2672 bool print_it = _all || over_tams || marked;
2516 2673
2517 2674 if (print_it) {
2518 2675 _out->print_cr(" "PTR_FORMAT"%s",
2519 2676 o, (over_tams) ? " >" : (marked) ? " M" : "");
2520 2677 PrintReachableOopClosure oopCl(_out, _vo, _all);
2521 2678 o->oop_iterate_no_header(&oopCl);
2522 2679 }
2523 2680 }
2524 2681 };
2525 2682
2526 2683 class PrintReachableRegionClosure : public HeapRegionClosure {
2527 2684 private:
2528 2685 G1CollectedHeap* _g1h;
2529 2686 outputStream* _out;
2530 2687 VerifyOption _vo;
2531 2688 bool _all;
2532 2689
2533 2690 public:
2534 2691 bool doHeapRegion(HeapRegion* hr) {
2535 2692 HeapWord* b = hr->bottom();
2536 2693 HeapWord* e = hr->end();
2537 2694 HeapWord* t = hr->top();
2538 2695 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2539 2696 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2540 2697 "TAMS: "PTR_FORMAT, b, e, t, p);
2541 2698 _out->cr();
2542 2699
2543 2700 HeapWord* from = b;
2544 2701 HeapWord* to = t;
2545 2702
2546 2703 if (to > from) {
2547 2704 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2548 2705 _out->cr();
2549 2706 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2550 2707 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2551 2708 _out->cr();
2552 2709 }
2553 2710
2554 2711 return false;
2555 2712 }
2556 2713
2557 2714 PrintReachableRegionClosure(outputStream* out,
2558 2715 VerifyOption vo,
2559 2716 bool all) :
2560 2717 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2561 2718 };
2562 2719
2563 2720 void ConcurrentMark::print_reachable(const char* str,
2564 2721 VerifyOption vo,
2565 2722 bool all) {
2566 2723 gclog_or_tty->cr();
2567 2724 gclog_or_tty->print_cr("== Doing heap dump... ");
2568 2725
2569 2726 if (G1PrintReachableBaseFile == NULL) {
2570 2727 gclog_or_tty->print_cr(" #### error: no base file defined");
2571 2728 return;
2572 2729 }
2573 2730
2574 2731 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2575 2732 (JVM_MAXPATHLEN - 1)) {
2576 2733 gclog_or_tty->print_cr(" #### error: file name too long");
2577 2734 return;
2578 2735 }
2579 2736
2580 2737 char file_name[JVM_MAXPATHLEN];
2581 2738 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2582 2739 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2583 2740
2584 2741 fileStream fout(file_name);
2585 2742 if (!fout.is_open()) {
2586 2743 gclog_or_tty->print_cr(" #### error: could not open file");
2587 2744 return;
2588 2745 }
2589 2746
2590 2747 outputStream* out = &fout;
2591 2748 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2592 2749 out->cr();
2593 2750
2594 2751 out->print_cr("--- ITERATING OVER REGIONS");
2595 2752 out->cr();
2596 2753 PrintReachableRegionClosure rcl(out, vo, all);
2597 2754 _g1h->heap_region_iterate(&rcl);
2598 2755 out->cr();
2599 2756
2600 2757 gclog_or_tty->print_cr(" done");
2601 2758 gclog_or_tty->flush();
2602 2759 }
2603 2760
2604 2761 #endif // PRODUCT
2605 2762
2606 2763 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2607 2764 // Note we are overriding the read-only view of the prev map here, via
2608 2765 // the cast.
2609 2766 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2610 2767 }
2611 2768
2612 2769 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2613 2770 _nextMarkBitMap->clearRange(mr);
2614 2771 }
2615 2772
2616 2773 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2617 2774 clearRangePrevBitmap(mr);
2618 2775 clearRangeNextBitmap(mr);
2619 2776 }
2620 2777
2621 2778 HeapRegion*
2622 2779 ConcurrentMark::claim_region(uint worker_id) {
2623 2780 // "checkpoint" the finger
2624 2781 HeapWord* finger = _finger;
2625 2782
2626 2783 // _heap_end will not change underneath our feet; it only changes at
2627 2784 // yield points.
2628 2785 while (finger < _heap_end) {
2629 2786 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2630 2787
2631 2788 // Note on how this code handles humongous regions. In the
2632 2789 // normal case the finger will reach the start of a "starts
2633 2790 // humongous" (SH) region. Its end will either be the end of the
2634 2791 // last "continues humongous" (CH) region in the sequence, or the
2635 2792 // standard end of the SH region (if the SH is the only region in
2636 2793 // the sequence). That way claim_region() will skip over the CH
2637 2794 // regions. However, there is a subtle race between a CM thread
2638 2795 // executing this method and a mutator thread doing a humongous
2639 2796 // object allocation. The two are not mutually exclusive as the CM
2640 2797 // thread does not need to hold the Heap_lock when it gets
2641 2798 // here. So there is a chance that claim_region() will come across
2642 2799 // a free region that's in the process of becoming an SH or a CH
2643 2800 // region. In the former case, it will either
2644 2801 // a) Miss the update to the region's end, in which case it will
2645 2802 // visit every subsequent CH region, will find their bitmaps
2646 2803 // empty, and do nothing, or
2647 2804 // b) Will observe the update of the region's end (in which case
2648 2805 // it will skip the subsequent CH regions).
2649 2806 // If it comes across a region that suddenly becomes CH, the
2650 2807 // scenario will be similar to b). So, the race between
2651 2808 // claim_region() and a humongous object allocation might force us
2652 2809 // to do a bit of unnecessary work (due to some unnecessary bitmap
2653 2810 // iterations) but it should not introduce any correctness issues.
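// As a concrete illustration of case a): the CM thread reads the end of a
// still-free region; a mutator then allocates a humongous object there,
// turning it into an SH region followed by CH regions; the CM thread's
// stale end means it will claim and visit each CH region individually,
// find its bitmap empty, and simply move on.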
2654 2811 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2655 2812 HeapWord* bottom = curr_region->bottom();
2656 2813 HeapWord* end = curr_region->end();
2657 2814 HeapWord* limit = curr_region->next_top_at_mark_start();
2658 2815
2659 2816 if (verbose_low()) {
2660 2817 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2661 2818 "["PTR_FORMAT", "PTR_FORMAT"), "
2662 2819 "limit = "PTR_FORMAT,
2663 2820 worker_id, curr_region, bottom, end, limit);
2664 2821 }
2665 2822
2666 2823 // Is the gap between reading the finger and doing the CAS too long?
2667 2824 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2668 2825 if (res == finger) {
2669 2826 // we succeeded
2670 2827
2671 2828 // notice that _finger == end cannot be guaranteed here since
2672 2829 // someone else might have moved the finger even further
2673 2830 assert(_finger >= end, "the finger should have moved forward");
2674 2831
2675 2832 if (verbose_low()) {
2676 2833 gclog_or_tty->print_cr("[%u] we were successful with region = "
2677 2834 PTR_FORMAT, worker_id, curr_region);
2678 2835 }
2679 2836
2680 2837 if (limit > bottom) {
2681 2838 if (verbose_low()) {
2682 2839 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2683 2840 "returning it ", worker_id, curr_region);
2684 2841 }
2685 2842 return curr_region;
2686 2843 } else {
2687 2844 assert(limit == bottom,
2688 2845 "the region limit should be at bottom");
2689 2846 if (verbose_low()) {
2690 2847 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2691 2848 "returning NULL", worker_id, curr_region);
2692 2849 }
2693 2850 // we return NULL and the caller should try calling
2694 2851 // claim_region() again.
2695 2852 return NULL;
2696 2853 }
2697 2854 } else {
2698 2855 assert(_finger > finger, "the finger should have moved forward");
2699 2856 if (verbose_low()) {
2700 2857 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2701 2858 "global finger = "PTR_FORMAT", "
2702 2859 "our finger = "PTR_FORMAT,
2703 2860 worker_id, _finger, finger);
2704 2861 }
2705 2862
2706 2863 // read it again
2707 2864 finger = _finger;
2708 2865 }
2709 2866 }
2710 2867
2711 2868 return NULL;
2712 2869 }
2713 2870
2714 2871 #ifndef PRODUCT
2715 2872 enum VerifyNoCSetOopsPhase {
2716 2873 VerifyNoCSetOopsStack,
2717 2874 VerifyNoCSetOopsQueues,
2718 2875 VerifyNoCSetOopsSATBCompleted,
2719 2876 VerifyNoCSetOopsSATBThread
2720 2877 };
2721 2878
2722 2879 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
2723 2880 private:
2724 2881 G1CollectedHeap* _g1h;
2725 2882 VerifyNoCSetOopsPhase _phase;
2726 2883 int _info;
2727 2884
2728 2885 const char* phase_str() {
2729 2886 switch (_phase) {
2730 2887 case VerifyNoCSetOopsStack: return "Stack";
2731 2888 case VerifyNoCSetOopsQueues: return "Queue";
2732 2889 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2733 2890 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
2734 2891 default: ShouldNotReachHere();
2735 2892 }
2736 2893 return NULL;
2737 2894 }
2738 2895
2739 2896 void do_object_work(oop obj) {
2740 2897 guarantee(!_g1h->obj_in_cs(obj),
2741 2898 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2742 2899 (void*) obj, phase_str(), _info));
2743 2900 }
2744 2901
2745 2902 public:
2746 2903 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2747 2904
2748 2905 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2749 2906 _phase = phase;
2750 2907 _info = info;
2751 2908 }
2752 2909
2753 2910 virtual void do_oop(oop* p) {
2754 2911 oop obj = oopDesc::load_decode_heap_oop(p);
2755 2912 do_object_work(obj);
2756 2913 }
2757 2914
2758 2915 virtual void do_oop(narrowOop* p) {
2759 2916 // We should not come across narrow oops while scanning marking
2760 2917 // stacks and SATB buffers.
2761 2918 ShouldNotReachHere();
2762 2919 }
2763 2920
2764 2921 virtual void do_object(oop obj) {
2765 2922 do_object_work(obj);
2766 2923 }
2767 2924 };
2768 2925
2769 2926 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2770 2927 bool verify_enqueued_buffers,
2771 2928 bool verify_thread_buffers,
2772 2929 bool verify_fingers) {
2773 2930 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2774 2931 if (!G1CollectedHeap::heap()->mark_in_progress()) {
2775 2932 return;
2776 2933 }
2777 2934
2778 2935 VerifyNoCSetOopsClosure cl;
2779 2936
2780 2937 if (verify_stacks) {
2781 2938 // Verify entries on the global mark stack
2782 2939 cl.set_phase(VerifyNoCSetOopsStack);
2783 2940 _markStack.oops_do(&cl);
2784 2941
2785 2942 // Verify entries on the task queues
2786 2943 for (uint i = 0; i < _max_worker_id; i += 1) {
2787 2944 cl.set_phase(VerifyNoCSetOopsQueues, i);
2788 - OopTaskQueue* queue = _task_queues->queue(i);
2945 + CMTaskQueue* queue = _task_queues->queue(i);
2789 2946 queue->oops_do(&cl);
2790 2947 }
2791 2948 }
2792 2949
2793 2950 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2794 2951
2795 2952 // Verify entries on the enqueued SATB buffers
2796 2953 if (verify_enqueued_buffers) {
2797 2954 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2798 2955 satb_qs.iterate_completed_buffers_read_only(&cl);
2799 2956 }
2800 2957
2801 2958 // Verify entries on the per-thread SATB buffers
2802 2959 if (verify_thread_buffers) {
2803 2960 cl.set_phase(VerifyNoCSetOopsSATBThread);
2804 2961 satb_qs.iterate_thread_buffers_read_only(&cl);
2805 2962 }
2806 2963
2807 2964 if (verify_fingers) {
2808 2965 // Verify the global finger
2809 2966 HeapWord* global_finger = finger();
2810 2967 if (global_finger != NULL && global_finger < _heap_end) {
2811 2968 // The global finger always points to a heap region boundary. We
2812 2969 // use heap_region_containing_raw() to get the containing region
2813 2970 // given that the global finger could be pointing to a free region
2814 2971 // which subsequently becomes continues humongous. If that
2815 2972 // happens, heap_region_containing() will return the bottom of the
2816 2973 // corresponding starts humongous region and the check below will
2817 2974 // not hold any more.
2818 2975 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2819 2976 guarantee(global_finger == global_hr->bottom(),
2820 2977 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2821 2978 global_finger, HR_FORMAT_PARAMS(global_hr)));
2822 2979 }
2823 2980
2824 2981 // Verify the task fingers
2825 2982 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2826 2983 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2827 2984 CMTask* task = _tasks[i];
2828 2985 HeapWord* task_finger = task->finger();
2829 2986 if (task_finger != NULL && task_finger < _heap_end) {
2830 2987 // See above note on the global finger verification.
2831 2988 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2832 2989 guarantee(task_finger == task_hr->bottom() ||
2833 2990 !task_hr->in_collection_set(),
2834 2991 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2835 2992 task_finger, HR_FORMAT_PARAMS(task_hr)));
2836 2993 }
2837 2994 }
2838 2995 }
2839 2996 }
2840 2997 #endif // PRODUCT
2841 2998
2842 2999 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2843 - _markStack.setEmpty();
2844 - _markStack.clear_overflow();
3000 + _markStack.set_should_expand();
3001 + _markStack.setEmpty(); // Also clears the _markStack overflow flag
2845 3002 if (clear_overflow) {
2846 3003 clear_has_overflown();
2847 3004 } else {
2848 3005 assert(has_overflown(), "pre-condition");
2849 3006 }
2850 3007 _finger = _heap_start;
2851 3008
2852 3009 for (uint i = 0; i < _max_worker_id; ++i) {
2853 - OopTaskQueue* queue = _task_queues->queue(i);
3010 + CMTaskQueue* queue = _task_queues->queue(i);
2854 3011 queue->set_empty();
2855 3012 }
2856 3013 }
2857 3014
2858 3015 // Aggregate the counting data that was constructed concurrently
2859 3016 // with marking.
2860 3017 class AggregateCountDataHRClosure: public HeapRegionClosure {
2861 3018 G1CollectedHeap* _g1h;
2862 3019 ConcurrentMark* _cm;
2863 3020 CardTableModRefBS* _ct_bs;
2864 3021 BitMap* _cm_card_bm;
2865 3022 uint _max_worker_id;
2866 3023
2867 3024 public:
2868 3025 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2869 3026 BitMap* cm_card_bm,
2870 3027 uint max_worker_id) :
2871 3028 _g1h(g1h), _cm(g1h->concurrent_mark()),
2872 3029 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
2873 3030 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2874 3031
2875 3032 bool doHeapRegion(HeapRegion* hr) {
2876 3033 if (hr->continuesHumongous()) {
2877 3034 // We will ignore these here and process them when their
2878 3035 // associated "starts humongous" region is processed.
2879 3036 // Note that we cannot rely on their associated
2880 3037 // "starts humongous" region to have their bit set to 1
2881 3038 // since, due to the region chunking in the parallel region
2882 3039 // iteration, a "continues humongous" region might be visited
2883 3040 // before its associated "starts humongous".
2884 3041 return false;
2885 3042 }
2886 3043
2887 3044 HeapWord* start = hr->bottom();
2888 3045 HeapWord* limit = hr->next_top_at_mark_start();
2889 3046 HeapWord* end = hr->end();
2890 3047
2891 3048 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2892 3049 err_msg("Preconditions not met - "
2893 3050 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2894 3051 "top: "PTR_FORMAT", end: "PTR_FORMAT,
2895 3052 start, limit, hr->top(), hr->end()));
2896 3053
2897 3054 assert(hr->next_marked_bytes() == 0, "Precondition");
2898 3055
2899 3056 if (start == limit) {
2900 3057 // NTAMS of this region has not been set so nothing to do.
2901 3058 return false;
2902 3059 }
2903 3060
2904 3061 // 'start' should be in the heap.
2905 3062 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2906 3063 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2907 3064 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2908 3065
2909 3066 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2910 3067 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2911 3068 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2912 3069
2913 3070 // If ntams is not card aligned then we bump card bitmap index
2914 3071 // for limit so that we get all the cards spanned by
2915 3072 // the object ending at ntams.
2916 3073 // Note: if this is the last region in the heap then ntams
2917 3074 // could actually be just beyond the end of the heap;
2918 3075 // limit_idx will then correspond to a (non-existent) card
2919 3076 // that is also outside the heap.
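// As a worked example (assuming the usual 512-byte cards): if ntams falls
// 100 bytes into card k, card_bitmap_index_for(limit) returns k, and the
// bump below makes limit_idx == k + 1, so the [start_idx, limit_idx)
// aggregation below also covers the card on which the object straddling
// ntams ends.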
2920 3077 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2921 3078 limit_idx += 1;
2922 3079 }
2923 3080
2924 3081 assert(limit_idx <= end_idx, "or else use atomics");
2925 3082
2926 3083 // Aggregate the "stripe" in the count data associated with hr.
2927 3084 uint hrs_index = hr->hrs_index();
2928 3085 size_t marked_bytes = 0;
2929 3086
2930 3087 for (uint i = 0; i < _max_worker_id; i += 1) {
2931 3088 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2932 3089 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2933 3090
2934 3091 // Fetch the marked_bytes in this region for task i and
2935 3092 // add it to the running total for this region.
2936 3093 marked_bytes += marked_bytes_array[hrs_index];
2937 3094
2938 3095 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2939 3096 // into the global card bitmap.
2940 3097 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2941 3098
2942 3099 while (scan_idx < limit_idx) {
2943 3100 assert(task_card_bm->at(scan_idx) == true, "should be");
2944 3101 _cm_card_bm->set_bit(scan_idx);
2945 3102 assert(_cm_card_bm->at(scan_idx) == true, "should be");
2946 3103
2947 3104 // BitMap::get_next_one_offset() can handle the case when
2948 3105 // its left_offset parameter is greater than its right_offset
2949 3106 // parameter. It does, however, have an early exit if
2950 3107 // left_offset == right_offset. So let's limit the value
2951 3108 // passed in for left offset here.
2952 3109 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2953 3110 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2954 3111 }
2955 3112 }
2956 3113
2957 3114 // Update the marked bytes for this region.
2958 3115 hr->add_to_marked_bytes(marked_bytes);
2959 3116
2960 3117 // Next heap region
2961 3118 return false;
2962 3119 }
2963 3120 };
2964 3121
2965 3122 class G1AggregateCountDataTask: public AbstractGangTask {
2966 3123 protected:
2967 3124 G1CollectedHeap* _g1h;
2968 3125 ConcurrentMark* _cm;
2969 3126 BitMap* _cm_card_bm;
2970 3127 uint _max_worker_id;
2971 3128 int _active_workers;
2972 3129
2973 3130 public:
2974 3131 G1AggregateCountDataTask(G1CollectedHeap* g1h,
2975 3132 ConcurrentMark* cm,
2976 3133 BitMap* cm_card_bm,
2977 3134 uint max_worker_id,
2978 3135 int n_workers) :
2979 3136 AbstractGangTask("Count Aggregation"),
2980 3137 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2981 3138 _max_worker_id(max_worker_id),
2982 3139 _active_workers(n_workers) { }
2983 3140
2984 3141 void work(uint worker_id) {
2985 3142 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2986 3143
2987 3144 if (G1CollectedHeap::use_parallel_gc_threads()) {
2988 3145 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
2989 3146 _active_workers,
2990 3147 HeapRegion::AggregateCountClaimValue);
2991 3148 } else {
2992 3149 _g1h->heap_region_iterate(&cl);
2993 3150 }
2994 3151 }
2995 3152 };
2996 3153
2997 3154
2998 3155 void ConcurrentMark::aggregate_count_data() {
2999 3156 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3000 3157 _g1h->workers()->active_workers() :
3001 3158 1);
3002 3159
3003 3160 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3004 3161 _max_worker_id, n_workers);
3005 3162
3006 3163 if (G1CollectedHeap::use_parallel_gc_threads()) {
3007 3164 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3008 3165 "sanity check");
3009 3166 _g1h->set_par_threads(n_workers);
3010 3167 _g1h->workers()->run_task(&g1_par_agg_task);
3011 3168 _g1h->set_par_threads(0);
3012 3169
3013 3170 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3014 3171 "sanity check");
3015 3172 _g1h->reset_heap_region_claim_values();
3016 3173 } else {
3017 3174 g1_par_agg_task.work(0);
3018 3175 }
3019 3176 }
3020 3177
3021 3178 // Clear the per-worker arrays used to store the per-region counting data
3022 3179 void ConcurrentMark::clear_all_count_data() {
3023 3180 // Clear the global card bitmap - it will be filled during
3024 3181 // liveness count aggregation (during remark) and the
3025 3182 // final counting task.
3026 3183 _card_bm.clear();
3027 3184
3028 3185 // Clear the global region bitmap - it will be filled as part
3029 3186 // of the final counting task.
3030 3187 _region_bm.clear();
3031 3188
3032 3189 uint max_regions = _g1h->max_regions();
3033 3190 assert(_max_worker_id > 0, "uninitialized");
3034 3191
3035 3192 for (uint i = 0; i < _max_worker_id; i += 1) {
3036 3193 BitMap* task_card_bm = count_card_bitmap_for(i);
3037 3194 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3038 3195
3039 3196 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3040 3197 assert(marked_bytes_array != NULL, "uninitialized");
3041 3198
3042 3199 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3043 3200 task_card_bm->clear();
3044 3201 }
3045 3202 }
3046 3203
3047 3204 void ConcurrentMark::print_stats() {
3048 3205 if (verbose_stats()) {
3049 3206 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3050 3207 for (size_t i = 0; i < _active_tasks; ++i) {
3051 3208 _tasks[i]->print_stats();
3052 3209 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3053 3210 }
3054 3211 }
3055 3212 }
3056 3213
3057 3214 // abandon current marking iteration due to a Full GC
3058 3215 void ConcurrentMark::abort() {
3059 3216 // Clear all marks to force the marking thread to do nothing
3060 3217 _nextMarkBitMap->clearAll();
3061 3218 // Clear the liveness counting data
3062 3219 clear_all_count_data();
3063 3220 // Empty mark stack
3064 3221 clear_marking_state();
3065 3222 for (uint i = 0; i < _max_worker_id; ++i) {
3066 3223 _tasks[i]->clear_region_fields();
3067 3224 }
3068 3225 _has_aborted = true;
3069 3226
3070 3227 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3071 3228 satb_mq_set.abandon_partial_marking();
3072 3229 // This can be called either during or outside marking; we'll read
3073 3230 // the expected_active value from the SATB queue set.
3074 3231 satb_mq_set.set_active_all_threads(
3075 3232 false, /* new active value */
3076 3233 satb_mq_set.is_active() /* expected_active */);
3077 3234 }
3078 3235
3079 3236 static void print_ms_time_info(const char* prefix, const char* name,
3080 3237 NumberSeq& ns) {
3081 3238 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3082 3239 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3083 3240 if (ns.num() > 0) {
3084 3241 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3085 3242 prefix, ns.sd(), ns.maximum());
3086 3243 }
3087 3244 }
3088 3245
3089 3246 void ConcurrentMark::print_summary_info() {
3090 3247 gclog_or_tty->print_cr(" Concurrent marking:");
3091 3248 print_ms_time_info(" ", "init marks", _init_times);
3092 3249 print_ms_time_info(" ", "remarks", _remark_times);
3093 3250 {
3094 3251 print_ms_time_info(" ", "final marks", _remark_mark_times);
3095 3252 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3096 3253
3097 3254 }
3098 3255 print_ms_time_info(" ", "cleanups", _cleanup_times);
3099 3256 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3100 3257 _total_counting_time,
3101 3258 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3102 3259 (double)_cleanup_times.num()
3103 3260 : 0.0));
3104 3261 if (G1ScrubRemSets) {
3105 3262 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3106 3263 _total_rs_scrub_time,
3107 3264 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3108 3265 (double)_cleanup_times.num()
3109 3266 : 0.0));
3110 3267 }
3111 3268 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3112 3269 (_init_times.sum() + _remark_times.sum() +
3113 3270 _cleanup_times.sum())/1000.0);
3114 3271 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3115 3272 "(%8.2f s marking).",
3116 3273 cmThread()->vtime_accum(),
3117 3274 cmThread()->vtime_mark_accum());
3118 3275 }
3119 3276
3120 3277 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3121 3278 _parallel_workers->print_worker_threads_on(st);
3122 3279 }
3123 3280
3124 3281 // We take a break if someone is trying to stop the world.
3125 3282 bool ConcurrentMark::do_yield_check(uint worker_id) {
3126 3283 if (should_yield()) {
3127 3284 if (worker_id == 0) {
3128 3285 _g1h->g1_policy()->record_concurrent_pause();
3129 3286 }
3130 3287 cmThread()->yield();
3131 3288 return true;
3132 3289 } else {
3133 3290 return false;
3134 3291 }
3135 3292 }
3136 3293
3137 3294 bool ConcurrentMark::should_yield() {
3138 3295 return cmThread()->should_yield();
3139 3296 }
3140 3297
3141 3298 bool ConcurrentMark::containing_card_is_marked(void* p) {
3142 3299 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3143 3300 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3144 3301 }
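// For illustration (assuming the default 512-byte cards, card_shift == 9):
// with the reserved region starting at 0x10000000, p == 0x10000400 yields
// offset 0x400, and card index 0x400 >> 9 == 2, i.e. bit 2 of _card_bm.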
3145 3302
3146 3303 bool ConcurrentMark::containing_cards_are_marked(void* start,
3147 3304 void* last) {
3148 3305 return containing_card_is_marked(start) &&
3149 3306 containing_card_is_marked(last);
3150 3307 }
3151 3308
3152 3309 #ifndef PRODUCT
3153 3310 // for debugging purposes
3154 3311 void ConcurrentMark::print_finger() {
3155 3312 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3156 3313 _heap_start, _heap_end, _finger);
3157 3314 for (uint i = 0; i < _max_worker_id; ++i) {
3158 3315 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger());
3159 3316 }
3160 3317 gclog_or_tty->print_cr("");
3161 3318 }
3162 3319 #endif
3163 3320
3164 3321 void CMTask::scan_object(oop obj) {
3165 3322 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3166 3323
3167 3324 if (_cm->verbose_high()) {
3168 3325 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3169 3326 _worker_id, (void*) obj);
3170 3327 }
3171 3328
3172 3329 size_t obj_size = obj->size();
3173 3330 _words_scanned += obj_size;
3174 3331
3175 3332 obj->oop_iterate(_cm_oop_closure);
3176 3333 statsOnly( ++_objs_scanned );
3177 3334 check_limits();
3178 3335 }
3179 3336
3180 3337 // Closure for iteration over bitmaps
3181 3338 class CMBitMapClosure : public BitMapClosure {
3182 3339 private:
3183 3340 // the bitmap that is being iterated over
3184 3341 CMBitMap* _nextMarkBitMap;
3185 3342 ConcurrentMark* _cm;
3186 3343 CMTask* _task;
3187 3344
3188 3345 public:
3189 3346 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3190 3347 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3191 3348
3192 3349 bool do_bit(size_t offset) {
3193 3350 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3194 3351 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3195 3352 assert( addr < _cm->finger(), "invariant");
3196 3353
3197 3354 statsOnly( _task->increase_objs_found_on_bitmap() );
3198 3355 assert(addr >= _task->finger(), "invariant");
3199 3356
3200 3357 // We move that task's local finger along.
3201 3358 _task->move_finger_to(addr);
3202 3359
3203 3360 _task->scan_object(oop(addr));
3204 3361 // we only partially drain the local queue and global stack
3205 3362 _task->drain_local_queue(true);
3206 3363 _task->drain_global_stack(true);
3207 3364
3208 3365 // if the has_aborted flag has been raised, we need to bail out of
3209 3366 // the iteration
3210 3367 return !_task->has_aborted();
3211 3368 }
3212 3369 };
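// Note: CMBitMapClosure is instantiated in CMTask::do_marking_step() and
// driven by CMBitMap::iterate() over the [_finger, _region_limit) part of
// the claimed region; returning false from do_bit() is what aborts that
// iteration once the task's has_aborted flag has been raised.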
3213 3370
3214 3371 // Closure for iterating over objects, currently only used for
3215 3372 // processing SATB buffers.
3216 3373 class CMObjectClosure : public ObjectClosure {
3217 3374 private:
3218 3375 CMTask* _task;
3219 3376
3220 3377 public:
3221 3378 void do_object(oop obj) {
3222 3379 _task->deal_with_reference(obj);
3223 3380 }
3224 3381
3225 3382 CMObjectClosure(CMTask* task) : _task(task) { }
3226 3383 };
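// Note: CMObjectClosure is applied to SATB buffers by
// CMTask::drain_satb_buffers() below; every object in a claimed buffer is
// fed to CMTask::deal_with_reference().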
3227 3384
3228 3385 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3229 3386 ConcurrentMark* cm,
3230 3387 CMTask* task)
3231 3388 : _g1h(g1h), _cm(cm), _task(task) {
3232 3389 assert(_ref_processor == NULL, "should be initialized to NULL");
3233 3390
3234 3391 if (G1UseConcMarkReferenceProcessing) {
3235 3392 _ref_processor = g1h->ref_processor_cm();
3236 3393 assert(_ref_processor != NULL, "should not be NULL");
3237 3394 }
3238 3395 }
3239 3396
3240 3397 void CMTask::setup_for_region(HeapRegion* hr) {
3241 3398 // Separated the asserts so that we know which one fires.
3242 3399 assert(hr != NULL,
3243 3400 "claim_region() should have filtered out continues humongous regions");
3244 3401 assert(!hr->continuesHumongous(),
3245 3402 "claim_region() should have filtered out continues humongous regions");
3246 3403
3247 3404 if (_cm->verbose_low()) {
3248 3405 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3249 3406 _worker_id, hr);
3250 3407 }
3251 3408
3252 3409 _curr_region = hr;
3253 3410 _finger = hr->bottom();
3254 3411 update_region_limit();
3255 3412 }
3256 3413
3257 3414 void CMTask::update_region_limit() {
3258 3415 HeapRegion* hr = _curr_region;
3259 3416 HeapWord* bottom = hr->bottom();
3260 3417 HeapWord* limit = hr->next_top_at_mark_start();
3261 3418
3262 3419 if (limit == bottom) {
3263 3420 if (_cm->verbose_low()) {
3264 3421 gclog_or_tty->print_cr("[%u] found an empty region "
3265 3422 "["PTR_FORMAT", "PTR_FORMAT")",
3266 3423 _worker_id, bottom, limit);
3267 3424 }
3268 3425 // The region was collected underneath our feet.
3269 3426 // We set the finger to bottom to ensure that the bitmap
3270 3427 // iteration that will follow this will not do anything.
3271 3428 // (this is not a condition that holds when we set the region up,
3272 3429 // as the region is not supposed to be empty in the first place)
3273 3430 _finger = bottom;
3274 3431 } else if (limit >= _region_limit) {
3275 3432 assert(limit >= _finger, "peace of mind");
3276 3433 } else {
3277 3434 assert(limit < _region_limit, "only way to get here");
3278 3435 // This can happen under some pretty unusual circumstances. An
3279 3436 // evacuation pause empties the region underneath our feet (NTAMS
3280 3437 // at bottom). We then do some allocation in the region (NTAMS
3281 3438 // stays at bottom), followed by the region being used as a GC
3282 3439 // alloc region (NTAMS will move to top() and the objects
3283 3440 // originally below it will be grayed). All objects now marked in
3284 3441 // the region are explicitly grayed, if below the global finger,
3285 3442 // and we do not need in fact to scan anything else. So, we simply
3286 3443 // set _finger to be limit to ensure that the bitmap iteration
3287 3444 // doesn't do anything.
3288 3445 _finger = limit;
3289 3446 }
3290 3447
3291 3448 _region_limit = limit;
3292 3449 }
3293 3450
3294 3451 void CMTask::giveup_current_region() {
3295 3452 assert(_curr_region != NULL, "invariant");
3296 3453 if (_cm->verbose_low()) {
3297 3454 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3298 3455 _worker_id, _curr_region);
3299 3456 }
3300 3457 clear_region_fields();
3301 3458 }
3302 3459
3303 3460 void CMTask::clear_region_fields() {
3304 3461 // Values for these three fields that indicate that we're not
3305 3462 // holding on to a region.
3306 3463 _curr_region = NULL;
3307 3464 _finger = NULL;
3308 3465 _region_limit = NULL;
3309 3466 }
3310 3467
3311 3468 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3312 3469 if (cm_oop_closure == NULL) {
3313 3470 assert(_cm_oop_closure != NULL, "invariant");
3314 3471 } else {
3315 3472 assert(_cm_oop_closure == NULL, "invariant");
3316 3473 }
3317 3474 _cm_oop_closure = cm_oop_closure;
3318 3475 }
3319 3476
3320 3477 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3321 3478 guarantee(nextMarkBitMap != NULL, "invariant");
3322 3479
3323 3480 if (_cm->verbose_low()) {
3324 3481 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3325 3482 }
3326 3483
3327 3484 _nextMarkBitMap = nextMarkBitMap;
3328 3485 clear_region_fields();
3329 3486
3330 3487 _calls = 0;
3331 3488 _elapsed_time_ms = 0.0;
3332 3489 _termination_time_ms = 0.0;
3333 3490 _termination_start_time_ms = 0.0;
3334 3491
3335 3492 #if _MARKING_STATS_
3336 3493 _local_pushes = 0;
3337 3494 _local_pops = 0;
3338 3495 _local_max_size = 0;
3339 3496 _objs_scanned = 0;
3340 3497 _global_pushes = 0;
3341 3498 _global_pops = 0;
3342 3499 _global_max_size = 0;
3343 3500 _global_transfers_to = 0;
3344 3501 _global_transfers_from = 0;
3345 3502 _regions_claimed = 0;
3346 3503 _objs_found_on_bitmap = 0;
3347 3504 _satb_buffers_processed = 0;
3348 3505 _steal_attempts = 0;
3349 3506 _steals = 0;
3350 3507 _aborted = 0;
3351 3508 _aborted_overflow = 0;
3352 3509 _aborted_cm_aborted = 0;
3353 3510 _aborted_yield = 0;
3354 3511 _aborted_timed_out = 0;
3355 3512 _aborted_satb = 0;
3356 3513 _aborted_termination = 0;
3357 3514 #endif // _MARKING_STATS_
3358 3515 }
3359 3516
3360 3517 bool CMTask::should_exit_termination() {
3361 3518 regular_clock_call();
3362 3519 // This is called when we are in the termination protocol. We should
3363 3520 // quit if, for some reason, this task wants to abort or the global
3364 3521 // stack is not empty (this means that we can get work from it).
3365 3522 return !_cm->mark_stack_empty() || has_aborted();
3366 3523 }
3367 3524
3368 3525 void CMTask::reached_limit() {
3369 3526 assert(_words_scanned >= _words_scanned_limit ||
3370 3527 _refs_reached >= _refs_reached_limit ,
3371 3528 "shouldn't have been called otherwise");
3372 3529 regular_clock_call();
3373 3530 }
3374 3531
3375 3532 void CMTask::regular_clock_call() {
3376 3533 if (has_aborted()) return;
3377 3534
3378 3535 // First, we need to recalculate the words scanned and refs reached
3379 3536 // limits for the next clock call.
3380 3537 recalculate_limits();
3381 3538
3382 3539 // During the regular clock call we do the following
3383 3540
3384 3541 // (1) If an overflow has been flagged, then we abort.
3385 3542 if (_cm->has_overflown()) {
3386 3543 set_has_aborted();
3387 3544 return;
3388 3545 }
3389 3546
3390 3547 // If we are not concurrent (i.e. we're doing remark) we don't need
3391 3548 // to check anything else. The other steps are only needed during
3392 3549 // the concurrent marking phase.
3393 3550 if (!concurrent()) return;
3394 3551
3395 3552 // (2) If marking has been aborted for Full GC, then we also abort.
3396 3553 if (_cm->has_aborted()) {
3397 3554 set_has_aborted();
3398 3555 statsOnly( ++_aborted_cm_aborted );
3399 3556 return;
3400 3557 }
3401 3558
3402 3559 double curr_time_ms = os::elapsedVTime() * 1000.0;
3403 3560
3404 3561 // (3) If marking stats are enabled, then we update the step history.
3405 3562 #if _MARKING_STATS_
3406 3563 if (_words_scanned >= _words_scanned_limit) {
3407 3564 ++_clock_due_to_scanning;
3408 3565 }
3409 3566 if (_refs_reached >= _refs_reached_limit) {
3410 3567 ++_clock_due_to_marking;
3411 3568 }
3412 3569
3413 3570 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3414 3571 _interval_start_time_ms = curr_time_ms;
3415 3572 _all_clock_intervals_ms.add(last_interval_ms);
3416 3573
3417 3574 if (_cm->verbose_medium()) {
3418 3575 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3419 3576 "scanned = %d%s, refs reached = %d%s",
3420 3577 _worker_id, last_interval_ms,
3421 3578 _words_scanned,
3422 3579 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3423 3580 _refs_reached,
3424 3581 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3425 3582 }
3426 3583 #endif // _MARKING_STATS_
3427 3584
3428 3585 // (4) We check whether we should yield. If we have to, then we abort.
3429 3586 if (_cm->should_yield()) {
3430 3587 // We should yield. To do this we abort the task. The caller is
3431 3588 // responsible for yielding.
3432 3589 set_has_aborted();
3433 3590 statsOnly( ++_aborted_yield );
3434 3591 return;
3435 3592 }
3436 3593
3437 3594 // (5) We check whether we've reached our time quota. If we have,
3438 3595 // then we abort.
3439 3596 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3440 3597 if (elapsed_time_ms > _time_target_ms) {
3441 3598 set_has_aborted();
3442 3599 _has_timed_out = true;
3443 3600 statsOnly( ++_aborted_timed_out );
3444 3601 return;
3445 3602 }
3446 3603
3447 3604 // (6) Finally, we check whether there are enough completed SATB
3448 3605 // buffers available for processing. If there are, we abort.
3449 3606 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3450 3607 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3451 3608 if (_cm->verbose_low()) {
3452 3609 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3453 3610 _worker_id);
3454 3611 }
3455 3612 // we do need to process SATB buffers; we'll abort and restart
3456 3613 // the marking task to do so
3457 3614 set_has_aborted();
3458 3615 statsOnly( ++_aborted_satb );
3459 3616 return;
3460 3617 }
3461 3618 }
3462 3619
3463 3620 void CMTask::recalculate_limits() {
3464 3621 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3465 3622 _words_scanned_limit = _real_words_scanned_limit;
3466 3623
3467 3624 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3468 3625 _refs_reached_limit = _real_refs_reached_limit;
3469 3626 }
3470 3627
3471 3628 void CMTask::decrease_limits() {
3472 3629 // This is called when we believe that we're going to do an infrequent
3473 3630 // operation which will increase the per byte scanned cost (i.e. move
3474 3631 // entries to/from the global stack). It basically tries to decrease the
3475 3632 // scanning limit so that the clock is called earlier.
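// As a worked example (derived from the assignments below):
// recalculate_limits() sets _words_scanned_limit a full
// words_scanned_period ahead of _words_scanned, so after decrease_limits()
// at most words_scanned_period / 4 of that budget remains before
// regular_clock_call() fires; the refs limit is reduced the same way.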
3476 3633
3477 3634 if (_cm->verbose_medium()) {
3478 3635 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3479 3636 }
3480 3637
3481 3638 _words_scanned_limit = _real_words_scanned_limit -
3482 3639 3 * words_scanned_period / 4;
3483 3640 _refs_reached_limit = _real_refs_reached_limit -
3484 3641 3 * refs_reached_period / 4;
3485 3642 }
3486 3643
3487 3644 void CMTask::move_entries_to_global_stack() {
3488 3645 // local array where we'll store the entries that will be popped
3489 3646 // from the local queue
3490 3647 oop buffer[global_stack_transfer_size];
3491 3648
3492 3649 int n = 0;
3493 3650 oop obj;
3494 3651 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3495 3652 buffer[n] = obj;
3496 3653 ++n;
3497 3654 }
3498 3655
3499 3656 if (n > 0) {
3500 3657 // we popped at least one entry from the local queue
3501 3658
3502 3659 statsOnly( ++_global_transfers_to; _local_pops += n );
3503 3660
3504 3661 if (!_cm->mark_stack_push(buffer, n)) {
3505 3662 if (_cm->verbose_low()) {
3506 3663 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3507 3664 _worker_id);
3508 3665 }
3509 3666 set_has_aborted();
3510 3667 } else {
3511 3668 // the transfer was successful
3512 3669
3513 3670 if (_cm->verbose_medium()) {
3514 3671 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3515 3672 _worker_id, n);
3516 3673 }
3517 3674 statsOnly( int tmp_size = _cm->mark_stack_size();
3518 3675 if (tmp_size > _global_max_size) {
3519 3676 _global_max_size = tmp_size;
3520 3677 }
3521 3678 _global_pushes += n );
3522 3679 }
3523 3680 }
3524 3681
3525 3682 // this operation was quite expensive, so decrease the limits
3526 3683 decrease_limits();
3527 3684 }
3528 3685
3529 3686 void CMTask::get_entries_from_global_stack() {
3530 3687 // local array where we'll store the entries that will be popped
3531 3688 // from the global stack.
3532 3689 oop buffer[global_stack_transfer_size];
3533 3690 int n;
3534 3691 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3535 3692 assert(n <= global_stack_transfer_size,
3536 3693 "we should not pop more than the given limit");
3537 3694 if (n > 0) {
3538 3695 // yes, we did actually pop at least one entry
3539 3696
3540 3697 statsOnly( ++_global_transfers_from; _global_pops += n );
3541 3698 if (_cm->verbose_medium()) {
3542 3699 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3543 3700 _worker_id, n);
3544 3701 }
3545 3702 for (int i = 0; i < n; ++i) {
3546 3703 bool success = _task_queue->push(buffer[i]);
3547 3704 // We only call this when the local queue is empty or under a
3548 3705 // given target limit. So, we do not expect this push to fail.
3549 3706 assert(success, "invariant");
3550 3707 }
3551 3708
3552 3709 statsOnly( int tmp_size = _task_queue->size();
3553 3710 if (tmp_size > _local_max_size) {
3554 3711 _local_max_size = tmp_size;
3555 3712 }
3556 3713 _local_pushes += n );
3557 3714 }
3558 3715
3559 3716 // this operation was quite expensive, so decrease the limits
3560 3717 decrease_limits();
3561 3718 }
3562 3719
3563 3720 void CMTask::drain_local_queue(bool partially) {
3564 3721 if (has_aborted()) return;
3565 3722
3566 3723 // Decide what the target size is, depending on whether we're going to
3567 3724 // drain it partially (so that other tasks can steal if they run out
3568 3725 // of things to do) or totally (at the very end).
3569 3726 size_t target_size;
3570 3727 if (partially) {
3571 3728 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3572 3729 } else {
3573 3730 target_size = 0;
3574 3731 }
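// For instance (assuming the default GCDrainStackTargetSize of 64 and a
// task queue capacity well above 3 * 64): a partial drain stops once the
// queue is down to 64 entries, deliberately leaving work available for
// stealing, while a total drain (target_size == 0) empties it completely.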
3575 3732
3576 3733 if (_task_queue->size() > target_size) {
3577 3734 if (_cm->verbose_high()) {
3578 3735 gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
3579 3736 _worker_id, target_size);
3580 3737 }
3581 3738
3582 3739 oop obj;
3583 3740 bool ret = _task_queue->pop_local(obj);
3584 3741 while (ret) {
3585 3742 statsOnly( ++_local_pops );
3586 3743
3587 3744 if (_cm->verbose_high()) {
3588 3745 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3589 3746 (void*) obj);
3590 3747 }
3591 3748
3592 3749 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3593 3750 assert(!_g1h->is_on_master_free_list(
3594 3751 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3595 3752
3596 3753 scan_object(obj);
3597 3754
3598 3755 if (_task_queue->size() <= target_size || has_aborted()) {
3599 3756 ret = false;
3600 3757 } else {
3601 3758 ret = _task_queue->pop_local(obj);
3602 3759 }
3603 3760 }
3604 3761
3605 3762 if (_cm->verbose_high()) {
3606 3763 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3607 3764 _worker_id, _task_queue->size());
3608 3765 }
3609 3766 }
3610 3767 }
3611 3768
3612 3769 void CMTask::drain_global_stack(bool partially) {
3613 3770 if (has_aborted()) return;
3614 3771
3615 3772 // We have a policy to drain the local queue before we attempt to
3616 3773 // drain the global stack.
3617 3774 assert(partially || _task_queue->size() == 0, "invariant");
3618 3775
3619 3776 // Decide what the target size is, depending on whether we're going to
3620 3777 // drain it partially (so that other tasks can steal if they run out
3621 3778 // of things to do) or totally (at the very end). Notice that,
3622 3779 // because we move entries from the global stack in chunks or
3623 3780 // because another task might be doing the same, we might in fact
3624 3781 // drop below the target. But, this is not a problem.
3625 3782 size_t target_size;
3626 3783 if (partially) {
3627 3784 target_size = _cm->partial_mark_stack_size_target();
3628 3785 } else {
3629 3786 target_size = 0;
3630 3787 }
3631 3788
3632 3789 if (_cm->mark_stack_size() > target_size) {
3633 3790 if (_cm->verbose_low()) {
3634 3791 gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
3635 3792 _worker_id, target_size);
3636 3793 }
3637 3794
3638 3795 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3639 3796 get_entries_from_global_stack();
3640 3797 drain_local_queue(partially);
3641 3798 }
3642 3799
3643 3800 if (_cm->verbose_low()) {
3644 3801 gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3645 3802 _worker_id, _cm->mark_stack_size());
3646 3803 }
3647 3804 }
3648 3805 }
3649 3806
3650 3807 // SATB Queue has several assumptions on whether to call the par or
3651 3808 // non-par versions of the methods. This is why some of the code is
3652 3809 // replicated. We should really get rid of the single-threaded version
3653 3810 // of the code to simplify things.
3654 3811 void CMTask::drain_satb_buffers() {
3655 3812 if (has_aborted()) return;
3656 3813
3657 3814 // We set this so that the regular clock knows that we're in the
3658 3815 // middle of draining buffers and doesn't set the abort flag when it
3659 3816 // notices that SATB buffers are available for draining. It'd be
3660 3817 // very counterproductive if it did that. :-)
3661 3818 _draining_satb_buffers = true;
3662 3819
3663 3820 CMObjectClosure oc(this);
3664 3821 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3665 3822 if (G1CollectedHeap::use_parallel_gc_threads()) {
3666 3823 satb_mq_set.set_par_closure(_worker_id, &oc);
3667 3824 } else {
3668 3825 satb_mq_set.set_closure(&oc);
3669 3826 }
3670 3827
3671 3828 // This keeps claiming and applying the closure to completed buffers
3672 3829 // until we run out of buffers or we need to abort.
3673 3830 if (G1CollectedHeap::use_parallel_gc_threads()) {
3674 3831 while (!has_aborted() &&
3675 3832 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3676 3833 if (_cm->verbose_medium()) {
3677 3834 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3678 3835 }
3679 3836 statsOnly( ++_satb_buffers_processed );
3680 3837 regular_clock_call();
3681 3838 }
3682 3839 } else {
3683 3840 while (!has_aborted() &&
3684 3841 satb_mq_set.apply_closure_to_completed_buffer()) {
3685 3842 if (_cm->verbose_medium()) {
3686 3843 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3687 3844 }
3688 3845 statsOnly( ++_satb_buffers_processed );
3689 3846 regular_clock_call();
3690 3847 }
3691 3848 }
3692 3849
3693 3850 if (!concurrent() && !has_aborted()) {
3694 3851 // We should only do this during remark.
3695 3852 if (G1CollectedHeap::use_parallel_gc_threads()) {
3696 3853 satb_mq_set.par_iterate_closure_all_threads(_worker_id);
3697 3854 } else {
3698 3855 satb_mq_set.iterate_closure_all_threads();
3699 3856 }
3700 3857 }
3701 3858
3702 3859 _draining_satb_buffers = false;
3703 3860
3704 3861 assert(has_aborted() ||
3705 3862 concurrent() ||
3706 3863 satb_mq_set.completed_buffers_num() == 0, "invariant");
3707 3864
3708 3865 if (G1CollectedHeap::use_parallel_gc_threads()) {
3709 3866 satb_mq_set.set_par_closure(_worker_id, NULL);
3710 3867 } else {
3711 3868 satb_mq_set.set_closure(NULL);
3712 3869 }
3713 3870
3714 3871 // again, this was a potentially expensive operation, decrease the
3715 3872 // limits to get the regular clock call early
3716 3873 decrease_limits();
3717 3874 }
3718 3875
3719 3876 void CMTask::print_stats() {
3720 3877 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3721 3878 _worker_id, _calls);
3722 3879 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3723 3880 _elapsed_time_ms, _termination_time_ms);
3724 3881 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3725 3882 _step_times_ms.num(), _step_times_ms.avg(),
3726 3883 _step_times_ms.sd());
3727 3884 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3728 3885 _step_times_ms.maximum(), _step_times_ms.sum());
3729 3886
3730 3887 #if _MARKING_STATS_
3731 3888 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3732 3889 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3733 3890 _all_clock_intervals_ms.sd());
3734 3891 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3735 3892 _all_clock_intervals_ms.maximum(),
3736 3893 _all_clock_intervals_ms.sum());
3737 3894 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3738 3895 _clock_due_to_scanning, _clock_due_to_marking);
3739 3896 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3740 3897 _objs_scanned, _objs_found_on_bitmap);
3741 3898 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3742 3899 _local_pushes, _local_pops, _local_max_size);
3743 3900 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3744 3901 _global_pushes, _global_pops, _global_max_size);
3745 3902 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3746 3903 _global_transfers_to,_global_transfers_from);
3747 3904 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
3748 3905 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3749 3906 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3750 3907 _steal_attempts, _steals);
3751 3908 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3752 3909 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3753 3910 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3754 3911 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3755 3912 _aborted_timed_out, _aborted_satb, _aborted_termination);
3756 3913 #endif // _MARKING_STATS_
3757 3914 }
3758 3915
3759 3916 /*****************************************************************************
3760 3917
3761 3918 The do_marking_step(time_target_ms) method is the building block
3762 3919 of the parallel marking framework. It can be called in parallel
3763 3920 with other invocations of do_marking_step() on different tasks
3764 3921 (but only one per task, obviously) and concurrently with the
3765 3922 mutator threads, or during remark, hence it eliminates the need
3766 3923 for two versions of the code. When called during remark, it will
3767 3924 pick up from where the task left off during the concurrent marking
3768 3925 phase. Interestingly, tasks are also claimable during evacuation
3769 3926 pauses too, since do_marking_step() ensures that it aborts before
3770 3927 it needs to yield.
3771 3928
3772 3929 The data structures that it uses to do marking work are the
3773 3930 following:
3774 3931
3775 3932 (1) Marking Bitmap. If there are gray objects that appear only
3776 3933 on the bitmap (this happens either when dealing with an overflow
3777 3934 or when the initial marking phase has simply marked the roots
3778 3935 and didn't push them on the stack), then tasks claim heap
3779 3936 regions whose bitmap they then scan to find gray objects. A
3780 3937 global finger indicates where the end of the last claimed region
3781 3938 is. A local finger indicates how far into the region a task has
3782 3939 scanned. The two fingers are used to determine how to gray an
3783 3940 object (i.e. whether simply marking it is OK, as it will be
3784 3941 visited by a task in the future, or whether it needs to be also
3785 3942 pushed on a stack).
3786 3943
3787 3944 (2) Local Queue. The local queue of the task which is accessed
3788 3945 reasonably efficiently by the task. Other tasks can steal from
3789 3946 it when they run out of work. Throughout the marking phase, a
3790 3947 task attempts to keep its local queue short but not totally
3791 3948 empty, so that entries are available for stealing by other
3792 3949 tasks. Only when there is no more work, a task will totally
3793 3950 drain its local queue.
3794 3951
3795 3952 (3) Global Mark Stack. This handles local queue overflow. During
3796 3953 marking only sets of entries are moved between it and the local
3797 3954 queues, as access to it requires a mutex, and more fine-grained
3798 3955 interaction with it might cause contention. If it
3799 3956 overflows, then the marking phase should restart and iterate
3800 3957 over the bitmap to identify gray objects. Throughout the marking
3801 3958 phase, tasks attempt to keep the global mark stack at a small
3802 3959 length but not totally empty, so that entries are available for
3803 3960 popping by other tasks. Only when there is no more work will
3804 3961 tasks totally drain the global mark stack.
3805 3962
3806 3963 (4) SATB Buffer Queue. This is where completed SATB buffers are
3807 3964 made available. Buffers are regularly removed from this queue
3808 3965 and scanned for roots, so that the queue doesn't get too
3809 3966 long. During remark, all completed buffers are processed, as
3810 3967 well as the filled-in parts of any uncompleted buffers.
3811 3968
3812 3969 The do_marking_step() method tries to abort when the time target
3813 3970 has been reached. There are a few other cases when the
3814 3971 do_marking_step() method also aborts:
3815 3972
3816 3973 (1) When the marking phase has been aborted (after a Full GC).
3817 3974
3818 3975 (2) When a global overflow (on the global stack) has been
3819 3976 triggered. Before the task aborts, it will actually sync up with
3820 3977 the other tasks to ensure that all the marking data structures
3821 3978 (local queues, stacks, fingers etc.) are re-initialised so that
3822 3979 when do_marking_step() completes, the marking phase can
3823 3980 immediately restart.
3824 3981
3825 3982 (3) When enough completed SATB buffers are available. The
3826 3983 do_marking_step() method only tries to drain SATB buffers right
3827 3984 at the beginning. So, if enough buffers are available, the
3828 3985 marking step aborts and the SATB buffers are processed at
3829 3986 the beginning of the next invocation.
3830 3987
3831 3988 (4) To yield. When we have to yield, we abort and yield
3832 3989 right at the end of do_marking_step(). This saves us from a lot
3833 3990 of hassle as, by yielding, we might allow a Full GC. If this
3834 3991 happens then objects will be compacted underneath our feet, the
3835 3992 heap might shrink, etc. We save checking for this by just
3836 3993 aborting and doing the yield right at the end.
3837 3994
3838 3995 From the above it follows that the do_marking_step() method should
3839 3996 be called in a loop (or, otherwise, regularly) until it completes.
3840 3997
3841 3998 If a marking step completes without its has_aborted() flag being
3842 3999 true, it means it has completed the current marking phase (and
3843 4000 also all other marking tasks have done so and have all synced up).
3844 4001
3845 4002 A method called regular_clock_call() is invoked "regularly" (in
3846 4003 sub-ms intervals) throughout marking. It is this clock method that
3847 4004 checks all the abort conditions which were mentioned above and
3848 4005 decides when the task should abort. A work-based scheme is used to
3849 4006 trigger this clock method: when the number of object words the
3850 4007 marking phase has scanned or the number of references the marking
3851 4008 phase has visited reach a given limit. Additional invocations of
3852 4009 the clock method have been planted in a few other strategic places
3853 4010 too. The initial reason for the clock method was to avoid calling
3854 4011 vtime too regularly, as it is quite expensive. So, once it was in
3855 4012 place, it was natural to piggy-back all the other conditions on it
3856 4013 too and not constantly check them throughout the code.
3857 4014
3858 4015 *****************************************************************************/
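// A minimal sketch of such a driver loop (simplified; the real callers in
// this file derive the time target from the policy, and the accessor and
// flag values here are illustrative only):
//
//   CMTask* task = ...; // the worker's claimed task, e.g. _cm->_tasks[id]
//   do {
//     task->do_marking_step(10.0 /* time target, ms, illustrative */,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//     // if the step aborted in order to yield, the caller yields here
//   } while (task->has_aborted() && !_cm->has_aborted());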
3859 4016
3860 4017 void CMTask::do_marking_step(double time_target_ms,
3861 4018 bool do_stealing,
3862 4019 bool do_termination) {
3863 4020 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3864 4021 assert(concurrent() == _cm->concurrent(), "they should be the same");
3865 4022
3866 4023 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3867 4024 assert(_task_queues != NULL, "invariant");
3868 4025 assert(_task_queue != NULL, "invariant");
3869 4026 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3870 4027
3871 4028 assert(!_claimed,
3872 4029 "only one thread should claim this task at any one time");
3873 4030
3874 4031 // OK, this doesn't safeguard against all possible scenarios, as it is
3875 4032 // possible for two threads to set the _claimed flag at the same
3876 4033 // time. But it is only for debugging purposes anyway and it will
3877 4034 // catch most problems.
3878 4035 _claimed = true;
3879 4036
3880 4037 _start_time_ms = os::elapsedVTime() * 1000.0;
3881 4038 statsOnly( _interval_start_time_ms = _start_time_ms );
3882 4039
3883 4040 double diff_prediction_ms =
3884 4041 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3885 4042 _time_target_ms = time_target_ms - diff_prediction_ms;
3886 4043
3887 4044 // set up the variables that are used in the work-based scheme to
3888 4045 // call the regular clock method
3889 4046 _words_scanned = 0;
3890 4047 _refs_reached = 0;
3891 4048 recalculate_limits();
3892 4049
3893 4050 // clear all flags
3894 4051 clear_has_aborted();
3895 4052 _has_timed_out = false;
3896 4053 _draining_satb_buffers = false;
3897 4054
3898 4055 ++_calls;
3899 4056
3900 4057 if (_cm->verbose_low()) {
3901 4058 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3902 4059 "target = %1.2lfms >>>>>>>>>>",
3903 4060 _worker_id, _calls, _time_target_ms);
3904 4061 }
3905 4062
3906 4063 // Set up the bitmap and oop closures. Anything that uses them is
3907 4064 // eventually called from this method, so it is OK to allocate these
3908 4065 // statically.
3909 4066 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3910 4067 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
3911 4068 set_cm_oop_closure(&cm_oop_closure);
3912 4069
3913 4070 if (_cm->has_overflown()) {
3914 4071 // This can happen if the mark stack overflows during a GC pause
3915 4072 // and this task, after a yield point, restarts. We have to abort
3916 4073 // as we need to get into the overflow protocol which happens
3917 4074 // right at the end of this task.
3918 4075 set_has_aborted();
3919 4076 }
3920 4077
3921 4078 // First drain any available SATB buffers. After this, we will not
3922 4079 // look at SATB buffers before the next invocation of this method.
3923 4080 // If enough completed SATB buffers are queued up, the regular clock
3924 4081 // will abort this task so that it restarts.
3925 4082 drain_satb_buffers();
3926 4083 // ...then partially drain the local queue and the global stack
3927 4084 drain_local_queue(true);
3928 4085 drain_global_stack(true);
3929 4086
3930 4087 do {
3931 4088 if (!has_aborted() && _curr_region != NULL) {
3932 4089 // This means that we're already holding on to a region.
3933 4090 assert(_finger != NULL, "if region is not NULL, then the finger "
3934 4091 "should not be NULL either");
3935 4092
3936 4093 // We might have restarted this task after an evacuation pause
3937 4094 // which might have evacuated the region we're holding on to
3938 4095 // underneath our feet. Let's read its limit again to make sure
3939 4096 // that we do not iterate over a region of the heap that
3940 4097 // contains garbage (update_region_limit() will also move
3941 4098 // _finger to the start of the region if it is found empty).
3942 4099 update_region_limit();
3943 4100 // We will start from _finger not from the start of the region,
3944 4101 // as we might be restarting this task after aborting half-way
3945 4102 // through scanning this region. In this case, _finger points to
3946 4103 // the address where we last found a marked object. If this is a
3947 4104 // fresh region, _finger points to start().
3948 4105 MemRegion mr = MemRegion(_finger, _region_limit);
3949 4106
3950 4107 if (_cm->verbose_low()) {
3951 4108 gclog_or_tty->print_cr("[%u] we're scanning part "
3952 4109 "["PTR_FORMAT", "PTR_FORMAT") "
3953 4110 "of region "PTR_FORMAT,
3954 4111 _worker_id, _finger, _region_limit, _curr_region);
3955 4112 }
3956 4113
3957 4114 // Let's iterate over the bitmap of the part of the
3958 4115 // region that is left.
3959 4116 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3960 4117 // We successfully completed iterating over the region. Now,
3961 4118 // let's give up the region.
3962 4119 giveup_current_region();
3963 4120 regular_clock_call();
3964 4121 } else {
3965 4122 assert(has_aborted(), "currently the only way to do so");
3966 4123 // The only way to abort the bitmap iteration is to return
3967 4124 // false from the do_bit() method. However, inside the
3968 4125 // do_bit() method we move the _finger to point to the
3969 4126 // object currently being looked at. So, if we bail out, we
3970 4127 // have definitely set _finger to something non-null.
3971 4128 assert(_finger != NULL, "invariant");
3972 4129
3973 4130 // Region iteration was actually aborted. So now _finger
3974 4131 // points to the address of the object we last scanned. If we
3975 4132 // leave it there, when we restart this task, we will rescan
3976 4133 // the object. It is easy to avoid this. We move the finger by
3977 4134 // enough to point to the next possible object header (the
3978 4135 // bitmap knows by how much we need to move it as it knows its
3979 4136 // granularity).
3980 4137 assert(_finger < _region_limit, "invariant");
3981 4138 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3982 4139 // Check if bitmap iteration was aborted while scanning the last object
3983 4140 if (new_finger >= _region_limit) {
3984 4141 giveup_current_region();
3985 4142 } else {
3986 4143 move_finger_to(new_finger);
3987 4144 }
3988 4145 }
3989 4146 }
3990 4147 // At this point we have either completed iterating over the
3991 4148 // region we were holding on to, or we have aborted.
3992 4149
3993 4150 // We then partially drain the local queue and the global stack.
3994 4151 // (Do we really need this?)
3995 4152 drain_local_queue(true);
3996 4153 drain_global_stack(true);
3997 4154
3998 4155 // Read the note on the claim_region() method on why it might
3999 4156 // return NULL with potentially more regions available for
4000 4157 // claiming and why we have to check out_of_regions() to determine
4001 4158 // whether we're done or not.
4002 4159 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4003 4160 // We are going to try to claim a new region. We should have
4004 4161 // given up on the previous one.
4005 4162 // Separated the asserts so that we know which one fires.
4006 4163 assert(_curr_region == NULL, "invariant");
4007 4164 assert(_finger == NULL, "invariant");
4008 4165 assert(_region_limit == NULL, "invariant");
4009 4166 if (_cm->verbose_low()) {
4010 4167 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4011 4168 }
4012 4169 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4013 4170 if (claimed_region != NULL) {
4014 4171 // Yes, we managed to claim one
4015 4172 statsOnly( ++_regions_claimed );
4016 4173
4017 4174 if (_cm->verbose_low()) {
4018 4175 gclog_or_tty->print_cr("[%u] we successfully claimed "
4019 4176 "region "PTR_FORMAT,
4020 4177 _worker_id, claimed_region);
4021 4178 }
4022 4179
4023 4180 setup_for_region(claimed_region);
4024 4181 assert(_curr_region == claimed_region, "invariant");
4025 4182 }
4026 4183 // It is important to call the regular clock here. It might take
4027 4184 // a while to claim a region if, for example, we hit a large
4028 4185 // block of empty regions. So we need to call the regular clock
4029 4186 // method once round the loop to make sure it's called
4030 4187 // frequently enough.
4031 4188 regular_clock_call();
4032 4189 }
4033 4190
4034 4191 if (!has_aborted() && _curr_region == NULL) {
4035 4192 assert(_cm->out_of_regions(),
4036 4193 "at this point we should be out of regions");
4037 4194 }
4038 4195   } while (_curr_region != NULL && !has_aborted());
4039 4196
4040 4197 if (!has_aborted()) {
4041 4198 // We cannot check whether the global stack is empty, since other
4042 4199 // tasks might be pushing objects to it concurrently.
4043 4200 assert(_cm->out_of_regions(),
4044 4201 "at this point we should be out of regions");
4045 4202
4046 4203 if (_cm->verbose_low()) {
4047 4204 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4048 4205 }
4049 4206
4050 4207 // Try to reduce the number of available SATB buffers so that
4051 4208 // remark has less work to do.
4052 4209 drain_satb_buffers();
4053 4210 }
4054 4211
4055 4212 // Since we've done everything else, we can now totally drain the
4056 4213 // local queue and global stack.
4057 4214 drain_local_queue(false);
4058 4215 drain_global_stack(false);
4059 4216
4060 4217   // Attempt to steal work from other tasks' queues.
4061 4218 if (do_stealing && !has_aborted()) {
4062 4219 // We have not aborted. This means that we have finished all that
4063 4220 // we could. Let's try to do some stealing...
4064 4221
4065 4222 // We cannot check whether the global stack is empty, since other
4066 4223 // tasks might be pushing objects to it concurrently.
4067 4224 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4068 4225 "only way to reach here");
4069 4226
4070 4227 if (_cm->verbose_low()) {
4071 4228 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4072 4229 }
4073 4230
4074 4231 while (!has_aborted()) {
4075 4232 oop obj;
4076 4233 statsOnly( ++_steal_attempts );
4077 4234
4078 4235 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4079 4236 if (_cm->verbose_medium()) {
4080 4237 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4081 4238 _worker_id, (void*) obj);
4082 4239 }
4083 4240
4084 4241 statsOnly( ++_steals );
4085 4242
4086 4243 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4087 4244 "any stolen object should be marked");
4088 4245 scan_object(obj);
4089 4246
4090 4247 // And since we're towards the end, let's totally drain the
4091 4248 // local queue and global stack.
4092 4249 drain_local_queue(false);
4093 4250 drain_global_stack(false);
4094 4251 } else {
4095 4252 break;
4096 4253 }
4097 4254 }
4098 4255 }
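    // [Editorial sketch, not part of the original source: the stealing
    // phase above, reduced to its essential shape with hypothetical
    // names. Each successful steal is followed by a *total* drain,
    // because scanning the stolen object may repopulate both the local
    // queue and the global stack; the first failed attempt ends the
    // phase:
    //
    //   while (!has_aborted()) {
    //     oop obj;
    //     if (!steal_from_random_victim(&obj)) break; // all queues empty
    //     scan_object(obj);
    //     drain_local_queue(false);
    //     drain_global_stack(false);
    //   }
    // ]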
4099 4256
4100 4257 // If we are about to wrap up and go into termination, check if we
4101 4258 // should raise the overflow flag.
4102 4259 if (do_termination && !has_aborted()) {
4103 4260 if (_cm->force_overflow()->should_force()) {
4104 4261 _cm->set_has_overflown();
4105 4262 regular_clock_call();
4106 4263 }
4107 4264 }
4108 4265
4109 4266 // We still haven't aborted. Now, let's try to get into the
4110 4267 // termination protocol.
4111 4268 if (do_termination && !has_aborted()) {
4112 4269 // We cannot check whether the global stack is empty, since other
4113 4270 // tasks might be concurrently pushing objects on it.
4114 4271 // Separated the asserts so that we know which one fires.
4115 4272 assert(_cm->out_of_regions(), "only way to reach here");
4116 4273 assert(_task_queue->size() == 0, "only way to reach here");
4117 4274
4118 4275 if (_cm->verbose_low()) {
4119 4276 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4120 4277 }
4121 4278
4122 4279 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4123 4280 // The CMTask class also extends the TerminatorTerminator class,
4124 4281 // hence its should_exit_termination() method will also decide
4125 4282 // whether to exit the termination protocol or not.
4126 4283 bool finished = _cm->terminator()->offer_termination(this);
4127 4284 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4128 4285 _termination_time_ms +=
4129 4286 termination_end_time_ms - _termination_start_time_ms;
4130 4287
4131 4288 if (finished) {
4132 4289 // We're all done.
4133 4290
4134 4291 if (_worker_id == 0) {
4135 4292 // let's allow task 0 to do this
4136 4293 if (concurrent()) {
4137 4294 assert(_cm->concurrent_marking_in_progress(), "invariant");
4138 4295 // we need to set this to false before the next
4139 4296 // safepoint. This way we ensure that the marking phase
4140 4297 // doesn't observe any more heap expansions.
4141 4298 _cm->clear_concurrent_marking_in_progress();
4142 4299 }
4143 4300 }
4144 4301
4145 4302 // We can now guarantee that the global stack is empty, since
4146 4303 // all other tasks have finished. We separated the guarantees so
4147 4304 // that, if a condition is false, we can immediately find out
4148 4305 // which one.
4149 4306 guarantee(_cm->out_of_regions(), "only way to reach here");
4150 4307 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4151 4308 guarantee(_task_queue->size() == 0, "only way to reach here");
4152 4309 guarantee(!_cm->has_overflown(), "only way to reach here");
4153 4310 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4154 4311
4155 4312 if (_cm->verbose_low()) {
4156 4313 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4157 4314 }
4158 4315 } else {
4159 4316     // Apparently there's more work to do. Let's abort this task; the
4160 4317     // caller will restart it and we can hopefully find more things to do.
4161 4318
4162 4319 if (_cm->verbose_low()) {
4163 4320 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4164 4321 _worker_id);
4165 4322 }
4166 4323
4167 4324 set_has_aborted();
4168 4325 statsOnly( ++_aborted_termination );
4169 4326 }
4170 4327 }
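  // [Editorial sketch, not part of the original source: the caller-side
  // contract assumed of offer_termination() above, with hypothetical
  // names. The call parks this worker until one of two things happens:
  //
  //   if (terminator->offer_termination(this)) {
  //     // every worker offered termination: marking is globally done
  //   } else {
  //     // should_exit_termination() saw new work: abort and restart
  //     set_has_aborted();
  //   }
  // ]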
4171 4328
4172 4329 // Mainly for debugging purposes to make sure that a pointer to the
4173 4330 // closure which was statically allocated in this frame doesn't
4174 4331 // escape it by accident.
4175 4332 set_cm_oop_closure(NULL);
4176 4333 double end_time_ms = os::elapsedVTime() * 1000.0;
4177 4334 double elapsed_time_ms = end_time_ms - _start_time_ms;
4178 4335 // Update the step history.
4179 4336 _step_times_ms.add(elapsed_time_ms);
4180 4337
4181 4338 if (has_aborted()) {
4182 4339 // The task was aborted for some reason.
4183 4340
4184 4341 statsOnly( ++_aborted );
4185 4342
4186 4343 if (_has_timed_out) {
4187 4344 double diff_ms = elapsed_time_ms - _time_target_ms;
4188 4345 // Keep statistics of how well we did with respect to hitting
4189 4346 // our target only if we actually timed out (if we aborted for
4190 4347 // other reasons, then the results might get skewed).
4191 4348 _marking_step_diffs_ms.add(diff_ms);
4192 4349 }
4193 4350
4194 4351 if (_cm->has_overflown()) {
4195 4352 // This is the interesting one. We aborted because a global
4196 4353 // overflow was raised. This means we have to restart the
4197 4354 // marking phase and start iterating over regions. However, in
4198 4355 // order to do this we have to make sure that all tasks stop
4199 4356 // what they are doing and re-initialise in a safe manner. We
4200 4357 // will achieve this with the use of two barrier sync points.
4201 4358
4202 4359 if (_cm->verbose_low()) {
4203 4360 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4204 4361 }
4205 4362
4206 4363 _cm->enter_first_sync_barrier(_worker_id);
4207 4364 // When we exit this sync barrier we know that all tasks have
4208 4365 // stopped doing marking work. So, it's now safe to
4209 4366 // re-initialise our data structures. At the end of this method,
4210 4367 // task 0 will clear the global data structures.
4211 4368
4212 4369 statsOnly( ++_aborted_overflow );
4213 4370
4214 4371 // We clear the local state of this task...
4215 4372 clear_region_fields();
4216 4373
4217 4374 // ...and enter the second barrier.
4218 4375 _cm->enter_second_sync_barrier(_worker_id);
4219 4376       // At this point everything has been re-initialised and we're
4220 4377 // ready to restart.
4221 4378 }
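      // [Editorial sketch, not part of the original source: the generic
      // two-barrier restart pattern used above, with hypothetical names.
      // The first barrier guarantees that no task is still producing or
      // consuming marking work before any state is reset; the second
      // guarantees that every reset has completed before any task
      // resumes:
      //
      //   barrier_one.wait();     // all tasks have stopped marking
      //   reset_local_state();    // safe: no concurrent producers
      //   // (task 0 additionally resets the global data structures)
      //   barrier_two.wait();     // all resets visible; safe to restart
      // ]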
4222 4379
4223 4380 if (_cm->verbose_low()) {
4224 4381 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4225 4382 "elapsed = %1.2lfms <<<<<<<<<<",
4226 4383 _worker_id, _time_target_ms, elapsed_time_ms);
4227 4384 if (_cm->has_aborted()) {
4228 4385 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4229 4386 _worker_id);
4230 4387 }
4231 4388 }
4232 4389 } else {
4233 4390 if (_cm->verbose_low()) {
4234 4391 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4235 4392 "elapsed = %1.2lfms <<<<<<<<<<",
4236 4393 _worker_id, _time_target_ms, elapsed_time_ms);
4237 4394 }
4238 4395 }
4239 4396
4240 4397 _claimed = false;
4241 4398 }
4242 4399
4243 4400 CMTask::CMTask(uint worker_id,
4244 4401 ConcurrentMark* cm,
4245 4402 size_t* marked_bytes,
4246 4403 BitMap* card_bm,
4247 4404 CMTaskQueue* task_queue,
4248 4405 CMTaskQueueSet* task_queues)
4249 4406 : _g1h(G1CollectedHeap::heap()),
4250 4407 _worker_id(worker_id), _cm(cm),
4251 4408 _claimed(false),
4252 4409 _nextMarkBitMap(NULL), _hash_seed(17),
4253 4410 _task_queue(task_queue),
4254 4411 _task_queues(task_queues),
4255 4412 _cm_oop_closure(NULL),
4256 4413 _marked_bytes_array(marked_bytes),
4257 4414 _card_bm(card_bm) {
4258 4415 guarantee(task_queue != NULL, "invariant");
4259 4416 guarantee(task_queues != NULL, "invariant");
4260 4417
4261 4418 statsOnly( _clock_due_to_scanning = 0;
4262 4419 _clock_due_to_marking = 0 );
4263 4420
4264 4421 _marking_step_diffs_ms.add(0.5);
4265 4422 }
4266 4423
4267 4424 // These are formatting macros that are used below to ensure
4268 4425 // consistent formatting. The *_H_* versions are used to format the
4269 4426 // header for a particular value and they should be kept consistent
4270 4427 // with the corresponding macro. Also note that most of the macros add
4271 4428 // the necessary white space (as a prefix) which makes them a bit
4272 4429 // easier to compose.
4273 4430
4274 4431 // All the output lines are prefixed with this string to be able to
4275 4432 // identify them easily in a large log file.
4276 4433 #define G1PPRL_LINE_PREFIX "###"
4277 4434
4278 4435 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4279 4436 #ifdef _LP64
4280 4437 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4281 4438 #else // _LP64
4282 4439 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4283 4440 #endif // _LP64
4284 4441
4285 4442 // For per-region info
4286 4443 #define G1PPRL_TYPE_FORMAT " %-4s"
4287 4444 #define G1PPRL_TYPE_H_FORMAT " %4s"
4288 4445 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4289 4446 #define G1PPRL_BYTE_H_FORMAT " %9s"
4290 4447 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4291 4448 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4292 4449
4293 4450 // For summary info
4294 4451 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4295 4452 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4296 4453 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4297 4454 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
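// [Editorial example, not part of the original source: adjacent string
// literals concatenate in C/C++, so the macros above compose into a
// single format string at compile time. The per-region print in
// doHeapRegion() below therefore expands to one long literal along the
// lines of
//
//   "###" " %-4s" " "PTR_FORMAT"-"PTR_FORMAT" "SIZE_FORMAT_W(9) ...
//
// and a printed row might look like (illustrative values only):
//
//   ###  OLD 0x00000000f4000000-0x00000000f4100000   1048576 ...
// ]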
4298 4455
4299 4456 G1PrintRegionLivenessInfoClosure::
4300 4457 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4301 4458 : _out(out),
4302 4459 _total_used_bytes(0), _total_capacity_bytes(0),
4303 4460 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4304 4461 _hum_used_bytes(0), _hum_capacity_bytes(0),
4305 4462 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4306 4463 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4307 4464 MemRegion g1_committed = g1h->g1_committed();
4308 4465 MemRegion g1_reserved = g1h->g1_reserved();
4309 4466 double now = os::elapsedTime();
4310 4467
4311 4468 // Print the header of the output.
4312 4469 _out->cr();
4313 4470 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4314 4471 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4315 4472 G1PPRL_SUM_ADDR_FORMAT("committed")
4316 4473 G1PPRL_SUM_ADDR_FORMAT("reserved")
4317 4474 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4318 4475 g1_committed.start(), g1_committed.end(),
4319 4476 g1_reserved.start(), g1_reserved.end(),
4320 4477 HeapRegion::GrainBytes);
4321 4478 _out->print_cr(G1PPRL_LINE_PREFIX);
4322 4479 _out->print_cr(G1PPRL_LINE_PREFIX
4323 4480 G1PPRL_TYPE_H_FORMAT
4324 4481 G1PPRL_ADDR_BASE_H_FORMAT
4325 4482 G1PPRL_BYTE_H_FORMAT
4326 4483 G1PPRL_BYTE_H_FORMAT
4327 4484 G1PPRL_BYTE_H_FORMAT
4328 4485 G1PPRL_DOUBLE_H_FORMAT,
4329 4486 "type", "address-range",
4330 4487 "used", "prev-live", "next-live", "gc-eff");
4331 4488 _out->print_cr(G1PPRL_LINE_PREFIX
4332 4489 G1PPRL_TYPE_H_FORMAT
4333 4490 G1PPRL_ADDR_BASE_H_FORMAT
4334 4491 G1PPRL_BYTE_H_FORMAT
4335 4492 G1PPRL_BYTE_H_FORMAT
4336 4493 G1PPRL_BYTE_H_FORMAT
4337 4494 G1PPRL_DOUBLE_H_FORMAT,
4338 4495 "", "",
4339 4496 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4340 4497 }
4341 4498
4342 4499 // It takes as a parameter a reference to one of the _hum_* fields. It
4343 4500 // deduces the corresponding value for a region in a humongous region
4344 4501 // series (either the region size, or what's left if the _hum_* field
4345 4502 // is < the region size), and updates the _hum_* field accordingly.
4346 4503 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4347 4504 size_t bytes = 0;
4348 4505 // The > 0 check is to deal with the prev and next live bytes which
4349 4506 // could be 0.
4350 4507 if (*hum_bytes > 0) {
4351 4508 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4352 4509 *hum_bytes -= bytes;
4353 4510 }
4354 4511 return bytes;
4355 4512 }
4356 4513
4357 4514 // It deduces the values for a region in a humongous region series
4358 4515 // from the _hum_* fields and updates those accordingly. It assumes
4359 4516 // that the _hum_* fields have already been set up from the "starts
4360 4517 // humongous" region and we visit the regions in address order.
4361 4518 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4362 4519 size_t* capacity_bytes,
4363 4520 size_t* prev_live_bytes,
4364 4521 size_t* next_live_bytes) {
4365 4522 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4366 4523 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4367 4524 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4368 4525 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4369 4526 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4370 4527 }
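// [Editorial worked example, not part of the original source: suppose a
// humongous series spans three regions with GrainBytes = 1M and the
// "starts humongous" region records 2.5M used for the whole series. The
// HUMS row reports MIN2(1M, 2.5M) = 1M, leaving 1.5M in _hum_used_bytes;
// the first HUMC row reports another 1M, leaving 0.5M; the second HUMC
// row reports the remaining 0.5M, and the field drops back to zero,
// ready for the next series.]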
4371 4528
4372 4529 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4373 4530 const char* type = "";
4374 4531 HeapWord* bottom = r->bottom();
4375 4532 HeapWord* end = r->end();
4376 4533 size_t capacity_bytes = r->capacity();
4377 4534 size_t used_bytes = r->used();
4378 4535 size_t prev_live_bytes = r->live_bytes();
4379 4536 size_t next_live_bytes = r->next_live_bytes();
4380 4537 double gc_eff = r->gc_efficiency();
4381 4538 if (r->used() == 0) {
4382 4539 type = "FREE";
4383 4540 } else if (r->is_survivor()) {
4384 4541 type = "SURV";
4385 4542 } else if (r->is_young()) {
4386 4543 type = "EDEN";
4387 4544 } else if (r->startsHumongous()) {
4388 4545 type = "HUMS";
4389 4546
4390 4547 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4391 4548 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4392 4549 "they should have been zeroed after the last time we used them");
4393 4550 // Set up the _hum_* fields.
4394 4551 _hum_capacity_bytes = capacity_bytes;
4395 4552 _hum_used_bytes = used_bytes;
4396 4553 _hum_prev_live_bytes = prev_live_bytes;
4397 4554 _hum_next_live_bytes = next_live_bytes;
4398 4555 get_hum_bytes(&used_bytes, &capacity_bytes,
4399 4556 &prev_live_bytes, &next_live_bytes);
4400 4557 end = bottom + HeapRegion::GrainWords;
4401 4558 } else if (r->continuesHumongous()) {
4402 4559 type = "HUMC";
4403 4560 get_hum_bytes(&used_bytes, &capacity_bytes,
4404 4561 &prev_live_bytes, &next_live_bytes);
4405 4562 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4406 4563 } else {
4407 4564 type = "OLD";
4408 4565 }
4409 4566
4410 4567 _total_used_bytes += used_bytes;
4411 4568 _total_capacity_bytes += capacity_bytes;
4412 4569 _total_prev_live_bytes += prev_live_bytes;
4413 4570 _total_next_live_bytes += next_live_bytes;
4414 4571
4415 4572 // Print a line for this particular region.
4416 4573 _out->print_cr(G1PPRL_LINE_PREFIX
4417 4574 G1PPRL_TYPE_FORMAT
4418 4575 G1PPRL_ADDR_BASE_FORMAT
4419 4576 G1PPRL_BYTE_FORMAT
4420 4577 G1PPRL_BYTE_FORMAT
4421 4578 G1PPRL_BYTE_FORMAT
4422 4579 G1PPRL_DOUBLE_FORMAT,
4423 4580 type, bottom, end,
4424 4581 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4425 4582
4426 4583 return false;
4427 4584 }
4428 4585
4429 4586 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4430 4587 // Print the footer of the output.
4431 4588 _out->print_cr(G1PPRL_LINE_PREFIX);
4432 4589 _out->print_cr(G1PPRL_LINE_PREFIX
4433 4590 " SUMMARY"
4434 4591 G1PPRL_SUM_MB_FORMAT("capacity")
4435 4592 G1PPRL_SUM_MB_PERC_FORMAT("used")
4436 4593 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4437 4594 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4438 4595 bytes_to_mb(_total_capacity_bytes),
4439 4596 bytes_to_mb(_total_used_bytes),
4440 4597 perc(_total_used_bytes, _total_capacity_bytes),
4441 4598 bytes_to_mb(_total_prev_live_bytes),
4442 4599 perc(_total_prev_live_bytes, _total_capacity_bytes),
4443 4600 bytes_to_mb(_total_next_live_bytes),
4444 4601 perc(_total_next_live_bytes, _total_capacity_bytes));
4445 4602 _out->cr();
4446 4603 }
(1583 lines elided)