--- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
32 32 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
33 33 #include "gc_implementation/g1/g1RemSet.hpp"
34 34 #include "gc_implementation/g1/heapRegionRemSet.hpp"
35 35 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
36 36 #include "gc_implementation/shared/vmGCOperations.hpp"
37 37 #include "memory/genOopClosures.inline.hpp"
38 38 #include "memory/referencePolicy.hpp"
39 39 #include "memory/resourceArea.hpp"
40 40 #include "oops/oop.inline.hpp"
41 41 #include "runtime/handles.inline.hpp"
42 42 #include "runtime/java.hpp"
43 43
44 44 //
45 45 // CMS Bit Map Wrapper
46 46
47 47 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
48 48 _bm((uintptr_t*)NULL,0),
49 49 _shifter(shifter) {
50 50 _bmStartWord = (HeapWord*)(rs.base());
51 51 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
52 52 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
53 53 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
54 54
55 55 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
56 56 // For now we'll just commit all of the bit map up front.
57 57 // Later on we'll try to be more parsimonious with swap.
58 58 guarantee(_virtual_space.initialize(brs, brs.size()),
59 59 "couldn't reseve backing store for CMS bit map");
60 60 assert(_virtual_space.committed_size() == brs.size(),
61 61 "didn't reserve backing store for all of CMS bit map?");
62 62 _bm.set_map((uintptr_t*)_virtual_space.low());
63 63 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
64 64 _bmWordSize, "inconsistency in bit map sizing");
65 65 _bm.set_size(_bmWordSize >> _shifter);
66 66 }
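A rough worked example of the sizing arithmetic in this constructor, assuming a 64-bit build with 8-byte heap words, 8-byte object alignment (so the shifter ConcurrentMark passes in, MinObjAlignment - 1, is 0 and the map keeps one bit per heap word) and an illustrative 1 GB reserved heap; the heap size and constants here are assumptions for illustration, not values read from the source:

#include <cstddef>
#include <cstdio>

int main() {
  // Assumed example values: 1 GB reserved space, 8-byte HeapWords,
  // shifter of 0, 8 bits per byte (LogBitsPerByte == 3).
  const size_t heap_bytes        = 1024UL * 1024 * 1024;
  const size_t heap_word_size    = 8;
  const int    shifter           = 0;
  const int    log_bits_per_byte = 3;

  size_t bm_word_size = heap_bytes / heap_word_size;               // _bmWordSize
  size_t bitmap_bits  = bm_word_size >> shifter;                   // _bm.set_size(...)
  size_t bitmap_bytes = (bm_word_size >> (shifter + log_bits_per_byte)) + 1;

  // For a 1 GB heap this is 134217728 bits backed by ~16 MB, i.e. heap/64.
  printf("bits = %zu, backing bytes = %zu\n", bitmap_bits, bitmap_bytes);
  return 0;
}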
67 67
68 68 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
69 69 HeapWord* limit) const {
70 70 // First we must round addr *up* to a possible object boundary.
71 71 addr = (HeapWord*)align_size_up((intptr_t)addr,
72 72 HeapWordSize << _shifter);
73 73 size_t addrOffset = heapWordToOffset(addr);
74 74 if (limit == NULL) {
75 75 limit = _bmStartWord + _bmWordSize;
76 76 }
77 77 size_t limitOffset = heapWordToOffset(limit);
78 78 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
79 79 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
80 80 assert(nextAddr >= addr, "get_next_one postcondition");
81 81 assert(nextAddr == limit || isMarked(nextAddr),
82 82 "get_next_one postcondition");
83 83 return nextAddr;
84 84 }
85 85
86 86 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
87 87 HeapWord* limit) const {
88 88 size_t addrOffset = heapWordToOffset(addr);
89 89 if (limit == NULL) {
90 90 limit = _bmStartWord + _bmWordSize;
91 91 }
92 92 size_t limitOffset = heapWordToOffset(limit);
93 93 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
94 94 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
95 95 assert(nextAddr >= addr, "get_next_one postcondition");
96 96 assert(nextAddr == limit || !isMarked(nextAddr),
97 97 "get_next_one postcondition");
98 98 return nextAddr;
99 99 }
100 100
101 101 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
102 102 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
103 103 return (int) (diff >> _shifter);
104 104 }
105 105
106 106 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
107 107 HeapWord* left = MAX2(_bmStartWord, mr.start());
108 108 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
109 109 if (right > left) {
110 110 // Right-open interval [leftOffset, rightOffset).
111 111 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
112 112 } else {
113 113 return true;
114 114 }
115 115 }
116 116
117 117 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
118 118 size_t from_start_index,
119 119 HeapWord* to_start_word,
120 120 size_t word_num) {
121 121 _bm.mostly_disjoint_range_union(from_bitmap,
122 122 from_start_index,
123 123 heapWordToOffset(to_start_word),
124 124 word_num);
125 125 }
126 126
127 127 #ifndef PRODUCT
128 128 bool CMBitMapRO::covers(ReservedSpace rs) const {
129 129 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
130 130 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
131 131 "size inconsistency");
132 132 return _bmStartWord == (HeapWord*)(rs.base()) &&
133 133 _bmWordSize == rs.size()>>LogHeapWordSize;
134 134 }
135 135 #endif
136 136
137 137 void CMBitMap::clearAll() {
138 138 _bm.clear();
139 139 return;
140 140 }
141 141
142 142 void CMBitMap::markRange(MemRegion mr) {
143 143 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
144 144 assert(!mr.is_empty(), "unexpected empty region");
145 145 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
146 146 ((HeapWord *) mr.end())),
147 147 "markRange memory region end is not card aligned");
148 148 // convert address range into offset range
149 149 _bm.at_put_range(heapWordToOffset(mr.start()),
150 150 heapWordToOffset(mr.end()), true);
151 151 }
152 152
153 153 void CMBitMap::clearRange(MemRegion mr) {
154 154 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
155 155 assert(!mr.is_empty(), "unexpected empty region");
156 156 // convert address range into offset range
157 157 _bm.at_put_range(heapWordToOffset(mr.start()),
158 158 heapWordToOffset(mr.end()), false);
159 159 }
160 160
161 161 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
162 162 HeapWord* end_addr) {
163 163 HeapWord* start = getNextMarkedWordAddress(addr);
164 164 start = MIN2(start, end_addr);
165 165 HeapWord* end = getNextUnmarkedWordAddress(start);
166 166 end = MIN2(end, end_addr);
167 167 assert(start <= end, "Consistency check");
168 168 MemRegion mr(start, end);
169 169 if (!mr.is_empty()) {
170 170 clearRange(mr);
171 171 }
172 172 return mr;
173 173 }
174 174
175 175 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
176 176 _base(NULL), _cm(cm)
177 177 #ifdef ASSERT
178 178 , _drain_in_progress(false)
179 179 , _drain_in_progress_yields(false)
180 180 #endif
181 181 {}
182 182
183 183 void CMMarkStack::allocate(size_t size) {
184 184 _base = NEW_C_HEAP_ARRAY(oop, size);
185 185 if (_base == NULL) {
186 186 vm_exit_during_initialization("Failed to allocate "
187 187 "CM region mark stack");
188 188 }
189 189 _index = 0;
190 190 _capacity = (jint) size;
191 191 _oops_do_bound = -1;
192 192 NOT_PRODUCT(_max_depth = 0);
193 193 }
194 194
195 195 CMMarkStack::~CMMarkStack() {
196 196 if (_base != NULL) {
197 197 FREE_C_HEAP_ARRAY(oop, _base);
198 198 }
199 199 }
200 200
201 201 void CMMarkStack::par_push(oop ptr) {
202 202 while (true) {
203 203 if (isFull()) {
204 204 _overflow = true;
205 205 return;
206 206 }
207 207 // Otherwise...
208 208 jint index = _index;
209 209 jint next_index = index+1;
210 210 jint res = Atomic::cmpxchg(next_index, &_index, index);
211 211 if (res == index) {
212 212 _base[index] = ptr;
213 213 // Note that we don't maintain this atomically. We could, but it
214 214 // doesn't seem necessary.
215 215 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
216 216 return;
217 217 }
218 218 // Otherwise, we need to try again.
219 219 }
220 220 }
221 221
222 222 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
223 223 while (true) {
224 224 if (isFull()) {
225 225 _overflow = true;
226 226 return;
227 227 }
228 228 // Otherwise...
229 229 jint index = _index;
230 230 jint next_index = index + n;
231 231 if (next_index > _capacity) {
232 232 _overflow = true;
233 233 return;
234 234 }
235 235 jint res = Atomic::cmpxchg(next_index, &_index, index);
236 236 if (res == index) {
237 237 for (int i = 0; i < n; i++) {
238 238 int ind = index + i;
239 239 assert(ind < _capacity, "By overflow test above.");
240 240 _base[ind] = ptr_arr[i];
241 241 }
242 242 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
243 243 return;
244 244 }
245 245 // Otherwise, we need to try again.
246 246 }
247 247 }
248 248
249 249
250 250 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
251 251 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
252 252 jint start = _index;
253 253 jint next_index = start + n;
254 254 if (next_index > _capacity) {
255 255 _overflow = true;
256 256 return;
257 257 }
258 258 // Otherwise.
259 259 _index = next_index;
260 260 for (int i = 0; i < n; i++) {
261 261 int ind = start + i;
262 262 assert(ind < _capacity, "By overflow test above.");
263 263 _base[ind] = ptr_arr[i];
264 264 }
265 265 }
266 266
267 267
268 268 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
269 269 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
270 270 jint index = _index;
271 271 if (index == 0) {
272 272 *n = 0;
273 273 return false;
274 274 } else {
275 275 int k = MIN2(max, index);
276 276 jint new_ind = index - k;
277 277 for (int j = 0; j < k; j++) {
278 278 ptr_arr[j] = _base[new_ind + j];
279 279 }
280 280 _index = new_ind;
281 281 *n = k;
282 282 return true;
283 283 }
284 284 }
285 285
286 286
287 287 CMRegionStack::CMRegionStack() : _base(NULL) {}
288 288
289 289 void CMRegionStack::allocate(size_t size) {
290 290 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
291 291 if (_base == NULL) {
292 292 vm_exit_during_initialization("Failed to allocate CM region mark stack");
293 293 }
294 294 _index = 0;
295 295 _capacity = (jint) size;
296 296 }
297 297
298 298 CMRegionStack::~CMRegionStack() {
299 299 if (_base != NULL) {
300 300 FREE_C_HEAP_ARRAY(oop, _base);
301 301 }
302 302 }
303 303
304 304 void CMRegionStack::push_lock_free(MemRegion mr) {
305 305 assert(mr.word_size() > 0, "Precondition");
306 306 while (true) {
307 307 jint index = _index;
308 308
309 309 if (index >= _capacity) {
310 310 _overflow = true;
311 311 return;
312 312 }
313 313 // Otherwise...
314 314 jint next_index = index+1;
315 315 jint res = Atomic::cmpxchg(next_index, &_index, index);
316 316 if (res == index) {
317 317 _base[index] = mr;
318 318 return;
319 319 }
320 320 // Otherwise, we need to try again.
321 321 }
322 322 }
323 323
324 324 // Lock-free pop of the region stack. Called during the concurrent
325 325 // marking / remark phases. Should only be called in tandem with
326 326 // other lock-free pops.
327 327 MemRegion CMRegionStack::pop_lock_free() {
328 328 while (true) {
329 329 jint index = _index;
330 330
331 331 if (index == 0) {
332 332 return MemRegion();
333 333 }
334 334 // Otherwise...
335 335 jint next_index = index-1;
336 336 jint res = Atomic::cmpxchg(next_index, &_index, index);
337 337 if (res == index) {
338 338 MemRegion mr = _base[next_index];
339 339 if (mr.start() != NULL) {
340 340 assert(mr.end() != NULL, "invariant");
341 341 assert(mr.word_size() > 0, "invariant");
342 342 return mr;
343 343 } else {
344 344 // that entry was invalidated... let's skip it
345 345 assert(mr.end() == NULL, "invariant");
346 346 }
347 347 }
348 348 // Otherwise, we need to try again.
349 349 }
350 350 }
351 351
352 352 #if 0
353 353 // The routines that manipulate the region stack with a lock are
354 354 // not currently used. They should be retained, however, as a
355 355 // diagnostic aid.
356 356
357 357 void CMRegionStack::push_with_lock(MemRegion mr) {
358 358 assert(mr.word_size() > 0, "Precondition");
359 359 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
360 360
361 361 if (isFull()) {
362 362 _overflow = true;
363 363 return;
364 364 }
365 365
366 366 _base[_index] = mr;
367 367 _index += 1;
368 368 }
369 369
370 370 MemRegion CMRegionStack::pop_with_lock() {
371 371 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
372 372
373 373 while (true) {
374 374 if (_index == 0) {
375 375 return MemRegion();
376 376 }
377 377 _index -= 1;
378 378
379 379 MemRegion mr = _base[_index];
380 380 if (mr.start() != NULL) {
381 381 assert(mr.end() != NULL, "invariant");
382 382 assert(mr.word_size() > 0, "invariant");
383 383 return mr;
384 384 } else {
385 385 // that entry was invalidated... let's skip it
386 386 assert(mr.end() == NULL, "invariant");
387 387 }
388 388 }
389 389 }
390 390 #endif
391 391
392 392 bool CMRegionStack::invalidate_entries_into_cset() {
393 393 bool result = false;
394 394 G1CollectedHeap* g1h = G1CollectedHeap::heap();
395 395 for (int i = 0; i < _oops_do_bound; ++i) {
396 396 MemRegion mr = _base[i];
397 397 if (mr.start() != NULL) {
398 398 assert(mr.end() != NULL, "invariant");
399 399 assert(mr.word_size() > 0, "invariant");
400 400 HeapRegion* hr = g1h->heap_region_containing(mr.start());
401 401 assert(hr != NULL, "invariant");
402 402 if (hr->in_collection_set()) {
403 403 // The region points into the collection set
404 404 _base[i] = MemRegion();
405 405 result = true;
406 406 }
407 407 } else {
408 408 // that entry was invalidated... let's skip it
409 409 assert(mr.end() == NULL, "invariant");
410 410 }
411 411 }
412 412 return result;
413 413 }
414 414
415 415 template<class OopClosureClass>
416 416 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
417 417 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
418 418 || SafepointSynchronize::is_at_safepoint(),
419 419 "Drain recursion must be yield-safe.");
420 420 bool res = true;
421 421 debug_only(_drain_in_progress = true);
422 422 debug_only(_drain_in_progress_yields = yield_after);
423 423 while (!isEmpty()) {
424 424 oop newOop = pop();
425 425 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
426 426 assert(newOop->is_oop(), "Expected an oop");
427 427 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
428 428 "only grey objects on this stack");
429 429 // iterate over the oops in this oop, marking and pushing
430 430 // the ones in CMS generation.
431 431 newOop->oop_iterate(cl);
432 432 if (yield_after && _cm->do_yield_check()) {
433 433 res = false;
434 434 break;
435 435 }
436 436 }
437 437 debug_only(_drain_in_progress = false);
438 438 return res;
439 439 }
440 440
441 441 void CMMarkStack::oops_do(OopClosure* f) {
442 442 if (_index == 0) return;
443 443 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
444 444 "Bound must be set.");
445 445 for (int i = 0; i < _oops_do_bound; i++) {
446 446 f->do_oop(&_base[i]);
447 447 }
448 448 _oops_do_bound = -1;
449 449 }
450 450
451 451 bool ConcurrentMark::not_yet_marked(oop obj) const {
452 452 return (_g1h->is_obj_ill(obj)
453 453 || (_g1h->is_in_permanent(obj)
454 454 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
455 455 }
456 456
457 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
458 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
459 459 #endif // _MSC_VER
460 460
461 461 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
462 462 int max_regions) :
463 463 _markBitMap1(rs, MinObjAlignment - 1),
464 464 _markBitMap2(rs, MinObjAlignment - 1),
465 465
466 466 _parallel_marking_threads(0),
467 467 _sleep_factor(0.0),
468 468 _marking_task_overhead(1.0),
469 469 _cleanup_sleep_factor(0.0),
470 470 _cleanup_task_overhead(1.0),
471 471 _cleanup_list("Cleanup List"),
472 472 _region_bm(max_regions, false /* in_resource_area*/),
473 473 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
474 474 CardTableModRefBS::card_shift,
475 475 false /* in_resource_area*/),
476 476 _prevMarkBitMap(&_markBitMap1),
477 477 _nextMarkBitMap(&_markBitMap2),
478 478 _at_least_one_mark_complete(false),
479 479
480 480 _markStack(this),
481 481 _regionStack(),
482 482 // _finger set in set_non_marking_state
483 483
484 484 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
485 485 // _active_tasks set in set_non_marking_state
486 486 // _tasks set inside the constructor
487 487 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
488 488 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
489 489
490 490 _has_overflown(false),
491 491 _concurrent(false),
492 492 _has_aborted(false),
493 493 _restart_for_overflow(false),
494 494 _concurrent_marking_in_progress(false),
495 495 _should_gray_objects(false),
496 496
497 497 // _verbose_level set below
498 498
499 499 _init_times(),
500 500 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
501 501 _cleanup_times(),
502 502 _total_counting_time(0.0),
503 503 _total_rs_scrub_time(0.0),
504 504
505 505 _parallel_workers(NULL) {
506 506 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
507 507 if (verbose_level < no_verbose) {
508 508 verbose_level = no_verbose;
509 509 }
510 510 if (verbose_level > high_verbose) {
511 511 verbose_level = high_verbose;
512 512 }
513 513 _verbose_level = verbose_level;
514 514
515 515 if (verbose_low()) {
516 516 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
517 517 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
518 518 }
519 519
520 520 _markStack.allocate(MarkStackSize);
521 521 _regionStack.allocate(G1MarkRegionStackSize);
522 522
523 523 // Create & start a ConcurrentMark thread.
524 524 _cmThread = new ConcurrentMarkThread(this);
525 525 assert(cmThread() != NULL, "CM Thread should have been created");
526 526 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
527 527
528 528 _g1h = G1CollectedHeap::heap();
529 529 assert(CGC_lock != NULL, "Where's the CGC_lock?");
530 530 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
531 531 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
532 532
533 533 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
534 534 satb_qs.set_buffer_size(G1SATBBufferSize);
535 535
536 536 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
537 537 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
538 538
539 539 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
540 540 _active_tasks = _max_task_num;
541 541 for (int i = 0; i < (int) _max_task_num; ++i) {
542 542 CMTaskQueue* task_queue = new CMTaskQueue();
543 543 task_queue->initialize();
544 544 _task_queues->register_queue(i, task_queue);
545 545
546 546 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
547 547 _accum_task_vtime[i] = 0.0;
548 548 }
549 549
550 550 if (ConcGCThreads > ParallelGCThreads) {
551 551 vm_exit_during_initialization("Can't have more ConcGCThreads "
552 552 "than ParallelGCThreads.");
553 553 }
554 554 if (ParallelGCThreads == 0) {
555 555 // if we are not running with any parallel GC threads we will not
556 556 // spawn any marking threads either
557 557 _parallel_marking_threads = 0;
558 558 _sleep_factor = 0.0;
559 559 _marking_task_overhead = 1.0;
560 560 } else {
561 561 if (ConcGCThreads > 0) {
562 562 // notice that ConcGCThreads overrides G1MarkingOverheadPercent
563 563 // if both are set
564 564
565 565 _parallel_marking_threads = ConcGCThreads;
566 566 _sleep_factor = 0.0;
567 567 _marking_task_overhead = 1.0;
568 568 } else if (G1MarkingOverheadPercent > 0) {
569 569 // we will calculate the number of parallel marking threads
570 570 // based on a target overhead with respect to the soft real-time
571 571 // goal
572 572
573 573 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
574 574 double overall_cm_overhead =
575 575 (double) MaxGCPauseMillis * marking_overhead /
576 576 (double) GCPauseIntervalMillis;
577 577 double cpu_ratio = 1.0 / (double) os::processor_count();
578 578 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
579 579 double marking_task_overhead =
580 580 overall_cm_overhead / marking_thread_num *
581 581 (double) os::processor_count();
582 582 double sleep_factor =
583 583 (1.0 - marking_task_overhead) / marking_task_overhead;
584 584
585 585 _parallel_marking_threads = (size_t) marking_thread_num;
586 586 _sleep_factor = sleep_factor;
587 587 _marking_task_overhead = marking_task_overhead;
588 588 } else {
589 589 _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
590 590 _sleep_factor = 0.0;
591 591 _marking_task_overhead = 1.0;
592 592 }
593 593
594 594 if (parallel_marking_threads() > 1) {
595 595 _cleanup_task_overhead = 1.0;
596 596 } else {
597 597 _cleanup_task_overhead = marking_task_overhead();
598 598 }
599 599 _cleanup_sleep_factor =
600 600 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
601 601
602 602 #if 0
603 603 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
604 604 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
605 605 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
606 606 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
607 607 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
608 608 #endif
609 609
610 610 guarantee(parallel_marking_threads() > 0, "peace of mind");
611 611 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
612 612 (int) _parallel_marking_threads, false, true);
613 613 if (_parallel_workers == NULL) {
614 614 vm_exit_during_initialization("Failed necessary allocation.");
615 615 } else {
616 616 _parallel_workers->initialize_workers();
617 617 }
618 618 }
619 619
620 620 // so that the call below can read a sensible value
621 621 _heap_start = (HeapWord*) rs.base();
622 622 set_non_marking_state();
623 623 }
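A worked instance of the G1MarkingOverheadPercent sizing branch in the constructor above, using assumed, illustrative flag values (10% marking overhead, 200 ms pauses every 1000 ms, 8 CPUs); these are not JVM defaults, and the locals simply mirror the names used in the source:

#include <cmath>
#include <cstdio>

int main() {
  // Assumed flag values for illustration only.
  const double G1MarkingOverheadPercent = 10.0;
  const double MaxGCPauseMillis         = 200.0;
  const double GCPauseIntervalMillis    = 1000.0;
  const int    processor_count          = 8;

  double marking_overhead    = G1MarkingOverheadPercent / 100.0;       // 0.10
  double overall_cm_overhead =
      MaxGCPauseMillis * marking_overhead / GCPauseIntervalMillis;     // 0.02
  double cpu_ratio           = 1.0 / processor_count;                  // 0.125
  double marking_thread_num  = ceil(overall_cm_overhead / cpu_ratio);  // 1
  double marking_task_overhead =
      overall_cm_overhead / marking_thread_num * processor_count;      // 0.16
  double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;           // 5.25

  // One marking thread that works ~16% of the time and sleeps roughly
  // 5.25x as long as it just worked, keeping marking within the target
  // overhead relative to the soft real-time goal.
  printf("threads = %.0f, task overhead = %.2f, sleep factor = %.2f\n",
         marking_thread_num, marking_task_overhead, sleep_factor);
  return 0;
}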
624 624
625 625 void ConcurrentMark::update_g1_committed(bool force) {
626 626 // If concurrent marking is not in progress, then we do not need to
627 627 // update _heap_end. This has a subtle and important
628 628 // side-effect. Imagine that two evacuation pauses happen between
629 629 // marking completion and remark. The first one can grow the
630 630 // heap (hence now the finger is below the heap end). Then, the
631 631 // second one could unnecessarily push regions on the region
632 632 // stack. This causes the invariant that the region stack is empty
633 633 // at the beginning of remark to be false. By ensuring that we do
634 634 // not observe heap expansions after marking is complete, we do
635 635 // not have this problem.
636 636 if (!concurrent_marking_in_progress() && !force) return;
637 637
638 638 MemRegion committed = _g1h->g1_committed();
639 639 assert(committed.start() == _heap_start, "start shouldn't change");
640 640 HeapWord* new_end = committed.end();
641 641 if (new_end > _heap_end) {
642 642 // The heap has been expanded.
643 643
644 644 _heap_end = new_end;
645 645 }
646 646 // Notice that the heap can also shrink. However, this only happens
647 647 // during a Full GC (at least currently) and the entire marking
648 648 // phase will bail out and the task will not be restarted. So, let's
649 649 // do nothing.
650 650 }
651 651
652 652 void ConcurrentMark::reset() {
653 653 // Starting values for these two. This should be called in a STW
654 654 // phase. CM will be notified of any future g1_committed
655 655 // expansions at the end of evacuation pauses, when tasks are
656 656 // inactive.
657 657 MemRegion committed = _g1h->g1_committed();
658 658 _heap_start = committed.start();
659 659 _heap_end = committed.end();
660 660
661 661 // Separated the asserts so that we know which one fires.
662 662 assert(_heap_start != NULL, "heap bounds should look ok");
663 663 assert(_heap_end != NULL, "heap bounds should look ok");
664 664 assert(_heap_start < _heap_end, "heap bounds should look ok");
665 665
666 666 // reset all the marking data structures and any necessary flags
667 667 clear_marking_state();
668 668
669 669 if (verbose_low()) {
670 670 gclog_or_tty->print_cr("[global] resetting");
671 671 }
672 672
673 673 // We do reset all of them, since different phases will use
674 674 // different numbers of active threads. So, it's easiest to have all
675 675 // of them ready.
676 676 for (int i = 0; i < (int) _max_task_num; ++i) {
677 677 _tasks[i]->reset(_nextMarkBitMap);
678 678 }
679 679
680 680 // we need this to make sure that the flag is on during the evac
681 681 // pause with initial mark piggy-backed
682 682 set_concurrent_marking_in_progress();
683 683 }
684 684
685 685 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
686 686 assert(active_tasks <= _max_task_num, "we should not have more");
687 687
688 688 _active_tasks = active_tasks;
689 689 // Need to update the three data structures below according to the
690 690 // number of active threads for this phase.
691 691 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
692 692 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
693 693 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
694 694
695 695 _concurrent = concurrent;
696 696 // We propagate this to all tasks, not just the active ones.
697 697 for (int i = 0; i < (int) _max_task_num; ++i)
698 698 _tasks[i]->set_concurrent(concurrent);
699 699
700 700 if (concurrent) {
701 701 set_concurrent_marking_in_progress();
702 702 } else {
703 703 // We currently assume that the concurrent flag has been set to
704 704 // false before we start remark. At this point we should also be
705 705 // in a STW phase.
706 706 assert(!concurrent_marking_in_progress(), "invariant");
707 707 assert(_finger == _heap_end, "only way to get here");
708 708 update_g1_committed(true);
709 709 }
710 710 }
711 711
712 712 void ConcurrentMark::set_non_marking_state() {
713 713 // We set the global marking state to some default values when we're
714 714 // not doing marking.
715 715 clear_marking_state();
716 716 _active_tasks = 0;
717 717 clear_concurrent_marking_in_progress();
718 718 }
719 719
720 720 ConcurrentMark::~ConcurrentMark() {
721 721 for (int i = 0; i < (int) _max_task_num; ++i) {
722 722 delete _task_queues->queue(i);
723 723 delete _tasks[i];
724 724 }
725 725 delete _task_queues;
726 726 FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
727 727 }
728 728
729 729 // This closure is used to mark refs into the g1 generation
730 730 // from external roots in the CMS bit map.
731 731 // Called at the first checkpoint.
732 732 //
733 733
734 734 void ConcurrentMark::clearNextBitmap() {
735 735 G1CollectedHeap* g1h = G1CollectedHeap::heap();
736 736 G1CollectorPolicy* g1p = g1h->g1_policy();
737 737
738 738 // Make sure that the concurrent mark thread looks to still be in
739 739 // the current cycle.
740 740 guarantee(cmThread()->during_cycle(), "invariant");
741 741
742 742 // We are finishing up the current cycle by clearing the next
743 743 // marking bitmap and getting it ready for the next cycle. During
744 744 // this time no other cycle can start. So, let's make sure that this
745 745 // is the case.
746 746 guarantee(!g1h->mark_in_progress(), "invariant");
747 747
748 748 // clear the mark bitmap (no grey objects to start with).
749 749 // We need to do this in chunks and offer to yield in between
750 750 // each chunk.
751 751 HeapWord* start = _nextMarkBitMap->startWord();
752 752 HeapWord* end = _nextMarkBitMap->endWord();
753 753 HeapWord* cur = start;
754 754 size_t chunkSize = M;
755 755 while (cur < end) {
756 756 HeapWord* next = cur + chunkSize;
757 757 if (next > end) {
758 758 next = end;
759 759 }
760 760 MemRegion mr(cur,next);
761 761 _nextMarkBitMap->clearRange(mr);
762 762 cur = next;
763 763 do_yield_check();
764 764
765 765 // Repeat the asserts from above. We'll do them as asserts here to
766 766 // minimize their overhead on the product. However, we'll have
767 767 // them as guarantees at the beginning / end of the bitmap
768 768 // clearing to get some checking in the product.
769 769 assert(cmThread()->during_cycle(), "invariant");
770 770 assert(!g1h->mark_in_progress(), "invariant");
771 771 }
772 772
773 773 // Repeat the asserts from above.
774 774 guarantee(cmThread()->during_cycle(), "invariant");
775 775 guarantee(!g1h->mark_in_progress(), "invariant");
776 776 }
777 777
778 778 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
779 779 public:
780 780 bool doHeapRegion(HeapRegion* r) {
781 781 if (!r->continuesHumongous()) {
782 782 r->note_start_of_marking(true);
783 783 }
784 784 return false;
785 785 }
786 786 };
787 787
788 788 void ConcurrentMark::checkpointRootsInitialPre() {
789 789 G1CollectedHeap* g1h = G1CollectedHeap::heap();
790 790 G1CollectorPolicy* g1p = g1h->g1_policy();
791 791
792 792 _has_aborted = false;
793 793
794 794 #ifndef PRODUCT
795 795 if (G1PrintReachableAtInitialMark) {
796 796 print_reachable("at-cycle-start",
797 797 VerifyOption_G1UsePrevMarking, true /* all */);
798 798 }
799 799 #endif
800 800
801 801 // Initialise marking structures. This has to be done in a STW phase.
802 802 reset();
803 803 }
804 804
805 805
806 806 void ConcurrentMark::checkpointRootsInitialPost() {
807 807 G1CollectedHeap* g1h = G1CollectedHeap::heap();
808 808
809 809 // If we force an overflow during remark, the remark operation will
810 810 // actually abort and we'll restart concurrent marking. If we always
811 811 // force an overflow during remark we'll never actually complete the
812 812 // marking phase. So, we initialize this here, at the start of the
813 813 // cycle, so that the remaining overflow count will decrease at
814 814 // every remark and we'll eventually not need to cause one.
815 815 force_overflow_stw()->init();
816 816
817 817 // For each region note start of marking.
818 818 NoteStartOfMarkHRClosure startcl;
819 819 g1h->heap_region_iterate(&startcl);
820 820
821 821 // Start weak-reference discovery.
822 822 ReferenceProcessor* rp = g1h->ref_processor();
823 823 rp->verify_no_references_recorded();
824 824 rp->enable_discovery(); // enable ("weak") refs discovery
825 825 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
826 826
827 827 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
828 828 // This is the start of the marking cycle; we expect all
829 829 // threads to have SATB queues with active set to false.
830 830 satb_mq_set.set_active_all_threads(true, /* new active value */
831 831 false /* expected_active */);
832 832
833 833 // update_g1_committed() will be called at the end of an evac pause
834 834 // when marking is on. So, it's also called at the end of the
835 835 // initial-mark pause to update the heap end, if the heap expands
836 836 // during it. No need to call it here.
837 837 }
838 838
839 839 /*
840 840 * Notice that in the next two methods, we actually leave the STS
841 841 * during the barrier sync and join it immediately afterwards. If we
842 842 * do not do this, the following deadlock can occur: one thread could
843 843 * be in the barrier sync code, waiting for the other thread to also
844 844 * sync up, whereas another one could be trying to yield, while also
845 845 * waiting for the other threads to sync up too.
846 846 *
847 847 * Note, however, that this code is also used during remark and in
848 848 * this case we should not attempt to leave / enter the STS, otherwise
849 849 * we'll either hit an assert (debug / fastdebug) or deadlock
850 850 * (product). So we should only leave / enter the STS if we are
851 851 * operating concurrently.
852 852 *
853 853 * Because the thread that does the sync barrier has left the STS, it
854 854 * is possible for it to be suspended while a Full GC or an evacuation
855 855 * pause occurs. This is actually safe, since entering the sync
856 856 * barrier is one of the last things do_marking_step() does, and it
857 857 * doesn't manipulate any data structures afterwards.
858 858 */
859 859
860 860 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
861 861 if (verbose_low()) {
862 862 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
863 863 }
864 864
865 865 if (concurrent()) {
866 866 ConcurrentGCThread::stsLeave();
867 867 }
868 868 _first_overflow_barrier_sync.enter();
869 869 if (concurrent()) {
870 870 ConcurrentGCThread::stsJoin();
871 871 }
872 872 // at this point everyone should have synced up and not be doing any
873 873 // more work
874 874
875 875 if (verbose_low()) {
876 876 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
877 877 }
878 878
879 879 // let task 0 do this
880 880 if (task_num == 0) {
881 881 // task 0 is responsible for clearing the global data structures
882 882 // We should be here because of an overflow. During STW we should
883 883 // not clear the overflow flag since we rely on it being true when
884 884 // we exit this method to abort the pause and restart concurrent
885 885 // marking.
886 886 clear_marking_state(concurrent() /* clear_overflow */);
887 887 force_overflow()->update();
888 888
889 889 if (PrintGC) {
890 890 gclog_or_tty->date_stamp(PrintGCDateStamps);
891 891 gclog_or_tty->stamp(PrintGCTimeStamps);
892 892 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
893 893 }
894 894 }
895 895
896 896 // after this, each task should reset its own data structures and
897 897 // then go into the second barrier
898 898 }
899 899
900 900 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
901 901 if (verbose_low()) {
902 902 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
903 903 }
904 904
905 905 if (concurrent()) {
906 906 ConcurrentGCThread::stsLeave();
907 907 }
908 908 _second_overflow_barrier_sync.enter();
909 909 if (concurrent()) {
910 910 ConcurrentGCThread::stsJoin();
911 911 }
912 912 // at this point everything should be re-initialised and ready to go
913 913
914 914 if (verbose_low()) {
915 915 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
916 916 }
917 917 }
918 918
919 919 #ifndef PRODUCT
920 920 void ForceOverflowSettings::init() {
921 921 _num_remaining = G1ConcMarkForceOverflow;
922 922 _force = false;
923 923 update();
924 924 }
925 925
926 926 void ForceOverflowSettings::update() {
927 927 if (_num_remaining > 0) {
928 928 _num_remaining -= 1;
929 929 _force = true;
930 930 } else {
931 931 _force = false;
932 932 }
933 933 }
934 934
935 935 bool ForceOverflowSettings::should_force() {
936 936 if (_force) {
937 937 _force = false;
938 938 return true;
939 939 } else {
940 940 return false;
941 941 }
942 942 }
943 943 #endif // !PRODUCT
944 944
945 945 void ConcurrentMark::grayRoot(oop p) {
946 946 HeapWord* addr = (HeapWord*) p;
947 947 // We can't really check against _heap_start and _heap_end, since it
948 948 // is possible during an evacuation pause with piggy-backed
949 949 // initial-mark that the committed space is expanded during the
950 950 // pause without CM observing this change. So the assertion below
951 951 // is a bit conservative, but better than nothing.
952 952 assert(_g1h->g1_committed().contains(addr),
953 953 "address should be within the heap bounds");
954 954
955 955 if (!_nextMarkBitMap->isMarked(addr)) {
956 956 _nextMarkBitMap->parMark(addr);
957 957 }
958 958 }
959 959
960 960 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
961 961 // The objects on the region have already been marked "in bulk" by
962 962 // the caller. We only need to decide whether to push the region on
963 963 // the region stack or not.
964 964
965 965 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
966 966 // We're done with marking and waiting for remark. We do not need to
967 967 // push anything else on the region stack.
968 968 return;
969 969 }
970 970
971 971 HeapWord* finger = _finger;
972 972
973 973 if (verbose_low()) {
974 974 gclog_or_tty->print_cr("[global] attempting to push "
975 975 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
976 976 PTR_FORMAT, mr.start(), mr.end(), finger);
977 977 }
978 978
979 979 if (mr.start() < finger) {
980 980 // The finger is always heap region aligned and it is not possible
981 981 // for mr to span heap regions.
982 982 assert(mr.end() <= finger, "invariant");
983 983
984 984 // Separated the asserts so that we know which one fires.
985 985 assert(mr.start() <= mr.end(),
986 986 "region boundaries should fall within the committed space");
987 987 assert(_heap_start <= mr.start(),
988 988 "region boundaries should fall within the committed space");
989 989 assert(mr.end() <= _heap_end,
990 990 "region boundaries should fall within the committed space");
991 991 if (verbose_low()) {
992 992 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
993 993 "below the finger, pushing it",
994 994 mr.start(), mr.end());
995 995 }
996 996
997 997 if (!region_stack_push_lock_free(mr)) {
998 998 if (verbose_low()) {
999 999 gclog_or_tty->print_cr("[global] region stack has overflown.");
1000 1000 }
1001 1001 }
1002 1002 }
1003 1003 }
1004 1004
1005 1005 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1006 1006 // The object is not marked by the caller. We need to at least mark
1007 1007 // it and maybe push it on the stack.
1008 1008
1009 1009 HeapWord* addr = (HeapWord*)p;
1010 1010 if (!_nextMarkBitMap->isMarked(addr)) {
1011 1011 // We definitely need to mark it, irrespective of whether we bail out
1012 1012 // because we're done with marking.
1013 1013 if (_nextMarkBitMap->parMark(addr)) {
1014 1014 if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1015 1015 // If we're done with concurrent marking and we're waiting for
1016 1016 // remark, then we're not pushing anything on the stack.
1017 1017 return;
1018 1018 }
1019 1019
1020 1020 // No OrderAccess::store_load() is needed. It is implicit in the
1021 1021 // CAS done in parMark(addr) above
1022 1022 HeapWord* finger = _finger;
1023 1023
1024 1024 if (addr < finger) {
1025 1025 if (!mark_stack_push(oop(addr))) {
1026 1026 if (verbose_low()) {
1027 1027 gclog_or_tty->print_cr("[global] global stack overflow "
1028 1028 "during parMark");
1029 1029 }
1030 1030 }
1031 1031 }
1032 1032 }
1033 1033 }
1034 1034 }
1035 1035
1036 1036 class CMConcurrentMarkingTask: public AbstractGangTask {
1037 1037 private:
1038 1038 ConcurrentMark* _cm;
1039 1039 ConcurrentMarkThread* _cmt;
1040 1040
1041 1041 public:
1042 1042 void work(int worker_i) {
1043 1043 assert(Thread::current()->is_ConcurrentGC_thread(),
1044 1044 "this should only be done by a conc GC thread");
1045 1045 ResourceMark rm;
1046 1046
1047 1047 double start_vtime = os::elapsedVTime();
1048 1048
1049 1049 ConcurrentGCThread::stsJoin();
1050 1050
1051 1051 assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1052 1052 CMTask* the_task = _cm->task(worker_i);
1053 1053 the_task->record_start_time();
1054 1054 if (!_cm->has_aborted()) {
1055 1055 do {
1056 1056 double start_vtime_sec = os::elapsedVTime();
1057 1057 double start_time_sec = os::elapsedTime();
1058 1058 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1059 1059
1060 1060 the_task->do_marking_step(mark_step_duration_ms,
1061 1061 true /* do_stealing */,
1062 1062 true /* do_termination */);
1063 1063
1064 1064 double end_time_sec = os::elapsedTime();
1065 1065 double end_vtime_sec = os::elapsedVTime();
1066 1066 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1067 1067 double elapsed_time_sec = end_time_sec - start_time_sec;
1068 1068 _cm->clear_has_overflown();
1069 1069
1070 1070 bool ret = _cm->do_yield_check(worker_i);
1071 1071
1072 1072 jlong sleep_time_ms;
1073 1073 if (!_cm->has_aborted() && the_task->has_aborted()) {
1074 1074 sleep_time_ms =
1075 1075 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1076 1076 ConcurrentGCThread::stsLeave();
1077 1077 os::sleep(Thread::current(), sleep_time_ms, false);
1078 1078 ConcurrentGCThread::stsJoin();
1079 1079 }
1080 1080 double end_time2_sec = os::elapsedTime();
1081 1081 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1082 1082
1083 1083 #if 0
1084 1084 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1085 1085 "overhead %1.4lf",
1086 1086 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1087 1087 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1088 1088 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1089 1089 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1090 1090 #endif
1091 1091 } while (!_cm->has_aborted() && the_task->has_aborted());
1092 1092 }
1093 1093 the_task->record_end_time();
1094 1094 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1095 1095
1096 1096 ConcurrentGCThread::stsLeave();
1097 1097
1098 1098 double end_vtime = os::elapsedVTime();
1099 1099 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1100 1100 }
1101 1101
1102 1102 CMConcurrentMarkingTask(ConcurrentMark* cm,
1103 1103 ConcurrentMarkThread* cmt) :
1104 1104 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1105 1105
1106 1106 ~CMConcurrentMarkingTask() { }
1107 1107 };
1108 1108
1109 1109 void ConcurrentMark::markFromRoots() {
1110 1110 // we might be tempted to assert that:
1111 1111 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1112 1112 // "inconsistent argument?");
1113 1113 // However that wouldn't be right, because it's possible that
1114 1114 // a safepoint is indeed in progress as a younger generation
1115 1115 // stop-the-world GC happens even as we mark in this generation.
1116 1116
1117 1117 _restart_for_overflow = false;
1118 1118
1119 1119 size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1120 1120 force_overflow_conc()->init();
1121 1121 set_phase(active_workers, true /* concurrent */);
1122 1122
1123 1123 CMConcurrentMarkingTask markingTask(this, cmThread());
1124 1124 if (parallel_marking_threads() > 0) {
1125 1125 _parallel_workers->run_task(&markingTask);
1126 1126 } else {
1127 1127 markingTask.work(0);
1128 1128 }
1129 1129 print_stats();
1130 1130 }
1131 1131
1132 1132 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1133 1133 // world is stopped at this checkpoint
1134 1134 assert(SafepointSynchronize::is_at_safepoint(),
1135 1135 "world should be stopped");
1136 1136 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1137 1137
1138 1138 // If a full collection has happened, we shouldn't do this.
1139 1139 if (has_aborted()) {
1140 1140 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1141 1141 return;
1142 1142 }
1143 1143
1144 1144 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1145 1145
1146 1146 if (VerifyDuringGC) {
1147 1147 HandleMark hm; // handle scope
1148 1148 gclog_or_tty->print(" VerifyDuringGC:(before)");
1149 1149 Universe::heap()->prepare_for_verify();
1150 1150 Universe::verify(/* allow dirty */ true,
1151 1151 /* silent */ false,
1152 1152 /* option */ VerifyOption_G1UsePrevMarking);
1153 1153 }
1154 1154
1155 1155 G1CollectorPolicy* g1p = g1h->g1_policy();
1156 1156 g1p->record_concurrent_mark_remark_start();
1157 1157
1158 1158 double start = os::elapsedTime();
1159 1159
1160 1160 checkpointRootsFinalWork();
1161 1161
1162 1162 double mark_work_end = os::elapsedTime();
1163 1163
1164 1164 weakRefsWork(clear_all_soft_refs);
1165 1165
1166 1166 if (has_overflown()) {
1167 1167 // Oops. We overflowed. Restart concurrent marking.
1168 1168 _restart_for_overflow = true;
1169 1169 // Clear the flag. We do not need it any more.
1170 1170 clear_has_overflown();
1171 1171 if (G1TraceMarkStackOverflow) {
1172 1172 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1173 1173 }
1174 1174 } else {
1175 1175 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1176 1176 // We're done with marking.
1177 1177 // This is the end of the marking cycle; we expect all
1178 1178 // threads to have SATB queues with active set to true.
1179 1179 satb_mq_set.set_active_all_threads(false, /* new active value */
1180 1180 true /* expected_active */);
1181 1181
1182 1182 if (VerifyDuringGC) {
1183 1183 HandleMark hm; // handle scope
1184 1184 gclog_or_tty->print(" VerifyDuringGC:(after)");
1185 1185 Universe::heap()->prepare_for_verify();
1186 1186 Universe::verify(/* allow dirty */ true,
1187 1187 /* silent */ false,
1188 1188 /* option */ VerifyOption_G1UseNextMarking);
1189 1189 }
1190 1190 assert(!restart_for_overflow(), "sanity");
1191 1191 }
1192 1192
1193 1193 // Reset the marking state if marking completed
1194 1194 if (!restart_for_overflow()) {
1195 1195 set_non_marking_state();
1196 1196 }
1197 1197
1198 1198 #if VERIFY_OBJS_PROCESSED
1199 1199 _scan_obj_cl.objs_processed = 0;
1200 1200 ThreadLocalObjQueue::objs_enqueued = 0;
1201 1201 #endif
1202 1202
1203 1203 // Statistics
1204 1204 double now = os::elapsedTime();
1205 1205 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1206 1206 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1207 1207 _remark_times.add((now - start) * 1000.0);
1208 1208
1209 1209 g1p->record_concurrent_mark_remark_end();
1210 1210 }
1211 1211
1212 1212 #define CARD_BM_TEST_MODE 0
1213 1213
1214 1214 class CalcLiveObjectsClosure: public HeapRegionClosure {
1215 1215
1216 1216 CMBitMapRO* _bm;
1217 1217 ConcurrentMark* _cm;
1218 1218 bool _changed;
1219 1219 bool _yield;
1220 1220 size_t _words_done;
1221 1221 size_t _tot_live;
1222 1222 size_t _tot_used;
1223 1223 size_t _regions_done;
1224 1224 double _start_vtime_sec;
1225 1225
1226 1226 BitMap* _region_bm;
1227 1227 BitMap* _card_bm;
1228 1228 intptr_t _bottom_card_num;
1229 1229 bool _final;
1230 1230
1231 1231 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1232 1232 for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1233 1233 #if CARD_BM_TEST_MODE
1234 1234 guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1235 1235 #else
1236 1236 _card_bm->par_at_put(i - _bottom_card_num, 1);
1237 1237 #endif
1238 1238 }
1239 1239 }
1240 1240
1241 1241 public:
1242 1242 CalcLiveObjectsClosure(bool final,
1243 1243 CMBitMapRO *bm, ConcurrentMark *cm,
1244 1244 BitMap* region_bm, BitMap* card_bm) :
1245 1245 _bm(bm), _cm(cm), _changed(false), _yield(true),
1246 1246 _words_done(0), _tot_live(0), _tot_used(0),
1247 1247 _region_bm(region_bm), _card_bm(card_bm),_final(final),
1248 1248 _regions_done(0), _start_vtime_sec(0.0)
1249 1249 {
1250 1250 _bottom_card_num =
1251 1251 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1252 1252 CardTableModRefBS::card_shift);
1253 1253 }
1254 1254
1255 1255 // It takes a region that's not empty (i.e., it has at least one
1256 1256 // live object in it and sets its corresponding bit on the region
1257 1257 // bitmap to 1. If the region is "starts humongous" it will also set
1258 1258 // to 1 the bits on the region bitmap that correspond to its
1259 1259 // associated "continues humongous" regions.
1260 1260 void set_bit_for_region(HeapRegion* hr) {
1261 1261 assert(!hr->continuesHumongous(), "should have filtered those out");
1262 1262
1263 1263 size_t index = hr->hrs_index();
1264 1264 if (!hr->startsHumongous()) {
1265 1265 // Normal (non-humongous) case: just set the bit.
1266 1266 _region_bm->par_at_put((BitMap::idx_t) index, true);
1267 1267 } else {
1268 1268 // Starts humongous case: calculate how many regions are part of
1269 1269 // this humongous region and then set the bit range. It might
1270 1270 // have been a bit more efficient to look at the object that
1271 1271 // spans these humongous regions to calculate their number from
1272 1272 // the object's size. However, it's a good idea to calculate
1273 1273 // this based on the metadata itself, and not the region
1274 1274 // contents, so that this code is not aware of what goes into
1275 1275 // the humongous regions (in case this changes in the future).
1276 1276 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1277 1277 size_t end_index = index + 1;
1278 1278 while (end_index < g1h->n_regions()) {
1279 1279 HeapRegion* chr = g1h->region_at(end_index);
1280 1280 if (!chr->continuesHumongous()) break;
1281 1281 end_index += 1;
1282 1282 }
1283 1283 _region_bm->par_at_put_range((BitMap::idx_t) index,
1284 1284 (BitMap::idx_t) end_index, true);
1285 1285 }
1286 1286 }
1287 1287
1288 1288 bool doHeapRegion(HeapRegion* hr) {
1289 1289 if (!_final && _regions_done == 0) {
1290 1290 _start_vtime_sec = os::elapsedVTime();
1291 1291 }
1292 1292
1293 1293 if (hr->continuesHumongous()) {
1294 1294 // We will ignore these here and process them when their
1295 1295 // associated "starts humongous" region is processed (see
1296 1296 // set_bit_for_heap_region()). Note that we cannot rely on their
1297 1297 // associated "starts humongous" region to have their bit set to
1298 1298 // 1 since, due to the region chunking in the parallel region
1299 1299 // iteration, a "continues humongous" region might be visited
1300 1300 // before its associated "starts humongous".
1301 1301 return false;
1302 1302 }
1303 1303
1304 1304 HeapWord* nextTop = hr->next_top_at_mark_start();
1305 1305 HeapWord* start = hr->top_at_conc_mark_count();
1306 1306 assert(hr->bottom() <= start && start <= hr->end() &&
1307 1307 hr->bottom() <= nextTop && nextTop <= hr->end() &&
1308 1308 start <= nextTop,
1309 1309 "Preconditions.");
1310 1310 // Otherwise, record the number of words we'll examine.
1311 1311 size_t words_done = (nextTop - start);
1312 1312 // Find the first marked object at or after "start".
1313 1313 start = _bm->getNextMarkedWordAddress(start, nextTop);
1314 1314 size_t marked_bytes = 0;
1315 1315
1316 1316 // Below, the term "card num" means the result of shifting an address
1317 1317 // by the card shift -- address 0 corresponds to card number 0. One
1318 1318 // must subtract the card num of the bottom of the heap to obtain a
1319 1319 // card table index.
1320 1320 // The first card num of the sequence of live cards currently being
1321 1321 // constructed. -1 ==> no sequence.
1322 1322 intptr_t start_card_num = -1;
1323 1323 // The last card num of the sequence of live cards currently being
1324 1324 // constructed. -1 ==> no sequence.
1325 1325 intptr_t last_card_num = -1;
1326 1326
1327 1327 while (start < nextTop) {
1328 1328 if (_yield && _cm->do_yield_check()) {
1329 1329 // We yielded. It might be for a full collection, in which case
1330 1330 // all bets are off; terminate the traversal.
1331 1331 if (_cm->has_aborted()) {
1332 1332 _changed = false;
1333 1333 return true;
1334 1334 } else {
1335 1335 // Otherwise, it might be a collection pause, and the region
1336 1336 // we're looking at might be in the collection set. We'll
1337 1337 // abandon this region.
1338 1338 return false;
1339 1339 }
1340 1340 }
1341 1341 oop obj = oop(start);
1342 1342 int obj_sz = obj->size();
1343 1343 // The card num of the start of the current object.
1344 1344 intptr_t obj_card_num =
1345 1345 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1346 1346
1347 1347 HeapWord* obj_last = start + obj_sz - 1;
1348 1348 intptr_t obj_last_card_num =
1349 1349 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1350 1350
1351 1351 if (obj_card_num != last_card_num) {
1352 1352 if (start_card_num == -1) {
1353 1353 assert(last_card_num == -1, "Both or neither.");
1354 1354 start_card_num = obj_card_num;
1355 1355 } else {
1356 1356 assert(last_card_num != -1, "Both or neither.");
1357 1357 assert(obj_card_num >= last_card_num, "Inv");
1358 1358 if ((obj_card_num - last_card_num) > 1) {
1359 1359 // Mark the last run, and start a new one.
1360 1360 mark_card_num_range(start_card_num, last_card_num);
1361 1361 start_card_num = obj_card_num;
1362 1362 }
1363 1363 }
1364 1364 #if CARD_BM_TEST_MODE
1365 1365 /*
1366 1366 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1367 1367 obj_card_num - _bottom_card_num,
1368 1368 obj_last_card_num - _bottom_card_num);
1369 1369 */
1370 1370 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1371 1371 _card_bm->par_at_put(j - _bottom_card_num, 1);
1372 1372 }
1373 1373 #endif
1374 1374 }
1375 1375 // In any case, we set the last card num.
1376 1376 last_card_num = obj_last_card_num;
1377 1377
1378 1378 marked_bytes += (size_t)obj_sz * HeapWordSize;
1379 1379 // Find the next marked object after this one.
1380 1380 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1381 1381 _changed = true;
1382 1382 }
1383 1383 // Handle the last range, if any.
1384 1384 if (start_card_num != -1) {
1385 1385 mark_card_num_range(start_card_num, last_card_num);
1386 1386 }
1387 1387 if (_final) {
1388 1388 // Mark the allocated-since-marking portion...
1389 1389 HeapWord* tp = hr->top();
1390 1390 if (nextTop < tp) {
1391 1391 start_card_num =
1392 1392 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1393 1393 last_card_num =
1394 1394 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1395 1395 mark_card_num_range(start_card_num, last_card_num);
1396 1396 // This definitely means the region has live objects.
1397 1397 set_bit_for_region(hr);
1398 1398 }
1399 1399 }
1400 1400
1401 1401 hr->add_to_marked_bytes(marked_bytes);
1402 1402 // Update the live region bitmap.
1403 1403 if (marked_bytes > 0) {
1404 1404 set_bit_for_region(hr);
1405 1405 }
1406 1406 hr->set_top_at_conc_mark_count(nextTop);
1407 1407 _tot_live += hr->next_live_bytes();
1408 1408 _tot_used += hr->used();
1409 1409 _words_done = words_done;
1410 1410
1411 1411 if (!_final) {
1412 1412 ++_regions_done;
1413 1413 if (_regions_done % 10 == 0) {
1414 1414 double end_vtime_sec = os::elapsedVTime();
1415 1415 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1416 1416 if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1417 1417 jlong sleep_time_ms =
1418 1418 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1419 1419 os::sleep(Thread::current(), sleep_time_ms, false);
1420 1420 _start_vtime_sec = end_vtime_sec;
1421 1421 }
1422 1422 }
1423 1423 }
1424 1424
1425 1425 return false;
1426 1426 }
1427 1427
1428 1428 bool changed() { return _changed; }
1429 1429 void reset() { _changed = false; _words_done = 0; }
1430 1430 void no_yield() { _yield = false; }
1431 1431 size_t words_done() { return _words_done; }
1432 1432 size_t tot_live() { return _tot_live; }
1433 1433 size_t tot_used() { return _tot_used; }
1434 1434 };
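A small standalone illustration of the "card num" arithmetic described in the closure above, with an assumed 512-byte card size (card_shift of 9, the usual HotSpot value) and example addresses chosen purely for illustration:

#include <cstdint>
#include <cstdio>

int main() {
  // Assumed values: 512-byte cards and a heap reserved at 0x10000000;
  // the closure obtains both from the real heap and card table.
  const int       card_shift  = 9;
  const uintptr_t heap_bottom = 0x10000000;
  const uintptr_t obj_addr    = 0x10000400;   // some object inside the heap

  intptr_t bottom_card_num = intptr_t(heap_bottom >> card_shift);  // 0x80000
  intptr_t obj_card_num    = intptr_t(obj_addr    >> card_shift);  // 0x80002

  // The card table index is the card num minus the card num of the heap
  // bottom, exactly as the comment in CalcLiveObjectsClosure describes.
  printf("card table index = %ld\n", long(obj_card_num - bottom_card_num)); // 2
  return 0;
}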
1435 1435
1436 1436
1437 1437 void ConcurrentMark::calcDesiredRegions() {
1438 1438 _region_bm.clear();
1439 1439 _card_bm.clear();
1440 1440 CalcLiveObjectsClosure calccl(false /*final*/,
1441 1441 nextMarkBitMap(), this,
1442 1442 &_region_bm, &_card_bm);
1443 1443 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1444 1444 g1h->heap_region_iterate(&calccl);
1445 1445
1446 1446 do {
1447 1447 calccl.reset();
1448 1448 g1h->heap_region_iterate(&calccl);
1449 1449 } while (calccl.changed());
1450 1450 }
1451 1451
1452 1452 class G1ParFinalCountTask: public AbstractGangTask {
1453 1453 protected:
1454 1454 G1CollectedHeap* _g1h;
1455 1455 CMBitMap* _bm;
1456 1456 size_t _n_workers;
1457 1457 size_t *_live_bytes;
1458 1458 size_t *_used_bytes;
1459 1459 BitMap* _region_bm;
1460 1460 BitMap* _card_bm;
1461 1461 public:
1462 1462 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1463 1463 BitMap* region_bm, BitMap* card_bm)
1464 1464 : AbstractGangTask("G1 final counting"), _g1h(g1h),
1465 1465 _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
1466 1466 if (ParallelGCThreads > 0) {
1467 1467 _n_workers = _g1h->workers()->total_workers();
1468 1468 } else {
1469 1469 _n_workers = 1;
1470 1470 }
1471 1471 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1472 1472 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1473 1473 }
1474 1474
1475 1475 ~G1ParFinalCountTask() {
1476 1476 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1477 1477 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1478 1478 }
1479 1479
1480 1480 void work(int i) {
1481 1481 CalcLiveObjectsClosure calccl(true /*final*/,
1482 1482 _bm, _g1h->concurrent_mark(),
1483 1483 _region_bm, _card_bm);
1484 1484 calccl.no_yield();
1485 1485 if (G1CollectedHeap::use_parallel_gc_threads()) {
1486 1486 _g1h->heap_region_par_iterate_chunked(&calccl, i,
1487 1487 HeapRegion::FinalCountClaimValue);
1488 1488 } else {
1489 1489 _g1h->heap_region_iterate(&calccl);
1490 1490 }
1491 1491 assert(calccl.complete(), "Shouldn't have yielded!");
1492 1492
1493 1493 assert((size_t) i < _n_workers, "invariant");
1494 1494 _live_bytes[i] = calccl.tot_live();
1495 1495 _used_bytes[i] = calccl.tot_used();
1496 1496 }
1497 1497 size_t live_bytes() {
1498 1498 size_t live_bytes = 0;
1499 1499 for (size_t i = 0; i < _n_workers; ++i)
1500 1500 live_bytes += _live_bytes[i];
1501 1501 return live_bytes;
1502 1502 }
1503 1503 size_t used_bytes() {
1504 1504 size_t used_bytes = 0;
1505 1505 for (size_t i = 0; i < _n_workers; ++i)
1506 1506 used_bytes += _used_bytes[i];
1507 1507 return used_bytes;
1508 1508 }
1509 1509 };
1510 1510
1511 1511 class G1ParNoteEndTask;
1512 1512
1513 1513 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1514 1514 G1CollectedHeap* _g1;
1515 1515 int _worker_num;
1516 1516 size_t _max_live_bytes;
1517 1517 size_t _regions_claimed;
1518 1518 size_t _freed_bytes;
1519 1519 FreeRegionList* _local_cleanup_list;
1520 1520 HumongousRegionSet* _humongous_proxy_set;
1521 1521 HRRSCleanupTask* _hrrs_cleanup_task;
1522 1522 double _claimed_region_time;
1523 1523 double _max_region_time;
1524 1524
1525 1525 public:
1526 1526 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1527 1527 int worker_num,
1528 1528 FreeRegionList* local_cleanup_list,
1529 1529 HumongousRegionSet* humongous_proxy_set,
1530 1530 HRRSCleanupTask* hrrs_cleanup_task);
1531 1531 size_t freed_bytes() { return _freed_bytes; }
1532 1532
1533 1533 bool doHeapRegion(HeapRegion *r);
1534 1534
1535 1535 size_t max_live_bytes() { return _max_live_bytes; }
1536 1536 size_t regions_claimed() { return _regions_claimed; }
1537 1537 double claimed_region_time_sec() { return _claimed_region_time; }
1538 1538 double max_region_time_sec() { return _max_region_time; }
1539 1539 };
1540 1540
1541 1541 class G1ParNoteEndTask: public AbstractGangTask {
1542 1542 friend class G1NoteEndOfConcMarkClosure;
1543 1543
1544 1544 protected:
1545 1545 G1CollectedHeap* _g1h;
1546 1546 size_t _max_live_bytes;
1547 1547 size_t _freed_bytes;
1548 1548 FreeRegionList* _cleanup_list;
1549 1549
1550 1550 public:
1551 1551 G1ParNoteEndTask(G1CollectedHeap* g1h,
1552 1552 FreeRegionList* cleanup_list) :
1553 1553 AbstractGangTask("G1 note end"), _g1h(g1h),
1554 1554 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1555 1555
1556 1556 void work(int i) {
1557 1557 double start = os::elapsedTime();
1558 1558 FreeRegionList local_cleanup_list("Local Cleanup List");
1559 1559 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1560 1560 HRRSCleanupTask hrrs_cleanup_task;
1561 1561 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1562 1562 &humongous_proxy_set,
1563 1563 &hrrs_cleanup_task);
1564 1564 if (G1CollectedHeap::use_parallel_gc_threads()) {
1565 1565 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1566 1566 HeapRegion::NoteEndClaimValue);
1567 1567 } else {
1568 1568 _g1h->heap_region_iterate(&g1_note_end);
1569 1569 }
1570 1570 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1571 1571
1572 1572 // Now update the lists
1573 1573 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1574 1574 NULL /* free_list */,
1575 1575 &humongous_proxy_set,
1576 1576 true /* par */);
1577 1577 {
1578 1578 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1579 1579 _max_live_bytes += g1_note_end.max_live_bytes();
1580 1580 _freed_bytes += g1_note_end.freed_bytes();
1581 1581
1582 1582 // If we iterate over the global cleanup list at the end of
1583 1583       // cleanup to do this printing we cannot guarantee that we only
1584 1584       // generate output for the newly-reclaimed regions (the list
1585 1585 // might not be empty at the beginning of cleanup; we might
1586 1586 // still be working on its previous contents). So we do the
1587 1587 // printing here, before we append the new regions to the global
1588 1588 // cleanup list.
1589 1589
1590 1590 G1HRPrinter* hr_printer = _g1h->hr_printer();
1591 1591 if (hr_printer->is_active()) {
1592 1592 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1593 1593 while (iter.more_available()) {
1594 1594 HeapRegion* hr = iter.get_next();
1595 1595 hr_printer->cleanup(hr);
1596 1596 }
1597 1597 }
1598 1598
1599 1599 _cleanup_list->add_as_tail(&local_cleanup_list);
1600 1600 assert(local_cleanup_list.is_empty(), "post-condition");
1601 1601
1602 1602 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1603 1603 }
1604 1604 double end = os::elapsedTime();
1605 1605 if (G1PrintParCleanupStats) {
1606 1606 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1607 1607 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1608 1608 i, start, end, (end-start)*1000.0,
1609 1609 g1_note_end.regions_claimed(),
1610 1610 g1_note_end.claimed_region_time_sec()*1000.0,
1611 1611 g1_note_end.max_region_time_sec()*1000.0);
1612 1612 }
1613 1613 }
1614 1614 size_t max_live_bytes() { return _max_live_bytes; }
1615 1615 size_t freed_bytes() { return _freed_bytes; }
1616 1616 };
1617 1617
1618 1618 class G1ParScrubRemSetTask: public AbstractGangTask {
1619 1619 protected:
1620 1620 G1RemSet* _g1rs;
1621 1621 BitMap* _region_bm;
1622 1622 BitMap* _card_bm;
1623 1623 public:
1624 1624 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1625 1625 BitMap* region_bm, BitMap* card_bm) :
1626 1626 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1627 1627 _region_bm(region_bm), _card_bm(card_bm)
1628 1628 {}
1629 1629
1630 1630 void work(int i) {
1631 1631 if (G1CollectedHeap::use_parallel_gc_threads()) {
1632 1632 _g1rs->scrub_par(_region_bm, _card_bm, i,
1633 1633 HeapRegion::ScrubRemSetClaimValue);
1634 1634 } else {
1635 1635 _g1rs->scrub(_region_bm, _card_bm);
1636 1636 }
1637 1637 }
1638 1638
1639 1639 };
1640 1640
1641 1641 G1NoteEndOfConcMarkClosure::
1642 1642 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1643 1643 int worker_num,
1644 1644 FreeRegionList* local_cleanup_list,
1645 1645 HumongousRegionSet* humongous_proxy_set,
1646 1646 HRRSCleanupTask* hrrs_cleanup_task)
1647 1647 : _g1(g1), _worker_num(worker_num),
1648 1648 _max_live_bytes(0), _regions_claimed(0),
1649 1649 _freed_bytes(0),
1650 1650 _claimed_region_time(0.0), _max_region_time(0.0),
1651 1651 _local_cleanup_list(local_cleanup_list),
1652 1652 _humongous_proxy_set(humongous_proxy_set),
1653 1653 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1654 1654
1655 1655 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
1656 1656 // We use a claim value of zero here because all regions
1657 1657 // were claimed with value 1 in the FinalCount task.
1658 1658 hr->reset_gc_time_stamp();
1659 1659 if (!hr->continuesHumongous()) {
1660 1660 double start = os::elapsedTime();
1661 1661 _regions_claimed++;
1662 1662 hr->note_end_of_marking();
1663 1663 _max_live_bytes += hr->max_live_bytes();
1664 1664 _g1->free_region_if_empty(hr,
1665 1665 &_freed_bytes,
1666 1666 _local_cleanup_list,
1667 1667 _humongous_proxy_set,
1668 1668 _hrrs_cleanup_task,
1669 1669 true /* par */);
1670 1670 double region_time = (os::elapsedTime() - start);
1671 1671 _claimed_region_time += region_time;
1672 1672 if (region_time > _max_region_time) {
1673 1673 _max_region_time = region_time;
1674 1674 }
1675 1675 }
1676 1676 return false;
1677 1677 }
1678 1678
1679 1679 void ConcurrentMark::cleanup() {
1680 1680 // world is stopped at this checkpoint
1681 1681 assert(SafepointSynchronize::is_at_safepoint(),
1682 1682 "world should be stopped");
1683 1683 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1684 1684
1685 1685 // If a full collection has happened, we shouldn't do this.
1686 1686 if (has_aborted()) {
1687 1687 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1688 1688 return;
1689 1689 }
1690 1690
1691 1691 g1h->verify_region_sets_optional();
1692 1692
1693 1693 if (VerifyDuringGC) {
1694 1694 HandleMark hm; // handle scope
1695 1695 gclog_or_tty->print(" VerifyDuringGC:(before)");
1696 1696 Universe::heap()->prepare_for_verify();
1697 1697 Universe::verify(/* allow dirty */ true,
1698 1698 /* silent */ false,
1699 1699 /* option */ VerifyOption_G1UsePrevMarking);
1700 1700 }
1701 1701
1702 1702 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1703 1703 g1p->record_concurrent_mark_cleanup_start();
1704 1704
1705 1705 double start = os::elapsedTime();
1706 1706
1707 1707 HeapRegionRemSet::reset_for_cleanup_tasks();
1708 1708
1709 1709 // Do counting once more with the world stopped for good measure.
1710 1710 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1711 1711 &_region_bm, &_card_bm);
1712 1712 if (G1CollectedHeap::use_parallel_gc_threads()) {
1713 1713 assert(g1h->check_heap_region_claim_values(
1714 1714 HeapRegion::InitialClaimValue),
1715 1715 "sanity check");
1716 1716
1717 1717 int n_workers = g1h->workers()->total_workers();
1718 1718 g1h->set_par_threads(n_workers);
1719 1719 g1h->workers()->run_task(&g1_par_count_task);
1720 1720 g1h->set_par_threads(0);
1721 1721
1722 1722 assert(g1h->check_heap_region_claim_values(
1723 1723 HeapRegion::FinalCountClaimValue),
1724 1724 "sanity check");
1725 1725 } else {
1726 1726 g1_par_count_task.work(0);
1727 1727 }
1728 1728
1729 1729 size_t known_garbage_bytes =
1730 1730 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1731 1731 g1p->set_known_garbage_bytes(known_garbage_bytes);
1732 1732
1733 1733 size_t start_used_bytes = g1h->used();
1734 1734 _at_least_one_mark_complete = true;
1735 1735 g1h->set_marking_complete();
1736 1736
1737 1737 ergo_verbose4(ErgoConcCycles,
1738 1738 "finish cleanup",
1739 1739 ergo_format_byte("occupancy")
1740 1740 ergo_format_byte("capacity")
1741 1741 ergo_format_byte_perc("known garbage"),
1742 1742 start_used_bytes, g1h->capacity(),
1743 1743 known_garbage_bytes,
1744 1744 ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
1745 1745
1746 1746 double count_end = os::elapsedTime();
1747 1747 double this_final_counting_time = (count_end - start);
1748 1748 if (G1PrintParCleanupStats) {
1749 1749 gclog_or_tty->print_cr("Cleanup:");
1750 1750 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1751 1751 this_final_counting_time*1000.0);
1752 1752 }
1753 1753 _total_counting_time += this_final_counting_time;
1754 1754
1755 1755 if (G1PrintRegionLivenessInfo) {
1756 1756 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1757 1757 _g1h->heap_region_iterate(&cl);
1758 1758 }
1759 1759
1760 1760 // Install newly created mark bitMap as "prev".
1761 1761 swapMarkBitMaps();
1762 1762
1763 1763 g1h->reset_gc_time_stamp();
1764 1764
1765 1765 // Note end of marking in all heap regions.
1766 1766 double note_end_start = os::elapsedTime();
1767 1767 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1768 1768 if (G1CollectedHeap::use_parallel_gc_threads()) {
1769 1769 int n_workers = g1h->workers()->total_workers();
1770 1770 g1h->set_par_threads(n_workers);
1771 1771 g1h->workers()->run_task(&g1_par_note_end_task);
1772 1772 g1h->set_par_threads(0);
1773 1773
1774 1774 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1775 1775 "sanity check");
1776 1776 } else {
1777 1777 g1_par_note_end_task.work(0);
1778 1778 }
1779 1779
1780 1780 if (!cleanup_list_is_empty()) {
1781 1781 // The cleanup list is not empty, so we'll have to process it
1782 1782 // concurrently. Notify anyone else that might be wanting free
1783 1783 // regions that there will be more free regions coming soon.
1784 1784 g1h->set_free_regions_coming();
1785 1785 }
1786 1786 double note_end_end = os::elapsedTime();
1787 1787 if (G1PrintParCleanupStats) {
1788 1788 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1789 1789 (note_end_end - note_end_start)*1000.0);
1790 1790 }
1791 1791
1792 1792
1793 1793 // call below, since it affects the metric by which we sort the heap
1794 1794 // regions.
1795 1795 if (G1ScrubRemSets) {
1796 1796 double rs_scrub_start = os::elapsedTime();
1797 1797 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1798 1798 if (G1CollectedHeap::use_parallel_gc_threads()) {
1799 1799 int n_workers = g1h->workers()->total_workers();
1800 1800 g1h->set_par_threads(n_workers);
1801 1801 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1802 1802 g1h->set_par_threads(0);
1803 1803
1804 1804 assert(g1h->check_heap_region_claim_values(
1805 1805 HeapRegion::ScrubRemSetClaimValue),
1806 1806 "sanity check");
1807 1807 } else {
1808 1808 g1_par_scrub_rs_task.work(0);
1809 1809 }
1810 1810
1811 1811 double rs_scrub_end = os::elapsedTime();
1812 1812 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1813 1813 _total_rs_scrub_time += this_rs_scrub_time;
1814 1814 }
1815 1815
1816 1816 // this will also free any regions totally full of garbage objects,
1817 1817 // and sort the regions.
1818 1818 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1819 1819 g1_par_note_end_task.freed_bytes(),
1820 1820 g1_par_note_end_task.max_live_bytes());
1821 1821
1822 1822 // Statistics.
1823 1823 double end = os::elapsedTime();
1824 1824 _cleanup_times.add((end - start) * 1000.0);
1825 1825
1826 1826 // G1CollectedHeap::heap()->print();
1827 1827 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1828 1828 // G1CollectedHeap::heap()->get_gc_time_stamp());
1829 1829
1830 1830 if (PrintGC || PrintGCDetails) {
1831 1831 g1h->print_size_transition(gclog_or_tty,
1832 1832 start_used_bytes,
1833 1833 g1h->used(),
1834 1834 g1h->capacity());
1835 1835 }
1836 1836
1837 1837 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1838 1838 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1839 1839
1840 1840 // We need to make this be a "collection" so any collection pause that
1841 1841 // races with it goes around and waits for completeCleanup to finish.
1842 1842 g1h->increment_total_collections();
1843 1843
1844 1844 if (VerifyDuringGC) {
1845 1845 HandleMark hm; // handle scope
1846 1846 gclog_or_tty->print(" VerifyDuringGC:(after)");
1847 1847 Universe::heap()->prepare_for_verify();
1848 1848 Universe::verify(/* allow dirty */ true,
1849 1849 /* silent */ false,
1850 1850 /* option */ VerifyOption_G1UsePrevMarking);
1851 1851 }
1852 1852
1853 1853 g1h->verify_region_sets_optional();
1854 1854 }
1855 1855
1856 1856 void ConcurrentMark::completeCleanup() {
1857 1857 if (has_aborted()) return;
1858 1858
1859 1859 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1860 1860
1861 1861 _cleanup_list.verify_optional();
1862 1862 FreeRegionList tmp_free_list("Tmp Free List");
1863 1863
1864 1864 if (G1ConcRegionFreeingVerbose) {
1865 1865 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1866 1866 "cleanup list has "SIZE_FORMAT" entries",
1867 1867 _cleanup_list.length());
1868 1868 }
1869 1869
1870 1870   // No one else should be accessing the _cleanup_list at this point,
1871 1871   // so it's not necessary to take any locks.
1872 1872 while (!_cleanup_list.is_empty()) {
1873 1873 HeapRegion* hr = _cleanup_list.remove_head();
1874 1874 assert(hr != NULL, "the list was not empty");
1875 1875 hr->par_clear();
1876 1876 tmp_free_list.add_as_tail(hr);
1877 1877
1878 1878 // Instead of adding one region at a time to the secondary_free_list,
1879 1879 // we accumulate them in the local list and move them a few at a
1880 1880 // time. This also cuts down on the number of notify_all() calls
1881 1881 // we do during this process. We'll also append the local list when
1882 1882 // _cleanup_list is empty (which means we just removed the last
1883 1883 // region from the _cleanup_list).
1884 1884 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1885 1885 _cleanup_list.is_empty()) {
1886 1886 if (G1ConcRegionFreeingVerbose) {
1887 1887 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1888 1888 "appending "SIZE_FORMAT" entries to the "
1889 1889 "secondary_free_list, clean list still has "
1890 1890 SIZE_FORMAT" entries",
1891 1891 tmp_free_list.length(),
1892 1892 _cleanup_list.length());
1893 1893 }
1894 1894
1895 1895 {
1896 1896 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1897 1897 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1898 1898 SecondaryFreeList_lock->notify_all();
1899 1899 }
1900 1900
1901 1901 if (G1StressConcRegionFreeing) {
1902 1902 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1903 1903 os::sleep(Thread::current(), (jlong) 1, false);
1904 1904 }
1905 1905 }
1906 1906 }
1907 1907 }
1908 1908 assert(tmp_free_list.is_empty(), "post-condition");
1909 1909 }
1910 1910
1911 1911 // Support closures for reference processing in G1
1912 1912
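// Liveness predicate used by reference processing: an oop is treated as
// live if it lies outside the G1 reserved heap, or if the current marking
// has not determined it to be dead (is_obj_ill() returns false).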
1913 1913 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1914 1914 HeapWord* addr = (HeapWord*)obj;
1915 1915 return addr != NULL &&
1916 1916 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1917 1917 }
1918 1918
1919 1919 class G1CMKeepAliveClosure: public OopClosure {
1920 1920 G1CollectedHeap* _g1;
1921 1921 ConcurrentMark* _cm;
1922 1922 CMBitMap* _bitMap;
1923 1923 public:
1924 1924 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1925 1925 CMBitMap* bitMap) :
1926 1926 _g1(g1), _cm(cm),
1927 1927 _bitMap(bitMap) {}
1928 1928
1929 1929 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1930 1930 virtual void do_oop( oop* p) { do_oop_work(p); }
1931 1931
1932 1932 template <class T> void do_oop_work(T* p) {
1933 1933 oop obj = oopDesc::load_decode_heap_oop(p);
1934 1934 HeapWord* addr = (HeapWord*)obj;
1935 1935
1936 1936 if (_cm->verbose_high()) {
1937 1937 gclog_or_tty->print_cr("\t[0] we're looking at location "
1938 1938 "*"PTR_FORMAT" = "PTR_FORMAT,
1939 1939 p, (void*) obj);
1940 1940 }
1941 1941
1942 1942 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
1943 1943 _bitMap->mark(addr);
1944 1944 _cm->mark_stack_push(obj);
1945 1945 }
1946 1946 }
1947 1947 };
1948 1948
1949 1949 class G1CMDrainMarkingStackClosure: public VoidClosure {
1950 1950 CMMarkStack* _markStack;
1951 1951 CMBitMap* _bitMap;
1952 1952 G1CMKeepAliveClosure* _oopClosure;
1953 1953 public:
1954 1954 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1955 1955 G1CMKeepAliveClosure* oopClosure) :
1956 1956 _bitMap(bitMap),
1957 1957 _markStack(markStack),
1958 1958 _oopClosure(oopClosure)
1959 1959 {}
1960 1960
1961 1961 void do_void() {
1962 1962 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1963 1963 }
1964 1964 };
1965 1965
1966 1966 // 'Keep Alive' closure used by parallel reference processing.
1967 1967 // An instance of this closure is used in the parallel reference processing
1968 1968 // code rather than an instance of G1CMKeepAliveClosure. We could have used
1969 1969 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
1970 1970 // placed on to discovered ref lists once so we can mark and push with no
1971 1971 // need to check whether the object has already been marked. Using the
1972 1972 // G1CMKeepAliveClosure would mean, however, having all the worker threads
1973 1973 // operating on the global mark stack. This means that an individual
1974 1974 // worker would be doing lock-free pushes while it processes its own
1975 1975 // discovered ref list followed by drain call. If the discovered ref lists
1976 1976 // are unbalanced then this could cause interference with the other
1977 1977 // workers. Using a CMTask (and its embedded local data structures)
1978 1978 // avoids that potential interference.
1979 1979 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
1980 1980 ConcurrentMark* _cm;
1981 1981 CMTask* _task;
1982 1982 CMBitMap* _bitMap;
1983 1983 int _ref_counter_limit;
1984 1984 int _ref_counter;
1985 1985 public:
1986 1986 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
1987 1987 CMTask* task,
1988 1988 CMBitMap* bitMap) :
1989 1989 _cm(cm), _task(task), _bitMap(bitMap),
1990 1990 _ref_counter_limit(G1RefProcDrainInterval)
1991 1991 {
1992 1992 assert(_ref_counter_limit > 0, "sanity");
1993 1993 _ref_counter = _ref_counter_limit;
1994 1994 }
1995 1995
1996 1996 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1997 1997 virtual void do_oop( oop* p) { do_oop_work(p); }
1998 1998
1999 1999 template <class T> void do_oop_work(T* p) {
2000 2000 if (!_cm->has_overflown()) {
2001 2001 oop obj = oopDesc::load_decode_heap_oop(p);
2002 2002 if (_cm->verbose_high()) {
2003 2003 gclog_or_tty->print_cr("\t[%d] we're looking at location "
2004 2004 "*"PTR_FORMAT" = "PTR_FORMAT,
2005 2005 _task->task_id(), p, (void*) obj);
2006 2006 }
2007 2007
2008 2008 _task->deal_with_reference(obj);
2009 2009 _ref_counter--;
2010 2010
2011 2011 if (_ref_counter == 0) {
2012 2012 // We have dealt with _ref_counter_limit references, pushing them and objects
2013 2013 // reachable from them on to the local stack (and possibly the global stack).
2014 2014 // Call do_marking_step() to process these entries. We call the routine in a
2015 2015 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2016 2016 // with the entries that we've pushed as a result of the deal_with_reference
2017 2017 // calls above) or we overflow.
2018 2018 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2019 2019 // while there may still be some work to do. (See the comment at the
2020 2020 // beginning of CMTask::do_marking_step() for those conditions - one of which
2021 2021 // is reaching the specified time target.) It is only when
2022 2022 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2023 2023 // that the marking has completed.
2024 2024 do {
2025 2025 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2026 2026 _task->do_marking_step(mark_step_duration_ms,
2027 2027 false /* do_stealing */,
2028 2028 false /* do_termination */);
2029 2029 } while (_task->has_aborted() && !_cm->has_overflown());
2030 2030 _ref_counter = _ref_counter_limit;
2031 2031 }
2032 2032 } else {
2033 2033 if (_cm->verbose_high()) {
2034 2034 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2035 2035 }
2036 2036 }
2037 2037 }
2038 2038 };
2039 2039
2040 2040 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2041 2041 ConcurrentMark* _cm;
2042 2042 CMTask* _task;
2043 2043 public:
2044 2044 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2045 2045 _cm(cm), _task(task)
2046 2046 {}
2047 2047
2048 2048 void do_void() {
2049 2049 do {
2050 2050 if (_cm->verbose_high()) {
2051 2051 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2052 2052 _task->task_id());
2053 2053 }
2054 2054
2055 2055 // We call CMTask::do_marking_step() to completely drain the local and
2056 2056 // global marking stacks. The routine is called in a loop, which we'll
2057 2057       // exit if there's nothing more to do (i.e. we've completely drained the
2058 2058 // entries that were pushed as a result of applying the
2059 2059 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2060 2060 // lists above) or we overflow the global marking stack.
2061 2061 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2062 2062 // while there may still be some work to do. (See the comment at the
2063 2063 // beginning of CMTask::do_marking_step() for those conditions - one of which
2064 2064 // is reaching the specified time target.) It is only when
2065 2065 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2066 2066 // that the marking has completed.
2067 2067
2068 2068 _task->do_marking_step(1000000000.0 /* something very large */,
2069 2069 true /* do_stealing */,
2070 2070 true /* do_termination */);
2071 2071 } while (_task->has_aborted() && !_cm->has_overflown());
2072 2072 }
2073 2073 };
2074 2074
2075 2075 // Implementation of AbstractRefProcTaskExecutor for G1
2076 2076 class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2077 2077 private:
2078 2078 G1CollectedHeap* _g1h;
2079 2079 ConcurrentMark* _cm;
2080 2080 CMBitMap* _bitmap;
2081 2081 WorkGang* _workers;
2082 2082 int _active_workers;
2083 2083
2084 2084 public:
2085 2085 G1RefProcTaskExecutor(G1CollectedHeap* g1h,
2086 2086 ConcurrentMark* cm,
2087 2087 CMBitMap* bitmap,
2088 2088 WorkGang* workers,
2089 2089 int n_workers) :
2090 2090 _g1h(g1h), _cm(cm), _bitmap(bitmap),
2091 2091 _workers(workers), _active_workers(n_workers)
2092 2092 { }
2093 2093
2094 2094 // Executes the given task using concurrent marking worker threads.
2095 2095 virtual void execute(ProcessTask& task);
2096 2096 virtual void execute(EnqueueTask& task);
2097 2097 };
2098 2098
2099 2099 class G1RefProcTaskProxy: public AbstractGangTask {
2100 2100 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2101 2101 ProcessTask& _proc_task;
2102 2102 G1CollectedHeap* _g1h;
2103 2103 ConcurrentMark* _cm;
2104 2104 CMBitMap* _bitmap;
2105 2105
2106 2106 public:
2107 2107 G1RefProcTaskProxy(ProcessTask& proc_task,
2108 2108 G1CollectedHeap* g1h,
2109 2109 ConcurrentMark* cm,
2110 2110 CMBitMap* bitmap) :
2111 2111 AbstractGangTask("Process reference objects in parallel"),
2112 2112 _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
2113 2113 {}
2114 2114
2115 2115 virtual void work(int i) {
2116 2116 CMTask* marking_task = _cm->task(i);
2117 2117 G1CMIsAliveClosure g1_is_alive(_g1h);
2118 2118 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
2119 2119 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2120 2120
2121 2121 _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2122 2122 }
2123 2123 };
2124 2124
2125 2125 void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) {
2126 2126 assert(_workers != NULL, "Need parallel worker threads.");
2127 2127
2128 2128 G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2129 2129
2130 2130 // We need to reset the phase for each task execution so that
2131 2131 // the termination protocol of CMTask::do_marking_step works.
2132 2132 _cm->set_phase(_active_workers, false /* concurrent */);
2133 2133 _g1h->set_par_threads(_active_workers);
2134 2134 _workers->run_task(&proc_task_proxy);
2135 2135 _g1h->set_par_threads(0);
2136 2136 }
2137 2137
2138 2138 class G1RefEnqueueTaskProxy: public AbstractGangTask {
2139 2139 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2140 2140 EnqueueTask& _enq_task;
2141 2141
2142 2142 public:
2143 2143 G1RefEnqueueTaskProxy(EnqueueTask& enq_task) :
2144 2144 AbstractGangTask("Enqueue reference objects in parallel"),
2145 2145 _enq_task(enq_task)
2146 2146 { }
2147 2147
2148 2148 virtual void work(int i) {
2149 2149 _enq_task.work(i);
2150 2150 }
2151 2151 };
2152 2152
2153 2153 void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2154 2154 assert(_workers != NULL, "Need parallel worker threads.");
2155 2155
2156 2156 G1RefEnqueueTaskProxy enq_task_proxy(enq_task);
2157 2157
2158 2158 _g1h->set_par_threads(_active_workers);
2159 2159 _workers->run_task(&enq_task_proxy);
2160 2160 _g1h->set_par_threads(0);
2161 2161 }
2162 2162
2163 2163 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2164 2164 ResourceMark rm;
2165 2165 HandleMark hm;
2166 2166 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2167 2167 ReferenceProcessor* rp = g1h->ref_processor();
2168 2168
2169 2169 // See the comment in G1CollectedHeap::ref_processing_init()
2170 2170 // about how reference processing currently works in G1.
2171 2171
2172 2172 // Process weak references.
2173 2173 rp->setup_policy(clear_all_soft_refs);
2174 2174 assert(_markStack.isEmpty(), "mark stack should be empty");
2175 2175
2176 2176 G1CMIsAliveClosure g1_is_alive(g1h);
2177 2177 G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2178 2178 G1CMDrainMarkingStackClosure
2179 2179 g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2180 2180 // We use the work gang from the G1CollectedHeap and we utilize all
2181 2181 // the worker threads.
2182 2182 int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
2183 2183 active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2184 2184
2185 2185 G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2186 2186 g1h->workers(), active_workers);
2187 2187
2188 2188
2189 2189 if (rp->processing_is_mt()) {
2190 2190 // Set the degree of MT here. If the discovery is done MT, there
2191 2191 // may have been a different number of threads doing the discovery
2192 2192 // and a different number of discovered lists may have Ref objects.
2193 2193 // That is OK as long as the Reference lists are balanced (see
2194 2194 // balance_all_queues() and balance_queues()).
2195 2195 rp->set_active_mt_degree(active_workers);
2196 2196
2197 2197 rp->process_discovered_references(&g1_is_alive,
2198 2198 &g1_keep_alive,
2199 2199 &g1_drain_mark_stack,
2200 2200 &par_task_executor);
2201 2201
2202 2202 // The work routines of the parallel keep_alive and drain_marking_stack
2203 2203 // will set the has_overflown flag if we overflow the global marking
2204 2204 // stack.
2205 2205 } else {
2206 2206 rp->process_discovered_references(&g1_is_alive,
2207 2207 &g1_keep_alive,
2208 2208 &g1_drain_mark_stack,
2209 2209 NULL);
2210 2210
2211 2211 }
2212 2212
2213 2213 assert(_markStack.overflow() || _markStack.isEmpty(),
2214 2214 "mark stack should be empty (unless it overflowed)");
2215 2215 if (_markStack.overflow()) {
2216 2216 // Should have been done already when we tried to push an
2217 2217 // entry on to the global mark stack. But let's do it again.
2218 2218 set_has_overflown();
2219 2219 }
2220 2220
2221 2221 if (rp->processing_is_mt()) {
2222 2222 assert(rp->num_q() == active_workers, "why not");
2223 2223 rp->enqueue_discovered_references(&par_task_executor);
2224 2224 } else {
2225 2225 rp->enqueue_discovered_references();
2226 2226 }
2227 2227
2228 2228 rp->verify_no_references_recorded();
2229 2229 assert(!rp->discovery_enabled(), "should have been disabled");
2230 2230
2231 2231 // Now clean up stale oops in StringTable
2232 2232 StringTable::unlink(&g1_is_alive);
2233 2233 // Clean up unreferenced symbols in symbol table.
2234 2234 SymbolTable::unlink();
2235 2235 }
2236 2236
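// Called during cleanup: the bitmap produced by the marking cycle that just
// completed becomes the (read-only) "prev" bitmap, while the old "prev"
// bitmap is recycled as the "next" bitmap for the following cycle.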
2237 2237 void ConcurrentMark::swapMarkBitMaps() {
2238 2238 CMBitMapRO* temp = _prevMarkBitMap;
2239 2239 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2240 2240 _nextMarkBitMap = (CMBitMap*) temp;
2241 2241 }
2242 2242
2243 2243 class CMRemarkTask: public AbstractGangTask {
2244 2244 private:
2245 2245 ConcurrentMark *_cm;
2246 2246
2247 2247 public:
2248 2248 void work(int worker_i) {
2249 2249 // Since all available tasks are actually started, we should
2250 2250     // only proceed if we're supposed to be active.
2251 2251 if ((size_t)worker_i < _cm->active_tasks()) {
2252 2252 CMTask* task = _cm->task(worker_i);
2253 2253 task->record_start_time();
2254 2254 do {
2255 2255 task->do_marking_step(1000000000.0 /* something very large */,
2256 2256 true /* do_stealing */,
2257 2257 true /* do_termination */);
2258 2258 } while (task->has_aborted() && !_cm->has_overflown());
2259 2259 // If we overflow, then we do not want to restart. We instead
2260 2260 // want to abort remark and do concurrent marking again.
2261 2261 task->record_end_time();
2262 2262 }
2263 2263 }
2264 2264
2265 2265 CMRemarkTask(ConcurrentMark* cm) :
2266 2266 AbstractGangTask("Par Remark"), _cm(cm) { }
2267 2267 };
2268 2268
2269 2269 void ConcurrentMark::checkpointRootsFinalWork() {
2270 2270 ResourceMark rm;
2271 2271 HandleMark hm;
2272 2272 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2273 2273
2274 2274 g1h->ensure_parsability(false);
2275 2275
2276 2276 if (G1CollectedHeap::use_parallel_gc_threads()) {
2277 2277 G1CollectedHeap::StrongRootsScope srs(g1h);
2278 2278 // this is remark, so we'll use up all available threads
2279 2279 int active_workers = ParallelGCThreads;
2280 2280 set_phase(active_workers, false /* concurrent */);
2281 2281
2282 2282 CMRemarkTask remarkTask(this);
2283 2283 // We will start all available threads, even if we decide that the
2284 2284 // active_workers will be fewer. The extra ones will just bail out
2285 2285 // immediately.
2286 2286 int n_workers = g1h->workers()->total_workers();
2287 2287 g1h->set_par_threads(n_workers);
2288 2288 g1h->workers()->run_task(&remarkTask);
2289 2289 g1h->set_par_threads(0);
2290 2290 } else {
2291 2291 G1CollectedHeap::StrongRootsScope srs(g1h);
2292 2292 // this is remark, so we'll use up all available threads
2293 2293 int active_workers = 1;
2294 2294 set_phase(active_workers, false /* concurrent */);
2295 2295
2296 2296 CMRemarkTask remarkTask(this);
2297 2297 // We will start all available threads, even if we decide that the
2298 2298 // active_workers will be fewer. The extra ones will just bail out
2299 2299 // immediately.
2300 2300 remarkTask.work(0);
2301 2301 }
2302 2302 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2303 2303 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2304 2304
2305 2305 print_stats();
2306 2306
2307 2307 #if VERIFY_OBJS_PROCESSED
2308 2308 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2309 2309 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2310 2310 _scan_obj_cl.objs_processed,
2311 2311 ThreadLocalObjQueue::objs_enqueued);
2312 2312 guarantee(_scan_obj_cl.objs_processed ==
2313 2313 ThreadLocalObjQueue::objs_enqueued,
2314 2314 "Different number of objs processed and enqueued.");
2315 2315 }
2316 2316 #endif
2317 2317 }
2318 2318
2319 2319 #ifndef PRODUCT
2320 2320
2321 2321 class PrintReachableOopClosure: public OopClosure {
2322 2322 private:
2323 2323 G1CollectedHeap* _g1h;
2324 2324 outputStream* _out;
2325 2325 VerifyOption _vo;
2326 2326 bool _all;
2327 2327
2328 2328 public:
2329 2329 PrintReachableOopClosure(outputStream* out,
2330 2330 VerifyOption vo,
2331 2331 bool all) :
2332 2332 _g1h(G1CollectedHeap::heap()),
2333 2333 _out(out), _vo(vo), _all(all) { }
2334 2334
2335 2335 void do_oop(narrowOop* p) { do_oop_work(p); }
2336 2336 void do_oop( oop* p) { do_oop_work(p); }
2337 2337
2338 2338 template <class T> void do_oop_work(T* p) {
2339 2339 oop obj = oopDesc::load_decode_heap_oop(p);
2340 2340 const char* str = NULL;
2341 2341 const char* str2 = "";
2342 2342
2343 2343 if (obj == NULL) {
2344 2344 str = "";
2345 2345 } else if (!_g1h->is_in_g1_reserved(obj)) {
2346 2346 str = " O";
2347 2347 } else {
2348 2348 HeapRegion* hr = _g1h->heap_region_containing(obj);
2349 2349 guarantee(hr != NULL, "invariant");
2350 2350 bool over_tams = false;
2351 2351 bool marked = false;
2352 2352
2353 2353 switch (_vo) {
2354 2354 case VerifyOption_G1UsePrevMarking:
2355 2355 over_tams = hr->obj_allocated_since_prev_marking(obj);
2356 2356 marked = _g1h->isMarkedPrev(obj);
2357 2357 break;
2358 2358 case VerifyOption_G1UseNextMarking:
2359 2359 over_tams = hr->obj_allocated_since_next_marking(obj);
2360 2360 marked = _g1h->isMarkedNext(obj);
2361 2361 break;
2362 2362 case VerifyOption_G1UseMarkWord:
2363 2363 marked = obj->is_gc_marked();
2364 2364 break;
2365 2365 default:
2366 2366 ShouldNotReachHere();
2367 2367 }
2368 2368
2369 2369 if (over_tams) {
2370 2370 str = " >";
2371 2371 if (marked) {
2372 2372 str2 = " AND MARKED";
2373 2373 }
2374 2374 } else if (marked) {
2375 2375 str = " M";
2376 2376 } else {
2377 2377 str = " NOT";
2378 2378 }
2379 2379 }
2380 2380
2381 2381 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2382 2382 p, (void*) obj, str, str2);
2383 2383 }
2384 2384 };
2385 2385
2386 2386 class PrintReachableObjectClosure : public ObjectClosure {
2387 2387 private:
2388 2388 G1CollectedHeap* _g1h;
2389 2389 outputStream* _out;
2390 2390 VerifyOption _vo;
2391 2391 bool _all;
2392 2392 HeapRegion* _hr;
2393 2393
2394 2394 public:
2395 2395 PrintReachableObjectClosure(outputStream* out,
2396 2396 VerifyOption vo,
2397 2397 bool all,
2398 2398 HeapRegion* hr) :
2399 2399 _g1h(G1CollectedHeap::heap()),
2400 2400 _out(out), _vo(vo), _all(all), _hr(hr) { }
2401 2401
2402 2402 void do_object(oop o) {
2403 2403 bool over_tams = false;
2404 2404 bool marked = false;
2405 2405
2406 2406 switch (_vo) {
2407 2407 case VerifyOption_G1UsePrevMarking:
2408 2408 over_tams = _hr->obj_allocated_since_prev_marking(o);
2409 2409 marked = _g1h->isMarkedPrev(o);
2410 2410 break;
2411 2411 case VerifyOption_G1UseNextMarking:
2412 2412 over_tams = _hr->obj_allocated_since_next_marking(o);
2413 2413 marked = _g1h->isMarkedNext(o);
2414 2414 break;
2415 2415 case VerifyOption_G1UseMarkWord:
2416 2416 marked = o->is_gc_marked();
2417 2417 break;
2418 2418 default:
2419 2419 ShouldNotReachHere();
2420 2420 }
2421 2421 bool print_it = _all || over_tams || marked;
2422 2422
2423 2423 if (print_it) {
2424 2424 _out->print_cr(" "PTR_FORMAT"%s",
2425 2425 o, (over_tams) ? " >" : (marked) ? " M" : "");
2426 2426 PrintReachableOopClosure oopCl(_out, _vo, _all);
2427 2427 o->oop_iterate(&oopCl);
2428 2428 }
2429 2429 }
2430 2430 };
2431 2431
2432 2432 class PrintReachableRegionClosure : public HeapRegionClosure {
2433 2433 private:
2434 2434 outputStream* _out;
2435 2435 VerifyOption _vo;
2436 2436 bool _all;
2437 2437
2438 2438 public:
2439 2439 bool doHeapRegion(HeapRegion* hr) {
2440 2440 HeapWord* b = hr->bottom();
2441 2441 HeapWord* e = hr->end();
2442 2442 HeapWord* t = hr->top();
2443 2443 HeapWord* p = NULL;
2444 2444
2445 2445 switch (_vo) {
2446 2446 case VerifyOption_G1UsePrevMarking:
2447 2447 p = hr->prev_top_at_mark_start();
2448 2448 break;
2449 2449 case VerifyOption_G1UseNextMarking:
2450 2450 p = hr->next_top_at_mark_start();
2451 2451 break;
2452 2452 case VerifyOption_G1UseMarkWord:
2453 2453 // When we are verifying marking using the mark word
2454 2454 // TAMS has no relevance.
2455 2455 assert(p == NULL, "post-condition");
2456 2456 break;
2457 2457 default:
2458 2458 ShouldNotReachHere();
2459 2459 }
2460 2460 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2461 2461 "TAMS: "PTR_FORMAT, b, e, t, p);
2462 2462 _out->cr();
2463 2463
2464 2464 HeapWord* from = b;
2465 2465 HeapWord* to = t;
2466 2466
2467 2467 if (to > from) {
2468 2468 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2469 2469 _out->cr();
2470 2470 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2471 2471 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2472 2472 _out->cr();
2473 2473 }
2474 2474
2475 2475 return false;
2476 2476 }
2477 2477
2478 2478 PrintReachableRegionClosure(outputStream* out,
2479 2479 VerifyOption vo,
2480 2480 bool all) :
2481 2481 _out(out), _vo(vo), _all(all) { }
2482 2482 };
2483 2483
2484 2484 static const char* verify_option_to_tams(VerifyOption vo) {
2485 2485 switch (vo) {
2486 2486 case VerifyOption_G1UsePrevMarking:
2487 2487 return "PTAMS";
2488 2488 case VerifyOption_G1UseNextMarking:
2489 2489 return "NTAMS";
2490 2490 default:
2491 2491 return "NONE";
2492 2492 }
2493 2493 }
2494 2494
2495 2495 void ConcurrentMark::print_reachable(const char* str,
2496 2496 VerifyOption vo,
2497 2497 bool all) {
2498 2498 gclog_or_tty->cr();
2499 2499 gclog_or_tty->print_cr("== Doing heap dump... ");
2500 2500
2501 2501 if (G1PrintReachableBaseFile == NULL) {
2502 2502 gclog_or_tty->print_cr(" #### error: no base file defined");
2503 2503 return;
2504 2504 }
2505 2505
2506 2506 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2507 2507 (JVM_MAXPATHLEN - 1)) {
2508 2508 gclog_or_tty->print_cr(" #### error: file name too long");
2509 2509 return;
2510 2510 }
2511 2511
2512 2512 char file_name[JVM_MAXPATHLEN];
2513 2513 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2514 2514 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2515 2515
2516 2516 fileStream fout(file_name);
2517 2517 if (!fout.is_open()) {
2518 2518 gclog_or_tty->print_cr(" #### error: could not open file");
2519 2519 return;
2520 2520 }
2521 2521
2522 2522 outputStream* out = &fout;
2523 2523 out->print_cr("-- USING %s", verify_option_to_tams(vo));
2524 2524 out->cr();
2525 2525
2526 2526 out->print_cr("--- ITERATING OVER REGIONS");
2527 2527 out->cr();
2528 2528 PrintReachableRegionClosure rcl(out, vo, all);
2529 2529 _g1h->heap_region_iterate(&rcl);
2530 2530 out->cr();
2531 2531
2532 2532 gclog_or_tty->print_cr(" done");
2533 2533 gclog_or_tty->flush();
2534 2534 }
2535 2535
2536 2536 #endif // PRODUCT
2537 2537
2538 2538 // This note is for drainAllSATBBuffers and the code in between.
2539 2539 // In the future we could reuse a task to do this work during an
2540 2540 // evacuation pause (since now tasks are not active and can be claimed
2541 2541 // during an evacuation pause). This was a late change to the code and
2542 2542 // is currently not being taken advantage of.
2543 2543
2544 2544 class CMGlobalObjectClosure : public ObjectClosure {
2545 2545 private:
2546 2546 ConcurrentMark* _cm;
2547 2547
2548 2548 public:
2549 2549 void do_object(oop obj) {
2550 2550 _cm->deal_with_reference(obj);
2551 2551 }
2552 2552
2553 2553 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2554 2554 };
2555 2555
2556 2556 void ConcurrentMark::deal_with_reference(oop obj) {
2557 2557 if (verbose_high()) {
2558 2558 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2559 2559 (void*) obj);
2560 2560 }
2561 2561
2562 2562 HeapWord* objAddr = (HeapWord*) obj;
2563 2563 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2564 2564 if (_g1h->is_in_g1_reserved(objAddr)) {
2565 2565 assert(obj != NULL, "null check is implicit");
2566 2566 if (!_nextMarkBitMap->isMarked(objAddr)) {
2567 2567 // Only get the containing region if the object is not marked on the
2568 2568 // bitmap (otherwise, it's a waste of time since we won't do
2569 2569 // anything with it).
2570 2570 HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2571 2571 if (!hr->obj_allocated_since_next_marking(obj)) {
2572 2572 if (verbose_high()) {
2573 2573 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2574 2574 "marked", (void*) obj);
2575 2575 }
2576 2576
2577 2577 // we need to mark it first
2578 2578 if (_nextMarkBitMap->parMark(objAddr)) {
2579 2579           // No OrderAccess::store_load() is needed. It is implicit in the
2580 2580 // CAS done in parMark(objAddr) above
2581 2581 HeapWord* finger = _finger;
2582 2582 if (objAddr < finger) {
2583 2583 if (verbose_high()) {
2584 2584 gclog_or_tty->print_cr("[global] below the global finger "
2585 2585 "("PTR_FORMAT"), pushing it", finger);
2586 2586 }
2587 2587 if (!mark_stack_push(obj)) {
2588 2588 if (verbose_low()) {
2589 2589 gclog_or_tty->print_cr("[global] global stack overflow during "
2590 2590 "deal_with_reference");
2591 2591 }
2592 2592 }
2593 2593 }
2594 2594 }
2595 2595 }
2596 2596 }
2597 2597 }
2598 2598 }
2599 2599
2600 2600 void ConcurrentMark::drainAllSATBBuffers() {
2601 2601 CMGlobalObjectClosure oc(this);
2602 2602 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2603 2603 satb_mq_set.set_closure(&oc);
2604 2604
2605 2605 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2606 2606 if (verbose_medium()) {
2607 2607 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2608 2608 }
2609 2609 }
2610 2610
2611 2611 // no need to check whether we should do this, as this is only
2612 2612 // called during an evacuation pause
2613 2613 satb_mq_set.iterate_closure_all_threads();
2614 2614
2615 2615 satb_mq_set.set_closure(NULL);
2616 2616 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2617 2617 }
2618 2618
2619 2619 void ConcurrentMark::markPrev(oop p) {
2620 2620 // Note we are overriding the read-only view of the prev map here, via
2621 2621 // the cast.
2622 2622 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2623 2623 }
2624 2624
2625 2625 void ConcurrentMark::clear(oop p) {
2626 2626 assert(p != NULL && p->is_oop(), "expected an oop");
2627 2627 HeapWord* addr = (HeapWord*)p;
2628 2628   assert(addr >= _nextMarkBitMap->startWord() &&
2629 2629 addr < _nextMarkBitMap->endWord(), "in a region");
2630 2630
2631 2631 _nextMarkBitMap->clear(addr);
2632 2632 }
2633 2633
2634 2634 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2635 2635 // Note we are overriding the read-only view of the prev map here, via
2636 2636 // the cast.
2637 2637 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2638 2638 _nextMarkBitMap->clearRange(mr);
2639 2639 }
2640 2640
2641 2641 HeapRegion*
2642 2642 ConcurrentMark::claim_region(int task_num) {
2643 2643 // "checkpoint" the finger
2644 2644 HeapWord* finger = _finger;
2645 2645
2646 2646 // _heap_end will not change underneath our feet; it only changes at
2647 2647 // yield points.
2648 2648 while (finger < _heap_end) {
2649 2649 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2650 2650
2651 2651 // Note on how this code handles humongous regions. In the
2652 2652 // normal case the finger will reach the start of a "starts
2653 2653 // humongous" (SH) region. Its end will either be the end of the
2654 2654 // last "continues humongous" (CH) region in the sequence, or the
2655 2655 // standard end of the SH region (if the SH is the only region in
2656 2656 // the sequence). That way claim_region() will skip over the CH
2657 2657 // regions. However, there is a subtle race between a CM thread
2658 2658 // executing this method and a mutator thread doing a humongous
2659 2659 // object allocation. The two are not mutually exclusive as the CM
2660 2660 // thread does not need to hold the Heap_lock when it gets
2661 2661 // here. So there is a chance that claim_region() will come across
2662 2662     // a free region that's in the process of becoming a SH or a CH
2663 2663 // region. In the former case, it will either
2664 2664 // a) Miss the update to the region's end, in which case it will
2665 2665 // visit every subsequent CH region, will find their bitmaps
2666 2666 // empty, and do nothing, or
2667 2667 // b) Will observe the update of the region's end (in which case
2668 2668 // it will skip the subsequent CH regions).
2669 2669 // If it comes across a region that suddenly becomes CH, the
2670 2670 // scenario will be similar to b). So, the race between
2671 2671 // claim_region() and a humongous object allocation might force us
2672 2672 // to do a bit of unnecessary work (due to some unnecessary bitmap
2673 2673     // iterations) but it should not introduce any correctness issues.
2674 2674 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2675 2675 HeapWord* bottom = curr_region->bottom();
2676 2676 HeapWord* end = curr_region->end();
2677 2677 HeapWord* limit = curr_region->next_top_at_mark_start();
2678 2678
2679 2679 if (verbose_low()) {
2680 2680 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2681 2681 "["PTR_FORMAT", "PTR_FORMAT"), "
2682 2682 "limit = "PTR_FORMAT,
2683 2683 task_num, curr_region, bottom, end, limit);
2684 2684 }
2685 2685
2686 2686 // Is the gap between reading the finger and doing the CAS too long?
2687 2687 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2688 2688 if (res == finger) {
2689 2689 // we succeeded
2690 2690
2691 2691       // notice that _finger == end cannot be guaranteed here since
2692 2692 // someone else might have moved the finger even further
2693 2693 assert(_finger >= end, "the finger should have moved forward");
2694 2694
2695 2695 if (verbose_low()) {
2696 2696 gclog_or_tty->print_cr("[%d] we were successful with region = "
2697 2697 PTR_FORMAT, task_num, curr_region);
2698 2698 }
2699 2699
2700 2700 if (limit > bottom) {
2701 2701 if (verbose_low()) {
2702 2702 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2703 2703 "returning it ", task_num, curr_region);
2704 2704 }
2705 2705 return curr_region;
2706 2706 } else {
2707 2707 assert(limit == bottom,
2708 2708 "the region limit should be at bottom");
2709 2709 if (verbose_low()) {
2710 2710 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2711 2711 "returning NULL", task_num, curr_region);
2712 2712 }
2713 2713 // we return NULL and the caller should try calling
2714 2714 // claim_region() again.
2715 2715 return NULL;
2716 2716 }
2717 2717 } else {
2718 2718 assert(_finger > finger, "the finger should have moved forward");
2719 2719 if (verbose_low()) {
2720 2720 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2721 2721 "global finger = "PTR_FORMAT", "
2722 2722 "our finger = "PTR_FORMAT,
2723 2723 task_num, _finger, finger);
2724 2724 }
2725 2725
2726 2726 // read it again
2727 2727 finger = _finger;
2728 2728 }
2729 2729 }
2730 2730
2731 2731 return NULL;
2732 2732 }
2733 2733
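// Check the aborted region saved by each CMTask; if it points into the
// collection set, clear it and report that at least one region was dropped.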
2734 2734 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2735 2735 bool result = false;
2736 2736 for (int i = 0; i < (int)_max_task_num; ++i) {
2737 2737 CMTask* the_task = _tasks[i];
2738 2738 MemRegion mr = the_task->aborted_region();
2739 2739 if (mr.start() != NULL) {
2740 2740 assert(mr.end() != NULL, "invariant");
2741 2741 assert(mr.word_size() > 0, "invariant");
2742 2742 HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2743 2743 assert(hr != NULL, "invariant");
2744 2744 if (hr->in_collection_set()) {
2745 2745 // The region points into the collection set
2746 2746 the_task->set_aborted_region(MemRegion());
2747 2747 result = true;
2748 2748 }
2749 2749 }
2750 2750 }
2751 2751 return result;
2752 2752 }
2753 2753
2754 2754 bool ConcurrentMark::has_aborted_regions() {
2755 2755 for (int i = 0; i < (int)_max_task_num; ++i) {
2756 2756 CMTask* the_task = _tasks[i];
2757 2757 MemRegion mr = the_task->aborted_region();
2758 2758 if (mr.start() != NULL) {
2759 2759 assert(mr.end() != NULL, "invariant");
2760 2760 assert(mr.word_size() > 0, "invariant");
2761 2761 return true;
2762 2762 }
2763 2763 }
2764 2764 return false;
2765 2765 }
2766 2766
2767 2767 void ConcurrentMark::oops_do(OopClosure* cl) {
2768 2768 if (_markStack.size() > 0 && verbose_low()) {
2769 2769 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2770 2770 "size = %d", _markStack.size());
2771 2771 }
2772 2772 // we first iterate over the contents of the mark stack...
2773 2773 _markStack.oops_do(cl);
2774 2774
2775 2775 for (int i = 0; i < (int)_max_task_num; ++i) {
2776 2776 OopTaskQueue* queue = _task_queues->queue((int)i);
2777 2777
2778 2778 if (queue->size() > 0 && verbose_low()) {
2779 2779 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2780 2780 "size = %d", i, queue->size());
2781 2781 }
2782 2782
2783 2783     // ...then over the contents of all the task queues.
2784 2784 queue->oops_do(cl);
2785 2785 }
2786 2786
2787 2787   // Invalidate any entries in the region stack that point into
2788 2788   // the collection set.
2789 2789 if (_regionStack.invalidate_entries_into_cset()) {
2790 2790 // otherwise, any gray objects copied during the evacuation pause
2791 2791 // might not be visited.
2792 2792 assert(_should_gray_objects, "invariant");
2793 2793 }
2794 2794
2795 2795 // Invalidate any aborted regions, recorded in the individual CM
2796 2796 // tasks, that point into the collection set.
2797 2797 if (invalidate_aborted_regions_in_cset()) {
2798 2798 // otherwise, any gray objects copied during the evacuation pause
2799 2799 // might not be visited.
2800 2800 assert(_should_gray_objects, "invariant");
2801 2801 }
2802 2802
2803 2803 }
2804 2804
2805 2805 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2806 2806 _markStack.setEmpty();
2807 2807 _markStack.clear_overflow();
2808 2808 _regionStack.setEmpty();
2809 2809 _regionStack.clear_overflow();
2810 2810 if (clear_overflow) {
2811 2811 clear_has_overflown();
2812 2812 } else {
2813 2813 assert(has_overflown(), "pre-condition");
2814 2814 }
2815 2815 _finger = _heap_start;
2816 2816
2817 2817 for (int i = 0; i < (int)_max_task_num; ++i) {
2818 2818 OopTaskQueue* queue = _task_queues->queue(i);
2819 2819 queue->set_empty();
2820 2820 // Clear any partial regions from the CMTasks
2821 2821 _tasks[i]->clear_aborted_region();
2822 2822 }
2823 2823 }
2824 2824
2825 2825 void ConcurrentMark::print_stats() {
2826 2826 if (verbose_stats()) {
2827 2827 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2828 2828 for (size_t i = 0; i < _active_tasks; ++i) {
2829 2829 _tasks[i]->print_stats();
2830 2830 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2831 2831 }
2832 2832 }
2833 2833 }
2834 2834
2835 2835 class CSMarkOopClosure: public OopClosure {
2836 2836 friend class CSMarkBitMapClosure;
2837 2837
2838 2838 G1CollectedHeap* _g1h;
2839 2839 CMBitMap* _bm;
2840 2840 ConcurrentMark* _cm;
2841 2841 oop* _ms;
2842 2842 jint* _array_ind_stack;
2843 2843 int _ms_size;
2844 2844 int _ms_ind;
2845 2845 int _array_increment;
2846 2846
2847 2847 bool push(oop obj, int arr_ind = 0) {
2848 2848 if (_ms_ind == _ms_size) {
2849 2849 gclog_or_tty->print_cr("Mark stack is full.");
2850 2850 return false;
2851 2851 }
2852 2852 _ms[_ms_ind] = obj;
2853 2853 if (obj->is_objArray()) {
2854 2854 _array_ind_stack[_ms_ind] = arr_ind;
2855 2855 }
2856 2856 _ms_ind++;
2857 2857 return true;
2858 2858 }
2859 2859
2860 2860 oop pop() {
2861 2861 if (_ms_ind == 0) {
2862 2862 return NULL;
2863 2863 } else {
2864 2864 _ms_ind--;
2865 2865 return _ms[_ms_ind];
2866 2866 }
2867 2867 }
2868 2868
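  // Drain the local mark stack. Object arrays are scanned in chunks of
  // _array_increment elements: before scanning the current chunk the array
  // is re-pushed with the index of the next chunk, so a very large array
  // never has to be processed in a single step.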
2869 2869 template <class T> bool drain() {
2870 2870 while (_ms_ind > 0) {
2871 2871 oop obj = pop();
2872 2872 assert(obj != NULL, "Since index was non-zero.");
2873 2873 if (obj->is_objArray()) {
2874 2874 jint arr_ind = _array_ind_stack[_ms_ind];
2875 2875 objArrayOop aobj = objArrayOop(obj);
2876 2876 jint len = aobj->length();
2877 2877 jint next_arr_ind = arr_ind + _array_increment;
2878 2878 if (next_arr_ind < len) {
2879 2879 push(obj, next_arr_ind);
2880 2880 }
2881 2881 // Now process this portion of this one.
2882 2882 int lim = MIN2(next_arr_ind, len);
2883 2883 for (int j = arr_ind; j < lim; j++) {
2884 2884 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2885 2885 }
2886 2886
2887 2887 } else {
2888 2888 obj->oop_iterate(this);
2889 2889 }
2890 2890 if (abort()) return false;
2891 2891 }
2892 2892 return true;
2893 2893 }
2894 2894
2895 2895 public:
2896 2896 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2897 2897 _g1h(G1CollectedHeap::heap()),
2898 2898 _cm(cm),
2899 2899 _bm(cm->nextMarkBitMap()),
2900 2900 _ms_size(ms_size), _ms_ind(0),
2901 2901 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2902 2902 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2903 2903 _array_increment(MAX2(ms_size/8, 16))
2904 2904 {}
2905 2905
2906 2906 ~CSMarkOopClosure() {
2907 2907 FREE_C_HEAP_ARRAY(oop, _ms);
2908 2908 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2909 2909 }
2910 2910
2911 2911 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2912 2912 virtual void do_oop( oop* p) { do_oop_work(p); }
2913 2913
2914 2914 template <class T> void do_oop_work(T* p) {
2915 2915 T heap_oop = oopDesc::load_heap_oop(p);
2916 2916 if (oopDesc::is_null(heap_oop)) return;
2917 2917 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2918 2918 if (obj->is_forwarded()) {
2919 2919 // If the object has already been forwarded, we have to make sure
2920 2920 // that it's marked. So follow the forwarding pointer. Note that
2921 2921 // this does the right thing for self-forwarding pointers in the
2922 2922 // evacuation failure case.
2923 2923 obj = obj->forwardee();
2924 2924 }
2925 2925 HeapRegion* hr = _g1h->heap_region_containing(obj);
2926 2926 if (hr != NULL) {
2927 2927 if (hr->in_collection_set()) {
2928 2928 if (_g1h->is_obj_ill(obj)) {
2929 2929 _bm->mark((HeapWord*)obj);
2930 2930 if (!push(obj)) {
2931 2931 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2932 2932 set_abort();
2933 2933 }
2934 2934 }
2935 2935 } else {
2936 2936 // Outside the collection set; we need to gray it
2937 2937 _cm->deal_with_reference(obj);
2938 2938 }
2939 2939 }
2940 2940 }
2941 2941 };
2942 2942
2943 2943 class CSMarkBitMapClosure: public BitMapClosure {
2944 2944 G1CollectedHeap* _g1h;
2945 2945 CMBitMap* _bitMap;
2946 2946 ConcurrentMark* _cm;
2947 2947 CSMarkOopClosure _oop_cl;
2948 2948 public:
2949 2949 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2950 2950 _g1h(G1CollectedHeap::heap()),
2951 2951 _bitMap(cm->nextMarkBitMap()),
2952 2952 _oop_cl(cm, ms_size)
2953 2953 {}
2954 2954
2955 2955 ~CSMarkBitMapClosure() {}
2956 2956
2957 2957 bool do_bit(size_t offset) {
2958 2958 // convert offset into a HeapWord*
2959 2959 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2960 2960     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2961 2961 "address out of range");
2962 2962 assert(_bitMap->isMarked(addr), "tautology");
2963 2963 oop obj = oop(addr);
2964 2964 if (!obj->is_forwarded()) {
2965 2965 if (!_oop_cl.push(obj)) return false;
2966 2966 if (UseCompressedOops) {
2967 2967 if (!_oop_cl.drain<narrowOop>()) return false;
2968 2968 } else {
2969 2969 if (!_oop_cl.drain<oop>()) return false;
2970 2970 }
2971 2971 }
2972 2972 // Otherwise...
2973 2973 return true;
2974 2974 }
2975 2975 };
2976 2976
2977 2977
2978 2978 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2979 2979 CMBitMap* _bm;
2980 2980 CSMarkBitMapClosure _bit_cl;
2981 2981 enum SomePrivateConstants {
2982 2982 MSSize = 1000
2983 2983 };
2984 2984 bool _completed;
2985 2985 public:
2986 2986 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2987 2987 _bm(cm->nextMarkBitMap()),
2988 2988 _bit_cl(cm, MSSize),
2989 2989 _completed(true)
2990 2990 {}
2991 2991
2992 2992 ~CompleteMarkingInCSHRClosure() {}
2993 2993
2994 2994 bool doHeapRegion(HeapRegion* r) {
2995 2995 if (!r->evacuation_failed()) {
2996 2996 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2997 2997 if (!mr.is_empty()) {
2998 2998 if (!_bm->iterate(&_bit_cl, mr)) {
2999 2999 _completed = false;
3000 3000 return true;
3001 3001 }
3002 3002 }
3003 3003 }
3004 3004 return false;
3005 3005 }
3006 3006
3007 3007 bool completed() { return _completed; }
3008 3008 };
3009 3009
3010 3010 class ClearMarksInHRClosure: public HeapRegionClosure {
3011 3011 CMBitMap* _bm;
3012 3012 public:
3013 3013 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
3014 3014
3015 3015 bool doHeapRegion(HeapRegion* r) {
3016 3016 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
3017 3017 MemRegion usedMR = r->used_region();
3018 3018       _bm->clearRange(usedMR);
3019 3019 }
3020 3020 return false;
3021 3021 }
3022 3022 };
3023 3023
3024 3024 void ConcurrentMark::complete_marking_in_collection_set() {
3025 3025 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3026 3026
3027 3027 if (!g1h->mark_in_progress()) {
3028 3028 g1h->g1_policy()->record_mark_closure_time(0.0);
3029 3029 return;
3030 3030 }
3031 3031
3032 3032 int i = 1;
3033 3033 double start = os::elapsedTime();
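  // The closure below may abort part-way through a region when its internal
  // mark stack fills up (CSMarkOopClosure::push() fails and sets the abort
  // flag); completed() then returns false and we simply rescan the
  // collection set. Objects already marked on an earlier pass fail the
  // is_obj_ill() check and are not re-pushed, so each retry has less to do.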
3034 3034 while (true) {
3035 3035 i++;
3036 3036 CompleteMarkingInCSHRClosure cmplt(this);
3037 3037 g1h->collection_set_iterate(&cmplt);
3038 3038 if (cmplt.completed()) break;
3039 3039 }
3040 3040 double end_time = os::elapsedTime();
3041 3041 double elapsed_time_ms = (end_time - start) * 1000.0;
3042 3042 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3043 3043
3044 3044 ClearMarksInHRClosure clr(nextMarkBitMap());
3045 3045 g1h->collection_set_iterate(&clr);
3046 3046 }
3047 3047
3048 3048 // The next two methods deal with the following optimisation. Some
3049 3049 // objects are gray by being marked and located above the finger. If
3050 3050 // they are copied, during an evacuation pause, below the finger then
3051 3051 // they need to be pushed on the stack. The observation is that, if
3052 3052 // there are no regions in the collection set located above the
3053 3053 // finger, then the above cannot happen, hence we do not need to
3054 3054 // explicitly gray any objects when copying them to below the
3055 3055 // finger. The global stack will be scanned to ensure that, if it
3056 3056 // points to objects being copied, it will update their
3057 3057 // location. There is a tricky situation with the gray objects in
3058 3058 // the region stack that are being copied, however. See the comment in
3059 3059 // newCSet().
3060 3060
3061 3061 void ConcurrentMark::newCSet() {
3062 3062 if (!concurrent_marking_in_progress()) {
3063 3063 // nothing to do if marking is not in progress
3064 3064 return;
3065 3065 }
3066 3066
3067 3067 // find what the lowest finger is among the global and local fingers
3068 3068 _min_finger = _finger;
3069 3069 for (int i = 0; i < (int)_max_task_num; ++i) {
3070 3070 CMTask* task = _tasks[i];
3071 3071 HeapWord* task_finger = task->finger();
3072 3072 if (task_finger != NULL && task_finger < _min_finger) {
3073 3073 _min_finger = task_finger;
3074 3074 }
3075 3075 }
3076 3076
3077 3077 _should_gray_objects = false;
3078 3078
3079 3079   // This fixes a very subtle and frustrating bug. It might be the case
3080 3080   // that, during an evacuation pause, heap regions that contain
3081 3081 // objects that are gray (by being in regions contained in the
3082 3082 // region stack) are included in the collection set. Since such gray
3083 3083 // objects will be moved, and because it's not easy to redirect
3084 3084 // region stack entries to point to a new location (because objects
3085 3085 // in one region might be scattered to multiple regions after they
3086 3086 // are copied), one option is to ensure that all marked objects
3087 3087 // copied during a pause are pushed on the stack. Notice, however,
3088 3088 // that this problem can only happen when the region stack is not
3089 3089 // empty during an evacuation pause. So, we make the fix a bit less
3090 3090 // conservative and ensure that regions are pushed on the stack,
3091 3091   // irrespective of whether all collection set regions are below the
3092 3092   // finger, if the region stack is not empty. This is expected to be
3093 3093   // a rare case, so I don't think it's necessary to be smarter about it.
3094 3094 if (!region_stack_empty() || has_aborted_regions()) {
3095 3095 _should_gray_objects = true;
3096 3096 }
3097 3097 }
3098 3098
3099 3099 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3100 3100 if (!concurrent_marking_in_progress()) return;
3101 3101
3102 3102 HeapWord* region_end = hr->end();
3103 3103 if (region_end > _min_finger) {
3104 3104 _should_gray_objects = true;
3105 3105 }
3106 3106 }
3107 3107
3108 3108 // Resets the region fields of active CMTasks whose values point
3109 3109 // into the collection set.
3110 3110 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3111 3111 assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3112 3112 assert(parallel_marking_threads() <= _max_task_num, "sanity");
3113 3113
3114 3114 for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3115 3115 CMTask* task = _tasks[i];
3116 3116 HeapWord* task_finger = task->finger();
3117 3117 if (task_finger != NULL) {
3118 3118 assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3119 3119 HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3120 3120 if (finger_region->in_collection_set()) {
3121 3121 // The task's current region is in the collection set.
3122 3122 // This region will be evacuated in the current GC and
3123 3123 // the region fields in the task will be stale.
3124 3124 task->giveup_current_region();
3125 3125 }
3126 3126 }
3127 3127 }
3128 3128 }
3129 3129
3130 3130 // abandon current marking iteration due to a Full GC
3131 3131 void ConcurrentMark::abort() {
3132 3132 // Clear all marks to force marking thread to do nothing
3133 3133 _nextMarkBitMap->clearAll();
3134 3134 // Empty mark stack
3135 3135 clear_marking_state();
3136 3136 for (int i = 0; i < (int)_max_task_num; ++i) {
3137 3137 _tasks[i]->clear_region_fields();
3138 3138 }
3139 3139 _has_aborted = true;
3140 3140
3141 3141 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3142 3142 satb_mq_set.abandon_partial_marking();
3143 3143 // This can be called either during or outside marking, we'll read
3144 3144 // the expected_active value from the SATB queue set.
3145 3145 satb_mq_set.set_active_all_threads(
3146 3146 false, /* new active value */
3147 3147 satb_mq_set.is_active() /* expected_active */);
3148 3148 }
3149 3149
3150 3150 static void print_ms_time_info(const char* prefix, const char* name,
3151 3151 NumberSeq& ns) {
3152 3152 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3153 3153 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3154 3154 if (ns.num() > 0) {
3155 3155 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3156 3156 prefix, ns.sd(), ns.maximum());
3157 3157 }
3158 3158 }
3159 3159
3160 3160 void ConcurrentMark::print_summary_info() {
3161 3161 gclog_or_tty->print_cr(" Concurrent marking:");
3162 3162 print_ms_time_info(" ", "init marks", _init_times);
3163 3163 print_ms_time_info(" ", "remarks", _remark_times);
3164 3164 {
3165 3165 print_ms_time_info(" ", "final marks", _remark_mark_times);
3166 3166 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3167 3167
3168 3168 }
3169 3169 print_ms_time_info(" ", "cleanups", _cleanup_times);
3170 3170 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3171 3171 _total_counting_time,
3172 3172 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3173 3173 (double)_cleanup_times.num()
3174 3174 : 0.0));
3175 3175 if (G1ScrubRemSets) {
3176 3176 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3177 3177 _total_rs_scrub_time,
3178 3178 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3179 3179 (double)_cleanup_times.num()
3180 3180 : 0.0));
3181 3181 }
3182 3182 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3183 3183 (_init_times.sum() + _remark_times.sum() +
3184 3184 _cleanup_times.sum())/1000.0);
3185 3185 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3186 3186 "(%8.2f s marking, %8.2f s counting).",
3187 3187 cmThread()->vtime_accum(),
3188 3188 cmThread()->vtime_mark_accum(),
3189 3189 cmThread()->vtime_count_accum());
3190 3190 }
3191 3191
3192 3192 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3193 3193 _parallel_workers->print_worker_threads_on(st);
3194 3194 }
3195 3195
3196 3196 // Closures
3197 3197 // XXX: there seems to be a lot of code duplication here;
3198 3198 // should refactor and consolidate the shared code.
3199 3199
3203 3203 // We take a break if someone is trying to stop the world.
3204 3204 bool ConcurrentMark::do_yield_check(int worker_i) {
3205 3205 if (should_yield()) {
3206 3206 if (worker_i == 0) {
3207 3207 _g1h->g1_policy()->record_concurrent_pause();
3208 3208 }
3209 3209 cmThread()->yield();
3210 3210 if (worker_i == 0) {
3211 3211 _g1h->g1_policy()->record_concurrent_pause_end();
3212 3212 }
3213 3213 return true;
3214 3214 } else {
3215 3215 return false;
3216 3216 }
3217 3217 }
3218 3218
3219 3219 bool ConcurrentMark::should_yield() {
3220 3220 return cmThread()->should_yield();
3221 3221 }
3222 3222
3223 3223 bool ConcurrentMark::containing_card_is_marked(void* p) {
3224 3224 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3225 3225 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3226 3226 }
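
// For example, with the usual 512-byte cards (CardTableModRefBS::card_shift
// == 9) an address at byte offset 0x1234 from the start of the reserved heap
// maps to bit 0x1234 >> 9 == 9 of _card_bm; all addresses within the same
// aligned 512-byte window map to that same bit.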
3227 3227
3228 3228 bool ConcurrentMark::containing_cards_are_marked(void* start,
3229 3229 void* last) {
3230 3230 return containing_card_is_marked(start) &&
3231 3231 containing_card_is_marked(last);
3232 3232 }
3233 3233
3234 3234 #ifndef PRODUCT
3235 3235 // for debugging purposes
3236 3236 void ConcurrentMark::print_finger() {
3237 3237 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3238 3238 _heap_start, _heap_end, _finger);
3239 3239 for (int i = 0; i < (int) _max_task_num; ++i) {
3240 3240 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
3241 3241 }
3242 3242 gclog_or_tty->print_cr("");
3243 3243 }
3244 3244 #endif
3245 3245
3246 3246 void CMTask::scan_object(oop obj) {
3247 3247 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3248 3248
3249 3249 if (_cm->verbose_high()) {
3250 3250 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3251 3251 _task_id, (void*) obj);
3252 3252 }
3253 3253
3254 3254 size_t obj_size = obj->size();
3255 3255 _words_scanned += obj_size;
3256 3256
3257 3257 obj->oop_iterate(_cm_oop_closure);
3258 3258 statsOnly( ++_objs_scanned );
3259 3259 check_limits();
3260 3260 }
3261 3261
3262 3262 // Closure for iteration over bitmaps
3263 3263 class CMBitMapClosure : public BitMapClosure {
3264 3264 private:
3265 3265 // the bitmap that is being iterated over
3266 3266 CMBitMap* _nextMarkBitMap;
3267 3267 ConcurrentMark* _cm;
3268 3268 CMTask* _task;
3269 3269 // true if we're scanning a heap region claimed by the task (so that
3270 3270 // we move the finger along), false if we're not, i.e. currently when
3271 3271 // scanning a heap region popped from the region stack (so that we
3272 3272 // do not move the task finger along; it'd be a mistake if we did so).
3273 3273 bool _scanning_heap_region;
3274 3274
3275 3275 public:
3276 3276 CMBitMapClosure(CMTask *task,
3277 3277 ConcurrentMark* cm,
3278 3278 CMBitMap* nextMarkBitMap)
3279 3279 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3280 3280
3281 3281 void set_scanning_heap_region(bool scanning_heap_region) {
3282 3282 _scanning_heap_region = scanning_heap_region;
3283 3283 }
3284 3284
3285 3285 bool do_bit(size_t offset) {
3286 3286 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3287 3287 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3288 3288 assert( addr < _cm->finger(), "invariant");
3289 3289
3290 3290 if (_scanning_heap_region) {
3291 3291 statsOnly( _task->increase_objs_found_on_bitmap() );
3292 3292 assert(addr >= _task->finger(), "invariant");
3293 3293 // We move that task's local finger along.
3294 3294 _task->move_finger_to(addr);
3295 3295 } else {
3296 3296 // We move the task's region finger along.
3297 3297 _task->move_region_finger_to(addr);
3298 3298 }
3299 3299
3300 3300 _task->scan_object(oop(addr));
3301 3301 // we only partially drain the local queue and global stack
3302 3302 _task->drain_local_queue(true);
3303 3303 _task->drain_global_stack(true);
3304 3304
3305 3305 // if the has_aborted flag has been raised, we need to bail out of
3306 3306 // the iteration
3307 3307 return !_task->has_aborted();
3308 3308 }
3309 3309 };
3310 3310
3311 3311 // Closure for iterating over objects, currently only used for
3312 3312 // processing SATB buffers.
3313 3313 class CMObjectClosure : public ObjectClosure {
3314 3314 private:
3315 3315 CMTask* _task;
3316 3316
3317 3317 public:
3318 3318 void do_object(oop obj) {
3319 3319 _task->deal_with_reference(obj);
3320 3320 }
3321 3321
3322 3322 CMObjectClosure(CMTask* task) : _task(task) { }
3323 3323 };
3324 3324
3325 3325 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3326 3326 ConcurrentMark* cm,
3327 3327 CMTask* task)
3328 3328 : _g1h(g1h), _cm(cm), _task(task) {
3329 3329 assert(_ref_processor == NULL, "should be initialized to NULL");
3330 3330
3331 3331 if (G1UseConcMarkReferenceProcessing) {
3332 3332 _ref_processor = g1h->ref_processor();
3333 3333 assert(_ref_processor != NULL, "should not be NULL");
3334 3334 }
3335 3335 }
3336 3336
3337 3337 void CMTask::setup_for_region(HeapRegion* hr) {
3338 3338 // Separated the asserts so that we know which one fires.
3339 3339 assert(hr != NULL,
3340 3340          "claim_region() should have filtered out NULL regions");
3341 3341 assert(!hr->continuesHumongous(),
3342 3342 "claim_region() should have filtered out continues humongous regions");
3343 3343
3344 3344 if (_cm->verbose_low()) {
3345 3345 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3346 3346 _task_id, hr);
3347 3347 }
3348 3348
3349 3349 _curr_region = hr;
3350 3350 _finger = hr->bottom();
3351 3351 update_region_limit();
3352 3352 }
3353 3353
3354 3354 void CMTask::update_region_limit() {
3355 3355 HeapRegion* hr = _curr_region;
3356 3356 HeapWord* bottom = hr->bottom();
3357 3357 HeapWord* limit = hr->next_top_at_mark_start();
3358 3358
3359 3359 if (limit == bottom) {
3360 3360 if (_cm->verbose_low()) {
3361 3361 gclog_or_tty->print_cr("[%d] found an empty region "
3362 3362 "["PTR_FORMAT", "PTR_FORMAT")",
3363 3363 _task_id, bottom, limit);
3364 3364 }
3365 3365 // The region was collected underneath our feet.
3366 3366 // We set the finger to bottom to ensure that the bitmap
3367 3367 // iteration that will follow this will not do anything.
3368 3368 // (this is not a condition that holds when we set the region up,
3369 3369 // as the region is not supposed to be empty in the first place)
3370 3370 _finger = bottom;
3371 3371 } else if (limit >= _region_limit) {
3372 3372 assert(limit >= _finger, "peace of mind");
3373 3373 } else {
3374 3374 assert(limit < _region_limit, "only way to get here");
3375 3375 // This can happen under some pretty unusual circumstances. An
3376 3376 // evacuation pause empties the region underneath our feet (NTAMS
3377 3377 // at bottom). We then do some allocation in the region (NTAMS
3378 3378 // stays at bottom), followed by the region being used as a GC
3379 3379 // alloc region (NTAMS will move to top() and the objects
3380 3380 // originally below it will be grayed). All objects now marked in
3381 3381 // the region are explicitly grayed, if below the global finger,
3382 3382 // and we do not need in fact to scan anything else. So, we simply
3383 3383 // set _finger to be limit to ensure that the bitmap iteration
3384 3384 // doesn't do anything.
3385 3385 _finger = limit;
3386 3386 }
3387 3387
3388 3388 _region_limit = limit;
3389 3389 }
3390 3390
3391 3391 void CMTask::giveup_current_region() {
3392 3392 assert(_curr_region != NULL, "invariant");
3393 3393 if (_cm->verbose_low()) {
3394 3394 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3395 3395 _task_id, _curr_region);
3396 3396 }
3397 3397 clear_region_fields();
3398 3398 }
3399 3399
3400 3400 void CMTask::clear_region_fields() {
3401 3401 // Values for these three fields that indicate that we're not
3402 3402 // holding on to a region.
3403 3403 _curr_region = NULL;
3404 3404 _finger = NULL;
3405 3405 _region_limit = NULL;
3406 3406
3407 3407 _region_finger = NULL;
3408 3408 }
3409 3409
3410 3410 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3411 3411 if (cm_oop_closure == NULL) {
3412 3412 assert(_cm_oop_closure != NULL, "invariant");
3413 3413 } else {
3414 3414 assert(_cm_oop_closure == NULL, "invariant");
3415 3415 }
3416 3416 _cm_oop_closure = cm_oop_closure;
3417 3417 }
3418 3418
3419 3419 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3420 3420 guarantee(nextMarkBitMap != NULL, "invariant");
3421 3421
3422 3422 if (_cm->verbose_low()) {
3423 3423 gclog_or_tty->print_cr("[%d] resetting", _task_id);
3424 3424 }
3425 3425
3426 3426 _nextMarkBitMap = nextMarkBitMap;
3427 3427 clear_region_fields();
3428 3428 assert(_aborted_region.is_empty(), "should have been cleared");
3429 3429
3430 3430 _calls = 0;
3431 3431 _elapsed_time_ms = 0.0;
3432 3432 _termination_time_ms = 0.0;
3433 3433 _termination_start_time_ms = 0.0;
3434 3434
3435 3435 #if _MARKING_STATS_
3436 3436 _local_pushes = 0;
3437 3437 _local_pops = 0;
3438 3438 _local_max_size = 0;
3439 3439 _objs_scanned = 0;
3440 3440 _global_pushes = 0;
3441 3441 _global_pops = 0;
3442 3442 _global_max_size = 0;
3443 3443 _global_transfers_to = 0;
3444 3444 _global_transfers_from = 0;
3445 3445 _region_stack_pops = 0;
3446 3446 _regions_claimed = 0;
3447 3447 _objs_found_on_bitmap = 0;
3448 3448 _satb_buffers_processed = 0;
3449 3449 _steal_attempts = 0;
3450 3450 _steals = 0;
3451 3451 _aborted = 0;
3452 3452 _aborted_overflow = 0;
3453 3453 _aborted_cm_aborted = 0;
3454 3454 _aborted_yield = 0;
3455 3455 _aborted_timed_out = 0;
3456 3456 _aborted_satb = 0;
3457 3457 _aborted_termination = 0;
3458 3458 #endif // _MARKING_STATS_
3459 3459 }
3460 3460
3461 3461 bool CMTask::should_exit_termination() {
3462 3462 regular_clock_call();
3463 3463 // This is called when we are in the termination protocol. We should
3464 3464 // quit if, for some reason, this task wants to abort or the global
3465 3465 // stack is not empty (this means that we can get work from it).
3466 3466 return !_cm->mark_stack_empty() || has_aborted();
3467 3467 }
3468 3468
3469 3469 void CMTask::reached_limit() {
3470 3470 assert(_words_scanned >= _words_scanned_limit ||
3471 3471 _refs_reached >= _refs_reached_limit ,
3472 3472 "shouldn't have been called otherwise");
3473 3473 regular_clock_call();
3474 3474 }
3475 3475
3476 3476 void CMTask::regular_clock_call() {
3477 3477 if (has_aborted()) return;
3478 3478
3479 3479 // First, we need to recalculate the words scanned and refs reached
3480 3480 // limits for the next clock call.
3481 3481 recalculate_limits();
3482 3482
3483 3483 // During the regular clock call we do the following
3484 3484
3485 3485 // (1) If an overflow has been flagged, then we abort.
3486 3486 if (_cm->has_overflown()) {
3487 3487 set_has_aborted();
3488 3488 return;
3489 3489 }
3490 3490
3491 3491 // If we are not concurrent (i.e. we're doing remark) we don't need
3492 3492 // to check anything else. The other steps are only needed during
3493 3493 // the concurrent marking phase.
3494 3494 if (!concurrent()) return;
3495 3495
3496 3496 // (2) If marking has been aborted for Full GC, then we also abort.
3497 3497 if (_cm->has_aborted()) {
3498 3498 set_has_aborted();
3499 3499 statsOnly( ++_aborted_cm_aborted );
3500 3500 return;
3501 3501 }
3502 3502
3503 3503 double curr_time_ms = os::elapsedVTime() * 1000.0;
3504 3504
3505 3505 // (3) If marking stats are enabled, then we update the step history.
3506 3506 #if _MARKING_STATS_
3507 3507 if (_words_scanned >= _words_scanned_limit) {
3508 3508 ++_clock_due_to_scanning;
3509 3509 }
3510 3510 if (_refs_reached >= _refs_reached_limit) {
3511 3511 ++_clock_due_to_marking;
3512 3512 }
3513 3513
3514 3514 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3515 3515 _interval_start_time_ms = curr_time_ms;
3516 3516 _all_clock_intervals_ms.add(last_interval_ms);
3517 3517
3518 3518 if (_cm->verbose_medium()) {
3519 3519 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3520 3520 "scanned = %d%s, refs reached = %d%s",
3521 3521 _task_id, last_interval_ms,
3522 3522 _words_scanned,
3523 3523 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3524 3524 _refs_reached,
3525 3525 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3526 3526 }
3527 3527 #endif // _MARKING_STATS_
3528 3528
3529 3529 // (4) We check whether we should yield. If we have to, then we abort.
3530 3530 if (_cm->should_yield()) {
3531 3531 // We should yield. To do this we abort the task. The caller is
3532 3532 // responsible for yielding.
3533 3533 set_has_aborted();
3534 3534 statsOnly( ++_aborted_yield );
3535 3535 return;
3536 3536 }
3537 3537
3538 3538 // (5) We check whether we've reached our time quota. If we have,
3539 3539 // then we abort.
3540 3540 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3541 3541 if (elapsed_time_ms > _time_target_ms) {
3542 3542 set_has_aborted();
3543 3543 _has_timed_out = true;
3544 3544 statsOnly( ++_aborted_timed_out );
3545 3545 return;
3546 3546 }
3547 3547
3548 3548   // (6) Finally, we check whether there are enough completed SATB
3549 3549 // buffers available for processing. If there are, we abort.
3550 3550 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3551 3551 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3552 3552 if (_cm->verbose_low()) {
3553 3553 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3554 3554 _task_id);
3555 3555 }
3556 3556 // we do need to process SATB buffers, we'll abort and restart
3557 3557 // the marking task to do so
3558 3558 set_has_aborted();
3559 3559 statsOnly( ++_aborted_satb );
3560 3560 return;
3561 3561 }
3562 3562 }
3563 3563
3564 3564 void CMTask::recalculate_limits() {
3565 3565 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3566 3566 _words_scanned_limit = _real_words_scanned_limit;
3567 3567
3568 3568 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3569 3569 _refs_reached_limit = _real_refs_reached_limit;
3570 3570 }
3571 3571
3572 3572 void CMTask::decrease_limits() {
3573 3573 // This is called when we believe that we're going to do an infrequent
3574 3574 // operation which will increase the per byte scanned cost (i.e. move
3575 3575 // entries to/from the global stack). It basically tries to decrease the
3576 3576 // scanning limit so that the clock is called earlier.
3577 3577
3578 3578 if (_cm->verbose_medium()) {
3579 3579 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3580 3580 }
3581 3581
3582 3582 _words_scanned_limit = _real_words_scanned_limit -
3583 3583 3 * words_scanned_period / 4;
3584 3584 _refs_reached_limit = _real_refs_reached_limit -
3585 3585 3 * refs_reached_period / 4;
3586 3586 }
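
// Concretely: recalculate_limits() sets each limit to the count at the last
// clock call plus a full period P, so pulling it back by 3P/4 here leaves
// only about P/4 of the usual work before the next regular_clock_call()
// fires (and likewise for the refs-reached limit).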
3587 3587
3588 3588 void CMTask::move_entries_to_global_stack() {
3589 3589 // local array where we'll store the entries that will be popped
3590 3590 // from the local queue
3591 3591 oop buffer[global_stack_transfer_size];
3592 3592
3593 3593 int n = 0;
3594 3594 oop obj;
3595 3595 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3596 3596 buffer[n] = obj;
3597 3597 ++n;
3598 3598 }
3599 3599
3600 3600 if (n > 0) {
3601 3601 // we popped at least one entry from the local queue
3602 3602
3603 3603 statsOnly( ++_global_transfers_to; _local_pops += n );
3604 3604
3605 3605 if (!_cm->mark_stack_push(buffer, n)) {
3606 3606 if (_cm->verbose_low()) {
3607 3607 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3608 3608 _task_id);
3609 3609 }
3610 3610 set_has_aborted();
3611 3611 } else {
3612 3612 // the transfer was successful
3613 3613
3614 3614 if (_cm->verbose_medium()) {
3615 3615 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3616 3616 _task_id, n);
3617 3617 }
3618 3618 statsOnly( int tmp_size = _cm->mark_stack_size();
3619 3619 if (tmp_size > _global_max_size) {
3620 3620 _global_max_size = tmp_size;
3621 3621 }
3622 3622 _global_pushes += n );
3623 3623 }
3624 3624 }
3625 3625
3626 3626 // this operation was quite expensive, so decrease the limits
3627 3627 decrease_limits();
3628 3628 }
3629 3629
3630 3630 void CMTask::get_entries_from_global_stack() {
3631 3631 // local array where we'll store the entries that will be popped
3632 3632 // from the global stack.
3633 3633 oop buffer[global_stack_transfer_size];
3634 3634 int n;
3635 3635 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3636 3636 assert(n <= global_stack_transfer_size,
3637 3637 "we should not pop more than the given limit");
3638 3638 if (n > 0) {
3639 3639 // yes, we did actually pop at least one entry
3640 3640
3641 3641 statsOnly( ++_global_transfers_from; _global_pops += n );
3642 3642 if (_cm->verbose_medium()) {
3643 3643 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3644 3644 _task_id, n);
3645 3645 }
3646 3646 for (int i = 0; i < n; ++i) {
3647 3647 bool success = _task_queue->push(buffer[i]);
3648 3648 // We only call this when the local queue is empty or under a
3649 3649 // given target limit. So, we do not expect this push to fail.
3650 3650 assert(success, "invariant");
3651 3651 }
3652 3652
3653 3653 statsOnly( int tmp_size = _task_queue->size();
3654 3654 if (tmp_size > _local_max_size) {
3655 3655 _local_max_size = tmp_size;
3656 3656 }
3657 3657 _local_pushes += n );
3658 3658 }
3659 3659
3660 3660 // this operation was quite expensive, so decrease the limits
3661 3661 decrease_limits();
3662 3662 }
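
// The two transfer methods above move entries between the task-local queue
// and the shared global mark stack only in chunks of
// global_stack_transfer_size, so the cost of synchronizing on the shared
// structure is paid once per chunk rather than once per entry. A standalone
// sketch of that batching pattern (not HotSpot code; all names below are
// illustrative):

#include <cstddef>
#include <deque>
#include <mutex>
#include <vector>

static const size_t kTransferChunk = 16;

struct SharedStack {
  std::mutex       lock;
  std::vector<int> entries;
};

// Move up to one chunk from the local queue to the shared stack,
// taking the shared lock only once per chunk.
static void move_chunk_to_shared(std::deque<int>& local, SharedStack& shared) {
  int    buffer[kTransferChunk];
  size_t n = 0;
  while (n < kTransferChunk && !local.empty()) {
    buffer[n++] = local.back();
    local.pop_back();
  }
  if (n > 0) {
    std::lock_guard<std::mutex> guard(shared.lock);
    shared.entries.insert(shared.entries.end(), buffer, buffer + n);
  }
}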
3663 3663
3664 3664 void CMTask::drain_local_queue(bool partially) {
3665 3665 if (has_aborted()) return;
3666 3666
3667 3667 // Decide what the target size is, depending whether we're going to
3668 3668 // drain it partially (so that other tasks can steal if they run out
3669 3669 // of things to do) or totally (at the very end).
3670 3670 size_t target_size;
3671 3671 if (partially) {
3672 3672 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3673 3673 } else {
3674 3674 target_size = 0;
3675 3675 }
3676 3676
3677 3677 if (_task_queue->size() > target_size) {
3678 3678 if (_cm->verbose_high()) {
3679 3679 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3680 3680 _task_id, target_size);
3681 3681 }
3682 3682
3683 3683 oop obj;
3684 3684 bool ret = _task_queue->pop_local(obj);
3685 3685 while (ret) {
3686 3686 statsOnly( ++_local_pops );
3687 3687
3688 3688 if (_cm->verbose_high()) {
3689 3689 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3690 3690 (void*) obj);
3691 3691 }
3692 3692
3693 3693 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3694 3694 assert(!_g1h->is_on_master_free_list(
3695 3695 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3696 3696
3697 3697 scan_object(obj);
3698 3698
3699 3699 if (_task_queue->size() <= target_size || has_aborted()) {
3700 3700 ret = false;
3701 3701 } else {
3702 3702 ret = _task_queue->pop_local(obj);
3703 3703 }
3704 3704 }
3705 3705
3706 3706 if (_cm->verbose_high()) {
3707 3707 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3708 3708 _task_id, _task_queue->size());
3709 3709 }
3710 3710 }
3711 3711 }
3712 3712
3713 3713 void CMTask::drain_global_stack(bool partially) {
3714 3714 if (has_aborted()) return;
3715 3715
3716 3716 // We have a policy to drain the local queue before we attempt to
3717 3717 // drain the global stack.
3718 3718 assert(partially || _task_queue->size() == 0, "invariant");
3719 3719
3720 3720 // Decide what the target size is, depending whether we're going to
3721 3721 // drain it partially (so that other tasks can steal if they run out
3722 3722 // of things to do) or totally (at the very end). Notice that,
3723 3723 // because we move entries from the global stack in chunks or
3724 3724 // because another task might be doing the same, we might in fact
3725 3725 // drop below the target. But, this is not a problem.
3726 3726 size_t target_size;
3727 3727 if (partially) {
3728 3728 target_size = _cm->partial_mark_stack_size_target();
3729 3729 } else {
3730 3730 target_size = 0;
3731 3731 }
3732 3732
3733 3733 if (_cm->mark_stack_size() > target_size) {
3734 3734 if (_cm->verbose_low()) {
3735 3735 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3736 3736 _task_id, target_size);
3737 3737 }
3738 3738
3739 3739 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3740 3740 get_entries_from_global_stack();
3741 3741 drain_local_queue(partially);
3742 3742 }
3743 3743
3744 3744 if (_cm->verbose_low()) {
3745 3745 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3746 3746 _task_id, _cm->mark_stack_size());
3747 3747 }
3748 3748 }
3749 3749 }
3750 3750
3751 3751 // SATB Queue has several assumptions on whether to call the par or
3752 3752 // non-par versions of the methods. This is why some of the code is
3753 3753 // replicated. We should really get rid of the single-threaded version
3754 3754 // of the code to simplify things.
3755 3755 void CMTask::drain_satb_buffers() {
3756 3756 if (has_aborted()) return;
3757 3757
3758 3758 // We set this so that the regular clock knows that we're in the
3759 3759 // middle of draining buffers and doesn't set the abort flag when it
3760 3760 // notices that SATB buffers are available for draining. It'd be
3761 3761   // very counterproductive if it did that. :-)
3762 3762 _draining_satb_buffers = true;
3763 3763
3764 3764 CMObjectClosure oc(this);
3765 3765 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3766 3766 if (G1CollectedHeap::use_parallel_gc_threads()) {
3767 3767 satb_mq_set.set_par_closure(_task_id, &oc);
3768 3768 } else {
3769 3769 satb_mq_set.set_closure(&oc);
3770 3770 }
3771 3771
3772 3772 // This keeps claiming and applying the closure to completed buffers
3773 3773 // until we run out of buffers or we need to abort.
3774 3774 if (G1CollectedHeap::use_parallel_gc_threads()) {
3775 3775 while (!has_aborted() &&
3776 3776 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3777 3777 if (_cm->verbose_medium()) {
3778 3778 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3779 3779 }
3780 3780 statsOnly( ++_satb_buffers_processed );
3781 3781 regular_clock_call();
3782 3782 }
3783 3783 } else {
3784 3784 while (!has_aborted() &&
3785 3785 satb_mq_set.apply_closure_to_completed_buffer()) {
3786 3786 if (_cm->verbose_medium()) {
3787 3787 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3788 3788 }
3789 3789 statsOnly( ++_satb_buffers_processed );
3790 3790 regular_clock_call();
3791 3791 }
3792 3792 }
3793 3793
3794 3794 if (!concurrent() && !has_aborted()) {
3795 3795 // We should only do this during remark.
3796 3796 if (G1CollectedHeap::use_parallel_gc_threads()) {
3797 3797 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3798 3798 } else {
3799 3799 satb_mq_set.iterate_closure_all_threads();
3800 3800 }
3801 3801 }
3802 3802
3803 3803 _draining_satb_buffers = false;
3804 3804
3805 3805 assert(has_aborted() ||
3806 3806 concurrent() ||
3807 3807 satb_mq_set.completed_buffers_num() == 0, "invariant");
3808 3808
3809 3809 if (G1CollectedHeap::use_parallel_gc_threads()) {
3810 3810 satb_mq_set.set_par_closure(_task_id, NULL);
3811 3811 } else {
3812 3812 satb_mq_set.set_closure(NULL);
3813 3813 }
3814 3814
3815 3815 // again, this was a potentially expensive operation, decrease the
3816 3816 // limits to get the regular clock call early
3817 3817 decrease_limits();
3818 3818 }
3819 3819
3820 3820 void CMTask::drain_region_stack(BitMapClosure* bc) {
3821 3821 if (has_aborted()) return;
3822 3822
3823 3823 assert(_region_finger == NULL,
3824 3824 "it should be NULL when we're not scanning a region");
3825 3825
3826 3826 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3827 3827 if (_cm->verbose_low()) {
3828 3828 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3829 3829 _task_id, _cm->region_stack_size());
3830 3830 }
3831 3831
3832 3832 MemRegion mr;
3833 3833
3834 3834 if (!_aborted_region.is_empty()) {
3835 3835 mr = _aborted_region;
3836 3836 _aborted_region = MemRegion();
3837 3837
3838 3838 if (_cm->verbose_low()) {
3839 3839 gclog_or_tty->print_cr("[%d] scanning aborted region "
3840 3840 "[ " PTR_FORMAT ", " PTR_FORMAT " )",
3841 3841 _task_id, mr.start(), mr.end());
3842 3842 }
3843 3843 } else {
3844 3844 mr = _cm->region_stack_pop_lock_free();
3845 3845 // it returns MemRegion() if the pop fails
3846 3846 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3847 3847 }
3848 3848
3849 3849 while (mr.start() != NULL) {
3850 3850 if (_cm->verbose_medium()) {
3851 3851 gclog_or_tty->print_cr("[%d] we are scanning region "
3852 3852 "["PTR_FORMAT", "PTR_FORMAT")",
3853 3853 _task_id, mr.start(), mr.end());
3854 3854 }
3855 3855
3856 3856 assert(mr.end() <= _cm->finger(),
3857 3857 "otherwise the region shouldn't be on the stack");
3858 3858 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3859 3859 if (_nextMarkBitMap->iterate(bc, mr)) {
3860 3860 assert(!has_aborted(),
3861 3861 "cannot abort the task without aborting the bitmap iteration");
3862 3862
3863 3863 // We finished iterating over the region without aborting.
3864 3864 regular_clock_call();
3865 3865 if (has_aborted()) {
3866 3866 mr = MemRegion();
3867 3867 } else {
3868 3868 mr = _cm->region_stack_pop_lock_free();
3869 3869 // it returns MemRegion() if the pop fails
3870 3870 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3871 3871 }
3872 3872 } else {
3873 3873 assert(has_aborted(), "currently the only way to do so");
3874 3874
3875 3875 // The only way to abort the bitmap iteration is to return
3876 3876 // false from the do_bit() method. However, inside the
3877 3877 // do_bit() method we move the _region_finger to point to the
3878 3878 // object currently being looked at. So, if we bail out, we
3879 3879 // have definitely set _region_finger to something non-null.
3880 3880 assert(_region_finger != NULL, "invariant");
3881 3881
3882 3882 // Make sure that any previously aborted region has been
3883 3883 // cleared.
3884 3884 assert(_aborted_region.is_empty(), "aborted region not cleared");
3885 3885
3886 3886 // The iteration was actually aborted. So now _region_finger
3887 3887 // points to the address of the object we last scanned. If we
3888 3888 // leave it there, when we restart this task, we will rescan
3889 3889 // the object. It is easy to avoid this. We move the finger by
3890 3890 // enough to point to the next possible object header (the
3891 3891 // bitmap knows by how much we need to move it as it knows its
3892 3892 // granularity).
3893 3893 MemRegion newRegion =
3894 3894 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3895 3895
3896 3896 if (!newRegion.is_empty()) {
3897 3897 if (_cm->verbose_low()) {
3898 3898             gclog_or_tty->print_cr("[%d] recording unscanned region "
3899 3899 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3900 3900 _task_id,
3901 3901 newRegion.start(), newRegion.end());
3902 3902 }
3903 3903 // Now record the part of the region we didn't scan to
3904 3904 // make sure this task scans it later.
3905 3905 _aborted_region = newRegion;
3906 3906 }
3907 3907 // break from while
3908 3908 mr = MemRegion();
3909 3909 }
3910 3910 _region_finger = NULL;
3911 3911 }
3912 3912
3913 3913 if (_cm->verbose_low()) {
3914 3914 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3915 3915 _task_id, _cm->region_stack_size());
3916 3916 }
3917 3917 }
3918 3918 }
3919 3919
3920 3920 void CMTask::print_stats() {
3921 3921 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3922 3922 _task_id, _calls);
3923 3923 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3924 3924 _elapsed_time_ms, _termination_time_ms);
3925 3925 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3926 3926 _step_times_ms.num(), _step_times_ms.avg(),
3927 3927 _step_times_ms.sd());
3928 3928 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3929 3929 _step_times_ms.maximum(), _step_times_ms.sum());
3930 3930
3931 3931 #if _MARKING_STATS_
3932 3932 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3933 3933 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3934 3934 _all_clock_intervals_ms.sd());
3935 3935 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3936 3936 _all_clock_intervals_ms.maximum(),
3937 3937 _all_clock_intervals_ms.sum());
3938 3938 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3939 3939 _clock_due_to_scanning, _clock_due_to_marking);
3940 3940 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3941 3941 _objs_scanned, _objs_found_on_bitmap);
3942 3942 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3943 3943 _local_pushes, _local_pops, _local_max_size);
3944 3944 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3945 3945 _global_pushes, _global_pops, _global_max_size);
3946 3946 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3947 3947 _global_transfers_to,_global_transfers_from);
3948 3948 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3949 3949 _regions_claimed, _region_stack_pops);
3950 3950 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3951 3951 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3952 3952 _steal_attempts, _steals);
3953 3953 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3954 3954 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3955 3955 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3956 3956 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3957 3957 _aborted_timed_out, _aborted_satb, _aborted_termination);
3958 3958 #endif // _MARKING_STATS_
3959 3959 }
3960 3960
3961 3961 /*****************************************************************************
3962 3962
3963 3963 The do_marking_step(time_target_ms) method is the building block
3964 3964 of the parallel marking framework. It can be called in parallel
3965 3965 with other invocations of do_marking_step() on different tasks
3966 3966 (but only one per task, obviously) and concurrently with the
3967 3967 mutator threads, or during remark, hence it eliminates the need
3968 3968 for two versions of the code. When called during remark, it will
3969 3969 pick up from where the task left off during the concurrent marking
3970 3970 phase. Interestingly, tasks are also claimable during evacuation
3971 3971     pauses, since do_marking_step() ensures that it aborts before
3972 3972 it needs to yield.
3973 3973
3974 3974     The data structures that it uses to do marking work are the
3975 3975 following:
3976 3976
3977 3977 (1) Marking Bitmap. If there are gray objects that appear only
3978 3978 on the bitmap (this happens either when dealing with an overflow
3979 3979 or when the initial marking phase has simply marked the roots
3980 3980 and didn't push them on the stack), then tasks claim heap
3981 3981 regions whose bitmap they then scan to find gray objects. A
3982 3982 global finger indicates where the end of the last claimed region
3983 3983 is. A local finger indicates how far into the region a task has
3984 3984 scanned. The two fingers are used to determine how to gray an
3985 3985 object (i.e. whether simply marking it is OK, as it will be
3986 3986 visited by a task in the future, or whether it needs to be also
3987 3987 pushed on a stack).
3988 3988
3989 3989 (2) Local Queue. The local queue of the task which is accessed
3990 3990 reasonably efficiently by the task. Other tasks can steal from
3991 3991 it when they run out of work. Throughout the marking phase, a
3992 3992 task attempts to keep its local queue short but not totally
3993 3993 empty, so that entries are available for stealing by other
3994 3994 tasks. Only when there is no more work, a task will totally
3995 3995 drain its local queue.
3996 3996
3997 3997 (3) Global Mark Stack. This handles local queue overflow. During
3998 3998 marking only sets of entries are moved between it and the local
3999 3999     queues, as access to it requires a mutex and more fine-grained
4000 4000     interaction with it might cause contention. If it
4001 4001 overflows, then the marking phase should restart and iterate
4002 4002 over the bitmap to identify gray objects. Throughout the marking
4003 4003 phase, tasks attempt to keep the global mark stack at a small
4004 4004 length but not totally empty, so that entries are available for
4005 4005 popping by other tasks. Only when there is no more work, tasks
4006 4006 will totally drain the global mark stack.
4007 4007
4008 4008 (4) Global Region Stack. Entries on it correspond to areas of
4009 4009 the bitmap that need to be scanned since they contain gray
4010 4010 objects. Pushes on the region stack only happen during
4011 4011 evacuation pauses and typically correspond to areas covered by
4012 4012     GC allocation buffers (GCLABs). If it overflows, then the marking
4013 4013     phase should restart
4013 4013 and iterate over the bitmap to identify gray objects. Tasks will
4014 4014 try to totally drain the region stack as soon as possible.
4015 4015
4016 4016 (5) SATB Buffer Queue. This is where completed SATB buffers are
4017 4017 made available. Buffers are regularly removed from this queue
4018 4018 and scanned for roots, so that the queue doesn't get too
4019 4019 long. During remark, all completed buffers are processed, as
4020 4020 well as the filled in parts of any uncompleted buffers.
4021 4021
4022 4022 The do_marking_step() method tries to abort when the time target
4023 4023 has been reached. There are a few other cases when the
4024 4024 do_marking_step() method also aborts:
4025 4025
4026 4026 (1) When the marking phase has been aborted (after a Full GC).
4027 4027
4028 4028 (2) When a global overflow (either on the global stack or the
4029 4029 region stack) has been triggered. Before the task aborts, it
4030 4030 will actually sync up with the other tasks to ensure that all
4031 4031 the marking data structures (local queues, stacks, fingers etc.)
4032 4032 are re-initialised so that when do_marking_step() completes,
4033 4033 the marking phase can immediately restart.
4034 4034
4035 4035 (3) When enough completed SATB buffers are available. The
4036 4036 do_marking_step() method only tries to drain SATB buffers right
4037 4037 at the beginning. So, if enough buffers are available, the
4038 4038 marking step aborts and the SATB buffers are processed at
4039 4039 the beginning of the next invocation.
4040 4040
4041 4041     (4) To yield. When we have to yield, we abort and yield
4042 4042 right at the end of do_marking_step(). This saves us from a lot
4043 4043 of hassle as, by yielding we might allow a Full GC. If this
4044 4044 happens then objects will be compacted underneath our feet, the
4045 4045 heap might shrink, etc. We save checking for this by just
4046 4046 aborting and doing the yield right at the end.
4047 4047
4048 4048 From the above it follows that the do_marking_step() method should
4049 4049 be called in a loop (or, otherwise, regularly) until it completes.
4050 4050
4051 4051 If a marking step completes without its has_aborted() flag being
4052 4052 true, it means it has completed the current marking phase (and
4053 4053 also all other marking tasks have done so and have all synced up).
4054 4054
4055 4055 A method called regular_clock_call() is invoked "regularly" (in
4056 4056 sub ms intervals) throughout marking. It is this clock method that
4057 4057 checks all the abort conditions which were mentioned above and
4058 4058 decides when the task should abort. A work-based scheme is used to
4059 4059 trigger this clock method: when the number of object words the
4060 4060 marking phase has scanned or the number of references the marking
4061 4061 phase has visited reach a given limit. Additional invocations to
4062 4062     the clock method have been planted in a few other strategic places
4063 4063 too. The initial reason for the clock method was to avoid calling
4064 4064 vtime too regularly, as it is quite expensive. So, once it was in
4065 4065 place, it was natural to piggy-back all the other conditions on it
4066 4066 too and not constantly check them throughout the code.
4067 4067
4068 4068 *****************************************************************************/
4069 4069
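// A minimal sketch of the calling pattern described above (illustrative
// only; the real drivers are the concurrent marking worker tasks and the
// remark task):
//
//   CMTask* task = ...;                  // a claimed, per-worker task
//   do {
//     task->do_marking_step(10.0 /* time target (ms); value illustrative */,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//     // If the step aborted, yield or sync up on overflow as required,
//     // then simply call do_marking_step() again.
//   } while (task->has_aborted());
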
4070 4070 void CMTask::do_marking_step(double time_target_ms,
4071 4071 bool do_stealing,
4072 4072 bool do_termination) {
4073 4073 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4074 4074 assert(concurrent() == _cm->concurrent(), "they should be the same");
4075 4075
4076 4076 assert(concurrent() || _cm->region_stack_empty(),
4077 4077 "the region stack should have been cleared before remark");
4078 4078 assert(concurrent() || !_cm->has_aborted_regions(),
4079 4079 "aborted regions should have been cleared before remark");
4080 4080 assert(_region_finger == NULL,
4081 4081 "this should be non-null only when a region is being scanned");
4082 4082
4083 4083 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4084 4084 assert(_task_queues != NULL, "invariant");
4085 4085 assert(_task_queue != NULL, "invariant");
4086 4086 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4087 4087
4088 4088 assert(!_claimed,
4089 4089 "only one thread should claim this task at any one time");
4090 4090
4091 4091   // OK, this doesn't safeguard against all possible scenarios, as it is
4092 4092 // possible for two threads to set the _claimed flag at the same
4093 4093 // time. But it is only for debugging purposes anyway and it will
4094 4094 // catch most problems.
4095 4095 _claimed = true;
4096 4096
4097 4097 _start_time_ms = os::elapsedVTime() * 1000.0;
4098 4098 statsOnly( _interval_start_time_ms = _start_time_ms );
4099 4099
4100 4100 double diff_prediction_ms =
4101 4101 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4102 4102 _time_target_ms = time_target_ms - diff_prediction_ms;
4103 4103
4104 4104 // set up the variables that are used in the work-based scheme to
4105 4105 // call the regular clock method
4106 4106 _words_scanned = 0;
4107 4107 _refs_reached = 0;
4108 4108 recalculate_limits();
4109 4109
4110 4110 // clear all flags
4111 4111 clear_has_aborted();
4112 4112 _has_timed_out = false;
4113 4113 _draining_satb_buffers = false;
4114 4114
4115 4115 ++_calls;
4116 4116
4117 4117 if (_cm->verbose_low()) {
4118 4118 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4119 4119 "target = %1.2lfms >>>>>>>>>>",
4120 4120 _task_id, _calls, _time_target_ms);
4121 4121 }
4122 4122
4123 4123 // Set up the bitmap and oop closures. Anything that uses them is
4124 4124 // eventually called from this method, so it is OK to allocate these
4125 4125 // statically.
4126 4126 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4127 4127 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4128 4128 set_cm_oop_closure(&cm_oop_closure);
4129 4129
4130 4130 if (_cm->has_overflown()) {
4131 4131 // This can happen if the region stack or the mark stack overflows
4132 4132 // during a GC pause and this task, after a yield point,
4133 4133 // restarts. We have to abort as we need to get into the overflow
4134 4134 // protocol which happens right at the end of this task.
4135 4135 set_has_aborted();
4136 4136 }
4137 4137
4138 4138 // First drain any available SATB buffers. After this, we will not
4139 4139 // look at SATB buffers before the next invocation of this method.
4140 4140 // If enough completed SATB buffers are queued up, the regular clock
4141 4141 // will abort this task so that it restarts.
4142 4142 drain_satb_buffers();
4143 4143 // ...then partially drain the local queue and the global stack
4144 4144 drain_local_queue(true);
4145 4145 drain_global_stack(true);
4146 4146
4147 4147 // Then totally drain the region stack. We will not look at
4148 4148 // it again before the next invocation of this method. Entries on
4149 4149 // the region stack are only added during evacuation pauses, for
4150 4150 // which we have to yield. When we do, we abort the task anyway so
4151 4151 // it will look at the region stack again when it restarts.
4152 4152 bitmap_closure.set_scanning_heap_region(false);
4153 4153 drain_region_stack(&bitmap_closure);
4154 4154 // ...then partially drain the local queue and the global stack
4155 4155 drain_local_queue(true);
4156 4156 drain_global_stack(true);
4157 4157
4158 4158 do {
4159 4159 if (!has_aborted() && _curr_region != NULL) {
4160 4160 // This means that we're already holding on to a region.
4161 4161 assert(_finger != NULL, "if region is not NULL, then the finger "
4162 4162 "should not be NULL either");
4163 4163
4164 4164 // We might have restarted this task after an evacuation pause
4165 4165 // which might have evacuated the region we're holding on to
4166 4166 // underneath our feet. Let's read its limit again to make sure
4167 4167 // that we do not iterate over a region of the heap that
4168 4168 // contains garbage (update_region_limit() will also move
4169 4169 // _finger to the start of the region if it is found empty).
4170 4170 update_region_limit();
4171 4171 // We will start from _finger not from the start of the region,
4172 4172 // as we might be restarting this task after aborting half-way
4173 4173 // through scanning this region. In this case, _finger points to
4174 4174 // the address where we last found a marked object. If this is a
4175 4175 // fresh region, _finger points to start().
4176 4176 MemRegion mr = MemRegion(_finger, _region_limit);
4177 4177
4178 4178 if (_cm->verbose_low()) {
4179 4179 gclog_or_tty->print_cr("[%d] we're scanning part "
4180 4180 "["PTR_FORMAT", "PTR_FORMAT") "
4181 4181 "of region "PTR_FORMAT,
4182 4182 _task_id, _finger, _region_limit, _curr_region);
4183 4183 }
4184 4184
4185 4185 // Let's iterate over the bitmap of the part of the
4186 4186 // region that is left.
4187 4187 bitmap_closure.set_scanning_heap_region(true);
4188 4188 if (mr.is_empty() ||
4189 4189 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4190 4190 // We successfully completed iterating over the region. Now,
4191 4191 // let's give up the region.
4192 4192 giveup_current_region();
4193 4193 regular_clock_call();
4194 4194 } else {
4195 4195 assert(has_aborted(), "currently the only way to do so");
4196 4196 // The only way to abort the bitmap iteration is to return
4197 4197 // false from the do_bit() method. However, inside the
4198 4198 // do_bit() method we move the _finger to point to the
4199 4199 // object currently being looked at. So, if we bail out, we
4200 4200 // have definitely set _finger to something non-null.
4201 4201 assert(_finger != NULL, "invariant");
4202 4202
4203 4203 // Region iteration was actually aborted. So now _finger
4204 4204 // points to the address of the object we last scanned. If we
4205 4205 // leave it there, when we restart this task, we will rescan
4206 4206 // the object. It is easy to avoid this. We move the finger by
4207 4207 // enough to point to the next possible object header (the
4208 4208 // bitmap knows by how much we need to move it as it knows its
4209 4209 // granularity).
4210 4210 assert(_finger < _region_limit, "invariant");
4211 4211 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4212 4212 // Check if bitmap iteration was aborted while scanning the last object
4213 4213 if (new_finger >= _region_limit) {
4214 4214 giveup_current_region();
4215 4215 } else {
4216 4216 move_finger_to(new_finger);
4217 4217 }
4218 4218 }
4219 4219 }
4220 4220 // At this point we have either completed iterating over the
4221 4221 // region we were holding on to, or we have aborted.
4222 4222
4223 4223 // We then partially drain the local queue and the global stack.
4224 4224 // (Do we really need this?)
4225 4225 drain_local_queue(true);
4226 4226 drain_global_stack(true);
4227 4227
4228 4228 // Read the note on the claim_region() method on why it might
4229 4229 // return NULL with potentially more regions available for
4230 4230 // claiming and why we have to check out_of_regions() to determine
4231 4231 // whether we're done or not.
4232 4232 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4233 4233 // We are going to try to claim a new region. We should have
4234 4234 // given up on the previous one.
4235 4235 // Separated the asserts so that we know which one fires.
4236 4236 assert(_curr_region == NULL, "invariant");
4237 4237 assert(_finger == NULL, "invariant");
4238 4238 assert(_region_limit == NULL, "invariant");
4239 4239 if (_cm->verbose_low()) {
4240 4240 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4241 4241 }
4242 4242 HeapRegion* claimed_region = _cm->claim_region(_task_id);
4243 4243 if (claimed_region != NULL) {
4244 4244 // Yes, we managed to claim one
4245 4245 statsOnly( ++_regions_claimed );
4246 4246
4247 4247 if (_cm->verbose_low()) {
4248 4248 gclog_or_tty->print_cr("[%d] we successfully claimed "
4249 4249 "region "PTR_FORMAT,
4250 4250 _task_id, claimed_region);
4251 4251 }
4252 4252
4253 4253 setup_for_region(claimed_region);
4254 4254 assert(_curr_region == claimed_region, "invariant");
4255 4255 }
4256 4256 // It is important to call the regular clock here. It might take
4257 4257 // a while to claim a region if, for example, we hit a large
4258 4258 // block of empty regions. So we need to call the regular clock
4259 4259 // method once round the loop to make sure it's called
4260 4260 // frequently enough.
4261 4261 regular_clock_call();
4262 4262 }
4263 4263
4264 4264 if (!has_aborted() && _curr_region == NULL) {
4265 4265 assert(_cm->out_of_regions(),
4266 4266 "at this point we should be out of regions");
4267 4267 }
4268 4268 } while ( _curr_region != NULL && !has_aborted());
4269 4269
4270 4270 if (!has_aborted()) {
4271 4271 // We cannot check whether the global stack is empty, since other
4272 4272 // tasks might be pushing objects to it concurrently. We also cannot
4273 4273 // check if the region stack is empty because if a thread is aborting
4274 4274 // it can push a partially done region back.
4275 4275 assert(_cm->out_of_regions(),
4276 4276 "at this point we should be out of regions");
4277 4277
4278 4278 if (_cm->verbose_low()) {
4279 4279 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4280 4280 }
4281 4281
4282 4282 // Try to reduce the number of available SATB buffers so that
4283 4283 // remark has less work to do.
4284 4284 drain_satb_buffers();
4285 4285 }
4286 4286
4287 4287 // Since we've done everything else, we can now totally drain the
4288 4288 // local queue and global stack.
4289 4289 drain_local_queue(false);
4290 4290 drain_global_stack(false);
4291 4291
4292 4292   // Attempt at work stealing from other tasks' queues.
4293 4293 if (do_stealing && !has_aborted()) {
4294 4294 // We have not aborted. This means that we have finished all that
4295 4295 // we could. Let's try to do some stealing...
4296 4296
4297 4297 // We cannot check whether the global stack is empty, since other
4298 4298 // tasks might be pushing objects to it concurrently. We also cannot
4299 4299 // check if the region stack is empty because if a thread is aborting
4300 4300 // it can push a partially done region back.
4301 4301 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4302 4302 "only way to reach here");
4303 4303
4304 4304 if (_cm->verbose_low()) {
4305 4305 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4306 4306 }
4307 4307
4308 4308 while (!has_aborted()) {
4309 4309 oop obj;
4310 4310 statsOnly( ++_steal_attempts );
4311 4311
4312 4312 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4313 4313 if (_cm->verbose_medium()) {
4314 4314 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4315 4315 _task_id, (void*) obj);
4316 4316 }
4317 4317
4318 4318 statsOnly( ++_steals );
4319 4319
4320 4320 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4321 4321 "any stolen object should be marked");
4322 4322 scan_object(obj);
4323 4323
4324 4324 // And since we're towards the end, let's totally drain the
4325 4325 // local queue and global stack.
4326 4326 drain_local_queue(false);
4327 4327 drain_global_stack(false);
4328 4328 } else {
4329 4329 break;
4330 4330 }
4331 4331 }
4332 4332 }
4333 4333
4334 4334 // If we are about to wrap up and go into termination, check if we
4335 4335 // should raise the overflow flag.
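  // (force_overflow() appears to be a testing/stress hook: should_force()
  // artificially raises the overflow flag so the restart-on-overflow path
  // gets exercised even when the global mark stack never actually overflows.)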
4336 4336 if (do_termination && !has_aborted()) {
4337 4337 if (_cm->force_overflow()->should_force()) {
4338 4338 _cm->set_has_overflown();
4339 4339 regular_clock_call();
4340 4340 }
4341 4341 }
4342 4342
4343 4343 // We still haven't aborted. Now, let's try to get into the
4344 4344 // termination protocol.
4345 4345 if (do_termination && !has_aborted()) {
4346 4346 // We cannot check whether the global stack is empty, since other
4347 4347 // tasks might be concurrently pushing objects on it. We also cannot
4348 4348 // check if the region stack is empty because if a thread is aborting
4349 4349 // it can push a partially done region back.
4350 4350 // Separated the asserts so that we know which one fires.
4351 4351 assert(_cm->out_of_regions(), "only way to reach here");
4352 4352 assert(_task_queue->size() == 0, "only way to reach here");
4353 4353
4354 4354 if (_cm->verbose_low()) {
4355 4355 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4356 4356 }
4357 4357
4358 4358 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4359 4359 // The CMTask class also extends the TerminatorTerminator class,
4360 4360 // hence its should_exit_termination() method will also decide
4361 4361 // whether to exit the termination protocol or not.
4362 4362 bool finished = _cm->terminator()->offer_termination(this);
4363 4363 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4364 4364 _termination_time_ms +=
4365 4365 termination_end_time_ms - _termination_start_time_ms;
4366 4366
4367 4367 if (finished) {
4368 4368 // We're all done.
4369 4369
4370 4370 if (_task_id == 0) {
4371 4371 // let's allow task 0 to do this
4372 4372 if (concurrent()) {
4373 4373 assert(_cm->concurrent_marking_in_progress(), "invariant");
4374 4374 // we need to set this to false before the next
4375 4375 // safepoint. This way we ensure that the marking phase
4376 4376 // doesn't observe any more heap expansions.
4377 4377 _cm->clear_concurrent_marking_in_progress();
4378 4378 }
4379 4379 }
4380 4380
4381 4381 // We can now guarantee that the global stack is empty, since
4382 4382 // all other tasks have finished. We separated the guarantees so
4383 4383 // that, if a condition is false, we can immediately find out
4384 4384 // which one.
4385 4385 guarantee(_cm->out_of_regions(), "only way to reach here");
4386 4386 guarantee(_aborted_region.is_empty(), "only way to reach here");
4387 4387 guarantee(_cm->region_stack_empty(), "only way to reach here");
4388 4388 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4389 4389 guarantee(_task_queue->size() == 0, "only way to reach here");
4390 4390 guarantee(!_cm->has_overflown(), "only way to reach here");
4391 4391 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4392 4392 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4393 4393
4394 4394 if (_cm->verbose_low()) {
4395 4395 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4396 4396 }
4397 4397 } else {
4398 4398       // Apparently there's more work to do. Let's abort this task. It
4399 4399       // will be restarted and we can hopefully find more things to do.
4400 4400
4401 4401 if (_cm->verbose_low()) {
4402 4402 gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4403 4403 _task_id);
4404 4404 }
4405 4405
4406 4406 set_has_aborted();
4407 4407 statsOnly( ++_aborted_termination );
4408 4408 }
4409 4409 }
4410 4410
4411 4411 // Mainly for debugging purposes to make sure that a pointer to the
4412 4412 // closure which was statically allocated in this frame doesn't
4413 4413 // escape it by accident.
4414 4414 set_cm_oop_closure(NULL);
4415 4415 double end_time_ms = os::elapsedVTime() * 1000.0;
4416 4416 double elapsed_time_ms = end_time_ms - _start_time_ms;
4417 4417 // Update the step history.
4418 4418 _step_times_ms.add(elapsed_time_ms);
4419 4419
4420 4420 if (has_aborted()) {
4421 4421 // The task was aborted for some reason.
4422 4422
4423 4423 statsOnly( ++_aborted );
4424 4424
4425 4425 if (_has_timed_out) {
4426 4426 double diff_ms = elapsed_time_ms - _time_target_ms;
4427 4427 // Keep statistics of how well we did with respect to hitting
4428 4428 // our target only if we actually timed out (if we aborted for
4429 4429 // other reasons, then the results might get skewed).
4430 4430 _marking_step_diffs_ms.add(diff_ms);
4431 4431 }
4432 4432
4433 4433 if (_cm->has_overflown()) {
4434 4434 // This is the interesting one. We aborted because a global
4435 4435 // overflow was raised. This means we have to restart the
4436 4436 // marking phase and start iterating over regions. However, in
4437 4437 // order to do this we have to make sure that all tasks stop
4438 4438 // what they are doing and re-initialise in a safe manner. We
4439 4439 // will achieve this with the use of two barrier sync points.
4440 4440
4441 4441 if (_cm->verbose_low()) {
4442 4442 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4443 4443 }
4444 4444
4445 4445 _cm->enter_first_sync_barrier(_task_id);
4446 4446 // When we exit this sync barrier we know that all tasks have
4447 4447 // stopped doing marking work. So, it's now safe to
4448 4448 // re-initialise our data structures. At the end of this method,
4449 4449 // task 0 will clear the global data structures.
4450 4450
4451 4451 statsOnly( ++_aborted_overflow );
4452 4452
4453 4453 // We clear the local state of this task...
4454 4454 clear_region_fields();
4455 4455
4456 4456 // ...and enter the second barrier.
4457 4457 _cm->enter_second_sync_barrier(_task_id);
4458 4458       // At this point everything has been re-initialised and we're
4459 4459 // ready to restart.
4460 4460 }
4461 4461
4462 4462 if (_cm->verbose_low()) {
4463 4463 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4464 4464 "elapsed = %1.2lfms <<<<<<<<<<",
4465 4465 _task_id, _time_target_ms, elapsed_time_ms);
4466 4466 if (_cm->has_aborted()) {
4467 4467 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4468 4468 _task_id);
4469 4469 }
4470 4470 }
4471 4471 } else {
4472 4472 if (_cm->verbose_low()) {
4473 4473 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4474 4474 "elapsed = %1.2lfms <<<<<<<<<<",
4475 4475 _task_id, _time_target_ms, elapsed_time_ms);
4476 4476 }
4477 4477 }
4478 4478
4479 4479 _claimed = false;
4480 4480 }
4481 4481
4482 4482 CMTask::CMTask(int task_id,
4483 4483 ConcurrentMark* cm,
4484 4484 CMTaskQueue* task_queue,
4485 4485 CMTaskQueueSet* task_queues)
4486 4486 : _g1h(G1CollectedHeap::heap()),
4487 4487 _task_id(task_id), _cm(cm),
4488 4488 _claimed(false),
4489 4489 _nextMarkBitMap(NULL), _hash_seed(17),
4490 4490 _task_queue(task_queue),
4491 4491 _task_queues(task_queues),
4492 4492 _cm_oop_closure(NULL),
4493 4493 _aborted_region(MemRegion()) {
4494 4494 guarantee(task_queue != NULL, "invariant");
4495 4495 guarantee(task_queues != NULL, "invariant");
4496 4496
4497 4497 statsOnly( _clock_due_to_scanning = 0;
4498 4498 _clock_due_to_marking = 0 );
4499 4499
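  // Seed the step-time diff statistics so that the very first prediction is
  // not based on an empty sequence (0.5 ms is presumably just an initial guess).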
4500 4500 _marking_step_diffs_ms.add(0.5);
4501 4501 }
4502 4502
4503 4503 // These are formatting macros that are used below to ensure
4504 4504 // consistent formatting. The *_H_* versions are used to format the
4505 4505 // header for a particular value and they should be kept consistent
4506 4506 // with the corresponding macro. Also note that most of the macros add
4507 4507 // the necessary white space (as a prefix) which makes them a bit
4508 4508 // easier to compose.
4509 4509
4510 4510 // All the output lines are prefixed with this string to be able to
4511 4511 // identify them easily in a large log file.
4512 4512 #define G1PPRL_LINE_PREFIX "###"
4513 4513
4514 4514 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4515 4515 #ifdef _LP64
4516 4516 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4517 4517 #else // _LP64
4518 4518 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4519 4519 #endif // _LP64
4520 4520
4521 4521 // For per-region info
4522 4522 #define G1PPRL_TYPE_FORMAT " %-4s"
4523 4523 #define G1PPRL_TYPE_H_FORMAT " %4s"
4524 4524 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4525 4525 #define G1PPRL_BYTE_H_FORMAT " %9s"
4526 4526 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4527 4527 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4528 4528
4529 4529 // For summary info
4530 4530 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4531 4531 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4532 4532 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4533 4533 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
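// For example, these macros compose through C string-literal concatenation:
// G1PPRL_SUM_MB_PERC_FORMAT("used") expands to " used: %1.2f MB / %1.2f %%",
// so each print_cr() call below simply juxtaposes one macro per output column.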
4534 4534
4535 4535 G1PrintRegionLivenessInfoClosure::
4536 4536 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4537 4537 : _out(out),
4538 4538 _total_used_bytes(0), _total_capacity_bytes(0),
4539 4539 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4540 4540 _hum_used_bytes(0), _hum_capacity_bytes(0),
4541 4541 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4542 4542 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4543 4543 MemRegion g1_committed = g1h->g1_committed();
4544 4544 MemRegion g1_reserved = g1h->g1_reserved();
4545 4545 double now = os::elapsedTime();
4546 4546
4547 4547 // Print the header of the output.
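  // A sample of this header (phase name, addresses and values are purely
  // illustrative, and spacing is approximate):
  //   ### PHASE Post-Marking @ 10.537
  //   ### HEAP committed: 0x00000000e0000000-0x00000000f0000000 reserved: 0x00000000e0000000-0x0000000100000000 region-size: 1048576
  //   ###  type                      address-range      used prev-live next-live         gc-eff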
4548 4548 _out->cr();
4549 4549 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4550 4550 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4551 4551 G1PPRL_SUM_ADDR_FORMAT("committed")
4552 4552 G1PPRL_SUM_ADDR_FORMAT("reserved")
4553 4553 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4554 4554 g1_committed.start(), g1_committed.end(),
4555 4555 g1_reserved.start(), g1_reserved.end(),
4556 - HeapRegion::GrainBytes);
4556 + (size_t)HeapRegion::GrainBytes);
4557 4557 _out->print_cr(G1PPRL_LINE_PREFIX);
4558 4558 _out->print_cr(G1PPRL_LINE_PREFIX
4559 4559 G1PPRL_TYPE_H_FORMAT
4560 4560 G1PPRL_ADDR_BASE_H_FORMAT
4561 4561 G1PPRL_BYTE_H_FORMAT
4562 4562 G1PPRL_BYTE_H_FORMAT
4563 4563 G1PPRL_BYTE_H_FORMAT
4564 4564 G1PPRL_DOUBLE_H_FORMAT,
4565 4565 "type", "address-range",
4566 4566 "used", "prev-live", "next-live", "gc-eff");
4567 4567 }
4568 4568
4569 4569 // It takes as a parameter a reference to one of the _hum_* fields. It
4570 4570 // deduces the corresponding value for a region in a humongous region
4571 4571 // series (either the region size, or what's left if the _hum_* field
4572 4572 // is < the region size), and updates the _hum_* field accordingly.
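// For example (with an illustrative 1 MB region size): if *hum_bytes starts
// at 2621440 (a 2.5 MB humongous series), three successive calls return
// 1048576, 1048576 and 524288, after which any further calls return 0.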
4573 4573 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4574 4574 size_t bytes = 0;
4575 4575 // The > 0 check is to deal with the prev and next live bytes which
4576 4576 // could be 0.
4577 4577 if (*hum_bytes > 0) {
4578 4578 bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
4579 4579 *hum_bytes -= bytes;
4580 4580 }
4581 4581 return bytes;
4582 4582 }
4583 4583
4584 4584 // It deduces the values for a region in a humongous region series
4585 4585 // from the _hum_* fields and updates those accordingly. It assumes
4586 4586 // that the _hum_* fields have already been set up from the "starts
4587 4587 // humongous" region and we visit the regions in address order.
4588 4588 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4589 4589 size_t* capacity_bytes,
4590 4590 size_t* prev_live_bytes,
4591 4591 size_t* next_live_bytes) {
4592 4592 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4593 4593 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4594 4594 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4595 4595 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4596 4596 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4597 4597 }
4598 4598
4599 4599 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4600 4600 const char* type = "";
4601 4601 HeapWord* bottom = r->bottom();
4602 4602 HeapWord* end = r->end();
4603 4603 size_t capacity_bytes = r->capacity();
4604 4604 size_t used_bytes = r->used();
4605 4605 size_t prev_live_bytes = r->live_bytes();
4606 4606 size_t next_live_bytes = r->next_live_bytes();
4607 4607 double gc_eff = r->gc_efficiency();
4608 4608 if (r->used() == 0) {
4609 4609 type = "FREE";
4610 4610 } else if (r->is_survivor()) {
4611 4611 type = "SURV";
4612 4612 } else if (r->is_young()) {
4613 4613 type = "EDEN";
4614 4614 } else if (r->startsHumongous()) {
4615 4615 type = "HUMS";
4616 4616
4617 4617 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4618 4618 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4619 4619 "they should have been zeroed after the last time we used them");
4620 4620 // Set up the _hum_* fields.
4621 4621 _hum_capacity_bytes = capacity_bytes;
4622 4622 _hum_used_bytes = used_bytes;
4623 4623 _hum_prev_live_bytes = prev_live_bytes;
4624 4624 _hum_next_live_bytes = next_live_bytes;
4625 4625 get_hum_bytes(&used_bytes, &capacity_bytes,
4626 4626 &prev_live_bytes, &next_live_bytes);
4627 4627 end = bottom + HeapRegion::GrainWords;
4628 4628 } else if (r->continuesHumongous()) {
4629 4629 type = "HUMC";
4630 4630 get_hum_bytes(&used_bytes, &capacity_bytes,
4631 4631 &prev_live_bytes, &next_live_bytes);
4632 4632 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4633 4633 } else {
4634 4634 type = "OLD";
4635 4635 }
4636 4636
4637 4637 _total_used_bytes += used_bytes;
4638 4638 _total_capacity_bytes += capacity_bytes;
4639 4639 _total_prev_live_bytes += prev_live_bytes;
4640 4640 _total_next_live_bytes += next_live_bytes;
4641 4641
4642 4642 // Print a line for this particular region.
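  // For example (illustrative values, approximate spacing):
  //   ### OLD  0x00000000f0000000-0x00000000f0100000   1048576    917504    901120            0.4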
4643 4643 _out->print_cr(G1PPRL_LINE_PREFIX
4644 4644 G1PPRL_TYPE_FORMAT
4645 4645 G1PPRL_ADDR_BASE_FORMAT
4646 4646 G1PPRL_BYTE_FORMAT
4647 4647 G1PPRL_BYTE_FORMAT
4648 4648 G1PPRL_BYTE_FORMAT
4649 4649 G1PPRL_DOUBLE_FORMAT,
4650 4650 type, bottom, end,
4651 4651 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4652 4652
4653 4653 return false;
4654 4654 }
4655 4655
4656 4656 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4657 4657 // Print the footer of the output.
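  // For example (illustrative values, approximate spacing):
  //   ###
  //   ### SUMMARY capacity: 2048.00 MB used: 1024.00 MB / 50.00 % prev-live: 900.00 MB / 43.95 % next-live: 512.00 MB / 25.00 %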
4658 4658 _out->print_cr(G1PPRL_LINE_PREFIX);
4659 4659 _out->print_cr(G1PPRL_LINE_PREFIX
4660 4660 " SUMMARY"
4661 4661 G1PPRL_SUM_MB_FORMAT("capacity")
4662 4662 G1PPRL_SUM_MB_PERC_FORMAT("used")
4663 4663 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4664 4664 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4665 4665 bytes_to_mb(_total_capacity_bytes),
4666 4666 bytes_to_mb(_total_used_bytes),
4667 4667 perc(_total_used_bytes, _total_capacity_bytes),
4668 4668 bytes_to_mb(_total_prev_live_bytes),
4669 4669 perc(_total_prev_live_bytes, _total_capacity_bytes),
4670 4670 bytes_to_mb(_total_next_live_bytes),
4671 4671 perc(_total_next_live_bytes, _total_capacity_bytes));
4672 4672 _out->cr();
4673 4673 }